From 5c0397c4562a420bb933f179f51770a71b407dd0 Mon Sep 17 00:00:00 2001
From: xffxff <1247714429@qq.com>
Date: Thu, 24 Aug 2023 09:29:08 +0800
Subject: [PATCH 1/7] add some comments to filter_with_lists and add a basic test

---
 Cargo.lock                |  1 +
 Cargo.toml                |  1 +
 src/results/aggregator.rs | 66 ++++++++++++++++++++++++++++++++++++++-
 3 files changed, 67 insertions(+), 1 deletion(-)

diff --git a/Cargo.lock b/Cargo.lock
index eccdff7..1af829d 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3566,6 +3566,7 @@ dependencies = [
  "scraper",
  "serde",
  "serde_json",
+ "tempfile",
  "tokio 1.32.0",
 ]
 
diff --git a/Cargo.toml b/Cargo.toml
index c5f9013..9e92f5b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -27,6 +27,7 @@ once_cell = {version="1.18.0"}
 error-stack = {version="0.3.1"}
 async-trait = {version="0.1.73"}
 regex = {version="1.9.3", features=["perf"]}
+tempfile = "3.8.0"
 
 [dev-dependencies]
 rusty-hook = "^0.11.2"
diff --git a/src/results/aggregator.rs b/src/results/aggregator.rs
index 23ed091..e753aea 100644
--- a/src/results/aggregator.rs
+++ b/src/results/aggregator.rs
@@ -175,22 +175,86 @@ pub async fn aggregate(
     ))
 }
 
-fn filter_with_lists(
+/// Filters a map of search results using a list of regex patterns.
+///
+/// # Arguments
+///
+/// * `map_to_be_filtered` - A mutable reference to a `HashMap` of search results to filter; entries that match a pattern are removed from this map.
+/// * `resultant_map` - A mutable reference to a `HashMap` to hold the filtered results.
+/// * `file_path` - A `&str` representing the path to a file containing regex patterns to use for filtering.
+///
+/// # Errors
+///
+/// Returns an error if the file at `file_path` cannot be opened or read, or if a regex pattern is invalid.
+pub fn filter_with_lists(
     map_to_be_filtered: &mut HashMap<String, SearchResult>,
     resultant_map: &mut HashMap<String, SearchResult>,
     file_path: &str,
 ) -> Result<(), Box<dyn std::error::Error>> {
     let mut reader = BufReader::new(File::open(file_path)?);
+
     for line in reader.by_ref().lines() {
         let re = Regex::new(&line?)?;
+
+        // Iterate over each search result in the map and check if it matches the regex pattern
         for (url, search_result) in map_to_be_filtered.clone().into_iter() {
             if re.is_match(&url.to_lowercase())
                 || re.is_match(&search_result.title.to_lowercase())
                 || re.is_match(&search_result.description.to_lowercase())
             {
+                // If the search result matches the regex pattern, move it from the original map to the resultant map
                 resultant_map.insert(url.clone(), map_to_be_filtered.remove(&url).unwrap());
             }
         }
     }
+
     Ok(())
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::collections::HashMap;
+    use std::io::Write;
+    use tempfile::NamedTempFile;
+
+    #[test]
+    fn test_filter_with_lists() -> Result<(), Box<dyn std::error::Error>> {
+        // Create a map of search results to filter
+        let mut map_to_be_filtered = HashMap::new();
+        map_to_be_filtered.insert(
+            "https://www.example.com".to_string(),
+            SearchResult {
+                title: "Example Domain".to_string(),
+                url: "https://www.example.com".to_string(),
+                description: "This domain is for use in illustrative examples in documents.".to_string(),
+                engine: vec!["Google".to_string(), "Bing".to_string()],
+            },
+        );
+        map_to_be_filtered.insert(
+            "https://www.rust-lang.org/".to_string(),
+            SearchResult {
+                title: "Rust Programming Language".to_string(),
+                url: "https://www.rust-lang.org/".to_string(),
+                description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_string(),
+                engine: vec!["Google".to_string(), "DuckDuckGo".to_string()],
+            },
+        );
+
+        // Create a temporary file with regex patterns
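+        // `NamedTempFile` deletes the file automatically when the handle is dropped
+        // at the end of the test, so no explicit cleanup is needed.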
+        let mut file = NamedTempFile::new()?;
+        writeln!(file, "example")?;
+        writeln!(file, "rust")?;
+        file.flush()?;
+
+        let mut resultant_map = HashMap::new();
+        filter_with_lists(&mut map_to_be_filtered, &mut resultant_map, file.path().to_str().unwrap())?;
+
+        assert_eq!(resultant_map.len(), 2);
+        assert!(resultant_map.contains_key("https://www.example.com"));
+        assert!(resultant_map.contains_key("https://www.rust-lang.org/"));
+        assert_eq!(map_to_be_filtered.len(), 0);
+
+        Ok(())
+    }
+}
\ No newline at end of file

From 4280545e8c1078187486958209b5f82ed660c615 Mon Sep 17 00:00:00 2001
From: xffxff <1247714429@qq.com>
Date: Thu, 24 Aug 2023 09:32:22 +0800
Subject: [PATCH 2/7] add a test for non-existent file

---
 src/results/aggregator.rs | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/src/results/aggregator.rs b/src/results/aggregator.rs
index e753aea..c2d6885 100644
--- a/src/results/aggregator.rs
+++ b/src/results/aggregator.rs
@@ -257,4 +257,16 @@ mod tests {
 
         Ok(())
     }
+
+    #[test]
+    fn test_filter_with_lists_file_not_found() {
+        let mut map_to_be_filtered = HashMap::new();
+
+        let mut resultant_map = HashMap::new();
+
+        // Call the `filter_with_lists` function with a non-existent file path
+        let result = filter_with_lists(&mut map_to_be_filtered, &mut resultant_map, "non-existent-file.txt");
+
+        assert!(result.is_err());
+    }
 }
\ No newline at end of file

From a2fc10ca3943b920d63b668d5a8935f4ea842d0b Mon Sep 17 00:00:00 2001
From: xffxff <1247714429@qq.com>
Date: Thu, 24 Aug 2023 09:36:08 +0800
Subject: [PATCH 3/7] add a test for invalid regex

---
 src/results/aggregator.rs | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/src/results/aggregator.rs b/src/results/aggregator.rs
index c2d6885..c2a2cdb 100644
--- a/src/results/aggregator.rs
+++ b/src/results/aggregator.rs
@@ -269,4 +269,29 @@ mod tests {
 
         assert!(result.is_err());
     }
+
+    #[test]
+    fn test_filter_with_lists_invalid_regex() {
+        let mut map_to_be_filtered = HashMap::new();
+        map_to_be_filtered.insert(
+            "https://www.example.com".to_string(),
+            SearchResult {
+                title: "Example Domain".to_string(),
+                url: "https://www.example.com".to_string(),
+                description: "This domain is for use in illustrative examples in documents.".to_string(),
+                engine: vec!["Google".to_string(), "Bing".to_string()],
+            },
+        );
+
+        let mut resultant_map = HashMap::new();
+
+        // Create a temporary file with an invalid regex pattern
+        let mut file = NamedTempFile::new().unwrap();
+        writeln!(file, "example(").unwrap();
+        file.flush().unwrap();
+
+        let result = filter_with_lists(&mut map_to_be_filtered, &mut resultant_map, file.path().to_str().unwrap());
+
+        assert!(result.is_err());
+}
 }
\ No newline at end of file

From 23ff24bdf3b6067aa38f2ffb61112c424c1d0a49 Mon Sep 17 00:00:00 2001
From: xffxff <1247714429@qq.com>
Date: Thu, 24 Aug 2023 09:46:01 +0800
Subject: [PATCH 4/7] add a test to check if the regex wildcard .* matches any character

---
 src/results/aggregator.rs | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/src/results/aggregator.rs b/src/results/aggregator.rs
index c2a2cdb..9b93f18 100644
--- a/src/results/aggregator.rs
+++ b/src/results/aggregator.rs
@@ -258,6 +258,45 @@ mod tests {
         Ok(())
     }
 
+    #[test]
+    fn test_filter_with_lists_wildcard() -> Result<(), Box<dyn std::error::Error>> {
+        let mut map_to_be_filtered = HashMap::new();
+        map_to_be_filtered.insert(
+            "https://www.example.com".to_string(),
+            SearchResult {
+                title: "Example Domain".to_string(),
+                url: "https://www.example.com".to_string(),
"https://www.example.com".to_string(), + description: "This domain is for use in illustrative examples in documents.".to_string(), + engine: vec!["Google".to_string(), "Bing".to_string()], + }, + ); + map_to_be_filtered.insert( + "https://www.rust-lang.org/".to_string(), + SearchResult { + title: "Rust Programming Language".to_string(), + url: "https://www.rust-lang.org/".to_string(), + description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_string(), + engine: vec!["Google".to_string(), "DuckDuckGo".to_string()], + }, + ); + + // Create a temporary file with a regex pattern containing a wildcard + let mut file = NamedTempFile::new()?; + writeln!(file, "ex.*le")?; + file.flush()?; + + let mut resultant_map = HashMap::new(); + + filter_with_lists(&mut map_to_be_filtered, &mut resultant_map, file.path().to_str().unwrap())?; + + assert_eq!(resultant_map.len(), 1); + assert!(resultant_map.contains_key("https://www.example.com")); + assert_eq!(map_to_be_filtered.len(), 1); + assert!(map_to_be_filtered.contains_key("https://www.rust-lang.org/")); + + Ok(()) + } + #[test] fn test_filter_with_lists_file_not_found() { let mut map_to_be_filtered = HashMap::new(); From c3a7c917f66969f0e77dbc8ac47ec36de9881b26 Mon Sep 17 00:00:00 2001 From: xffxff <1247714429@qq.com> Date: Thu, 24 Aug 2023 09:50:19 +0800 Subject: [PATCH 5/7] make format happy --- src/results/aggregator.rs | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/src/results/aggregator.rs b/src/results/aggregator.rs index 9b93f18..3f06ecb 100644 --- a/src/results/aggregator.rs +++ b/src/results/aggregator.rs @@ -227,7 +227,8 @@ mod tests { SearchResult { title: "Example Domain".to_string(), url: "https://www.example.com".to_string(), - description: "This domain is for use in illustrative examples in documents.".to_string(), + description: "This domain is for use in illustrative examples in documents." + .to_string(), engine: vec!["Google".to_string(), "Bing".to_string()], }, ); @@ -248,7 +249,11 @@ mod tests { file.flush()?; let mut resultant_map = HashMap::new(); - filter_with_lists(&mut map_to_be_filtered, &mut resultant_map, file.path().to_str().unwrap())?; + filter_with_lists( + &mut map_to_be_filtered, + &mut resultant_map, + file.path().to_str().unwrap(), + )?; assert_eq!(resultant_map.len(), 2); assert!(resultant_map.contains_key("https://www.example.com")); @@ -266,7 +271,8 @@ mod tests { SearchResult { title: "Example Domain".to_string(), url: "https://www.example.com".to_string(), - description: "This domain is for use in illustrative examples in documents.".to_string(), + description: "This domain is for use in illustrative examples in documents." 
+                    .to_string(),
                 engine: vec!["Google".to_string(), "Bing".to_string()],
             },
         );
@@ -287,7 +293,11 @@ mod tests {
 
         let mut resultant_map = HashMap::new();
 
-        filter_with_lists(&mut map_to_be_filtered, &mut resultant_map, file.path().to_str().unwrap())?;
+        filter_with_lists(
+            &mut map_to_be_filtered,
+            &mut resultant_map,
+            file.path().to_str().unwrap(),
+        )?;
 
         assert_eq!(resultant_map.len(), 1);
         assert!(resultant_map.contains_key("https://www.example.com"));
@@ -304,7 +314,11 @@ mod tests {
         let mut resultant_map = HashMap::new();
 
         // Call the `filter_with_lists` function with a non-existent file path
-        let result = filter_with_lists(&mut map_to_be_filtered, &mut resultant_map, "non-existent-file.txt");
+        let result = filter_with_lists(
+            &mut map_to_be_filtered,
+            &mut resultant_map,
+            "non-existent-file.txt",
+        );
 
         assert!(result.is_err());
     }
@@ -317,7 +331,8 @@ mod tests {
             SearchResult {
                 title: "Example Domain".to_string(),
                 url: "https://www.example.com".to_string(),
-                description: "This domain is for use in illustrative examples in documents.".to_string(),
+                description: "This domain is for use in illustrative examples in documents."
+                    .to_string(),
                 engine: vec!["Google".to_string(), "Bing".to_string()],
             },
         );
@@ -329,8 +344,12 @@ mod tests {
         writeln!(file, "example(").unwrap();
         file.flush().unwrap();
 
-        let result = filter_with_lists(&mut map_to_be_filtered, &mut resultant_map, file.path().to_str().unwrap());
+        let result = filter_with_lists(
+            &mut map_to_be_filtered,
+            &mut resultant_map,
+            file.path().to_str().unwrap(),
+        );
 
         assert!(result.is_err());
+    }
 }
-}
\ No newline at end of file

From e5a022776246ce068aca69f913c4ab49ca57c3b2 Mon Sep 17 00:00:00 2001
From: xffxff <1247714429@qq.com>
Date: Thu, 24 Aug 2023 17:10:40 +0800
Subject: [PATCH 6/7] put `tempfile` under `dev-dependencies`

---
 Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Cargo.toml b/Cargo.toml
index 9e92f5b..31f29cd 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -27,11 +27,11 @@ once_cell = {version="1.18.0"}
 error-stack = {version="0.3.1"}
 async-trait = {version="0.1.73"}
 regex = {version="1.9.3", features=["perf"]}
-tempfile = "3.8.0"
 
 [dev-dependencies]
 rusty-hook = "^0.11.2"
 criterion = "0.5.1"
+tempfile = "3.8.0"
 
 [profile.dev]
 opt-level = 0

From 64948b84f1eb5ccd66af6ff8da808d05ad66864e Mon Sep 17 00:00:00 2001
From: xffxff <1247714429@qq.com>
Date: Thu, 24 Aug 2023 17:11:09 +0800
Subject: [PATCH 7/7] bump version

---
 Cargo.lock | 2 +-
 Cargo.toml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 1af829d..412ae83 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3543,7 +3543,7 @@
 
 [[package]]
 name = "websurfx"
-version = "0.18.0"
+version = "0.18.1"
 dependencies = [
  "actix-cors",
  "actix-files",

diff --git a/Cargo.toml b/Cargo.toml
index 31f29cd..d36117b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "websurfx"
-version = "0.18.0"
+version = "0.18.1"
 edition = "2021"
 description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
 repository = "https://github.com/neon-mmd/websurfx"
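
Note for reviewers: the tests added in patches 1-4 can be run on their own with cargo's test-name substring filter, e.g. `cargo test filter_with_lists` from the repository root (assuming a standard Rust toolchain); the filter matches every `test_filter_with_lists*` function introduced above.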