diff --git a/Cargo.lock b/Cargo.lock index eccdff7..412ae83 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3543,7 +3543,7 @@ dependencies = [ [[package]] name = "websurfx" -version = "0.18.0" +version = "0.18.1" dependencies = [ "actix-cors", "actix-files", @@ -3566,6 +3566,7 @@ dependencies = [ "scraper", "serde", "serde_json", + "tempfile", "tokio 1.32.0", ] diff --git a/Cargo.toml b/Cargo.toml index c5f9013..d36117b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "websurfx" -version = "0.18.0" +version = "0.18.1" edition = "2021" description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind." repository = "https://github.com/neon-mmd/websurfx" @@ -31,6 +31,7 @@ regex = {version="1.9.3", features=["perf"]} [dev-dependencies] rusty-hook = "^0.11.2" criterion = "0.5.1" +tempfile = "3.8.0" [profile.dev] opt-level = 0 diff --git a/src/results/aggregator.rs b/src/results/aggregator.rs index 23ed091..3f06ecb 100644 --- a/src/results/aggregator.rs +++ b/src/results/aggregator.rs @@ -175,22 +175,181 @@ pub async fn aggregate( )) } -fn filter_with_lists( +/// Filters a map of search results using a list of regex patterns. +/// +/// # Arguments +/// +/// * `map_to_be_filtered` - A mutable reference to a `HashMap` of search results to filter, where the filtered results will be removed from. +/// * `resultant_map` - A mutable reference to a `HashMap` to hold the filtered results. +/// * `file_path` - A `&str` representing the path to a file containing regex patterns to use for filtering. +/// +/// # Errors +/// +/// Returns an error if the file at `file_path` cannot be opened or read, or if a regex pattern is invalid. +pub fn filter_with_lists( map_to_be_filtered: &mut HashMap, resultant_map: &mut HashMap, file_path: &str, ) -> Result<(), Box> { let mut reader = BufReader::new(File::open(file_path)?); + for line in reader.by_ref().lines() { let re = Regex::new(&line?)?; + + // Iterate over each search result in the map and check if it matches the regex pattern for (url, search_result) in map_to_be_filtered.clone().into_iter() { if re.is_match(&url.to_lowercase()) || re.is_match(&search_result.title.to_lowercase()) || re.is_match(&search_result.description.to_lowercase()) { + // If the search result matches the regex pattern, move it from the original map to the resultant map resultant_map.insert(url.clone(), map_to_be_filtered.remove(&url).unwrap()); } } } + Ok(()) } + +#[cfg(test)] +mod tests { + use super::*; + use std::collections::HashMap; + use std::io::Write; + use tempfile::NamedTempFile; + + #[test] + fn test_filter_with_lists() -> Result<(), Box> { + // Create a map of search results to filter + let mut map_to_be_filtered = HashMap::new(); + map_to_be_filtered.insert( + "https://www.example.com".to_string(), + SearchResult { + title: "Example Domain".to_string(), + url: "https://www.example.com".to_string(), + description: "This domain is for use in illustrative examples in documents." + .to_string(), + engine: vec!["Google".to_string(), "Bing".to_string()], + }, + ); + map_to_be_filtered.insert( + "https://www.rust-lang.org/".to_string(), + SearchResult { + title: "Rust Programming Language".to_string(), + url: "https://www.rust-lang.org/".to_string(), + description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_string(), + engine: vec!["Google".to_string(), "DuckDuckGo".to_string()], + }, + ); + + // Create a temporary file with regex patterns + let mut file = NamedTempFile::new()?; + writeln!(file, "example")?; + writeln!(file, "rust")?; + file.flush()?; + + let mut resultant_map = HashMap::new(); + filter_with_lists( + &mut map_to_be_filtered, + &mut resultant_map, + file.path().to_str().unwrap(), + )?; + + assert_eq!(resultant_map.len(), 2); + assert!(resultant_map.contains_key("https://www.example.com")); + assert!(resultant_map.contains_key("https://www.rust-lang.org/")); + assert_eq!(map_to_be_filtered.len(), 0); + + Ok(()) + } + + #[test] + fn test_filter_with_lists_wildcard() -> Result<(), Box> { + let mut map_to_be_filtered = HashMap::new(); + map_to_be_filtered.insert( + "https://www.example.com".to_string(), + SearchResult { + title: "Example Domain".to_string(), + url: "https://www.example.com".to_string(), + description: "This domain is for use in illustrative examples in documents." + .to_string(), + engine: vec!["Google".to_string(), "Bing".to_string()], + }, + ); + map_to_be_filtered.insert( + "https://www.rust-lang.org/".to_string(), + SearchResult { + title: "Rust Programming Language".to_string(), + url: "https://www.rust-lang.org/".to_string(), + description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_string(), + engine: vec!["Google".to_string(), "DuckDuckGo".to_string()], + }, + ); + + // Create a temporary file with a regex pattern containing a wildcard + let mut file = NamedTempFile::new()?; + writeln!(file, "ex.*le")?; + file.flush()?; + + let mut resultant_map = HashMap::new(); + + filter_with_lists( + &mut map_to_be_filtered, + &mut resultant_map, + file.path().to_str().unwrap(), + )?; + + assert_eq!(resultant_map.len(), 1); + assert!(resultant_map.contains_key("https://www.example.com")); + assert_eq!(map_to_be_filtered.len(), 1); + assert!(map_to_be_filtered.contains_key("https://www.rust-lang.org/")); + + Ok(()) + } + + #[test] + fn test_filter_with_lists_file_not_found() { + let mut map_to_be_filtered = HashMap::new(); + + let mut resultant_map = HashMap::new(); + + // Call the `filter_with_lists` function with a non-existent file path + let result = filter_with_lists( + &mut map_to_be_filtered, + &mut resultant_map, + "non-existent-file.txt", + ); + + assert!(result.is_err()); + } + + #[test] + fn test_filter_with_lists_invalid_regex() { + let mut map_to_be_filtered = HashMap::new(); + map_to_be_filtered.insert( + "https://www.example.com".to_string(), + SearchResult { + title: "Example Domain".to_string(), + url: "https://www.example.com".to_string(), + description: "This domain is for use in illustrative examples in documents." + .to_string(), + engine: vec!["Google".to_string(), "Bing".to_string()], + }, + ); + + let mut resultant_map = HashMap::new(); + + // Create a temporary file with an invalid regex pattern + let mut file = NamedTempFile::new().unwrap(); + writeln!(file, "example(").unwrap(); + file.flush().unwrap(); + + let result = filter_with_lists( + &mut map_to_be_filtered, + &mut resultant_map, + file.path().to_str().unwrap(), + ); + + assert!(result.is_err()); + } +}