Merge pull request #191 from xffxff/unit_tests

🧹 Unit tests for the function `filter_with_lists`
2024-11-21 21:48:21 -05:00 · 2023-08-24 15:49:34 +03:00 · 2023-08-24 15:49:34 +03:00 · 7a8bf022d4
commit 7a8bf022d4
parent 7f371bf91b 64948b84f1
3 changed files with 164 additions and 3 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -3543,7 +3543,7 @@ dependencies = [
 [[package]]
 name = "websurfx"
-version = "0.18.0"
+version = "0.18.1"
 dependencies = [
 "actix-cors",
 "actix-files",
@ -3566,6 +3566,7 @@ dependencies = [
 "scraper",
 "serde",
 "serde_json",
 "tempfile",
 "tokio 1.32.0",
 ]
--- a/Cargo.toml
+++ b/Cargo.toml
@ -1,6 +1,6 @@
 [package]
 name = "websurfx"
-version = "0.18.0"
+version = "0.18.1"
 edition = "2021"
 description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
 repository = "https://github.com/neon-mmd/websurfx"
@ -31,6 +31,7 @@ regex = {version="1.9.3", features=["perf"]}
 [dev-dependencies]
 rusty-hook = "^0.11.2"
 criterion = "0.5.1"
 tempfile = "3.8.0"
 [profile.dev]
 opt-level = 0
--- a/src/results/aggregator.rs
+++ b/src/results/aggregator.rs
@ -175,22 +175,181 @@ pub async fn aggregate(
    ))
 }
-fn filter_with_lists(
+/// Filters a map of search results using a list of regex patterns.
 ///
 /// # Arguments
 ///
 /// * `map_to_be_filtered` - A mutable reference to a `HashMap` of search results to filter; every result that matches a pattern is removed from it.
 /// * `resultant_map` - A mutable reference to a `HashMap` that receives the results removed from `map_to_be_filtered`.
 /// * `file_path` - A `&str` representing the path to a file containing regex patterns to use for filtering.
 ///
 /// # Errors
 ///
 /// Returns an error if the file at `file_path` cannot be opened or read, or if a regex pattern is invalid.
 pub fn filter_with_lists(
    map_to_be_filtered: &mut HashMap<String, SearchResult>,
    resultant_map: &mut HashMap<String, SearchResult>,
    file_path: &str,
 ) -> Result<(), Box<dyn std::error::Error>> {
    let mut reader = BufReader::new(File::open(file_path)?);

    // Every line of the file is compiled as one regex pattern; an unreadable line or an
    // invalid pattern aborts the whole call with an error.
    for line in reader.by_ref().lines() {
        let re = Regex::new(&line?)?;

        // Collect only the keys of the matching results instead of cloning the entire map
        // for every pattern. The url, title and description are lowercased before matching,
        // so a pattern containing uppercase literals will not match anything.
        let matched_urls: Vec<String> = map_to_be_filtered
            .iter()
            .filter(|(url, search_result)| {
                re.is_match(&url.to_lowercase())
                    || re.is_match(&search_result.title.to_lowercase())
                    || re.is_match(&search_result.description.to_lowercase())
            })
            .map(|(url, _)| url.clone())
            .collect();

        // Move each matching result from the original map to the resultant map. The key was
        // just read from the map, so `remove` always yields a value here; `if let` merely
        // avoids a panic path.
        for url in matched_urls {
            if let Some(search_result) = map_to_be_filtered.remove(&url) {
                resultant_map.insert(url, search_result);
            }
        }
    }

    Ok(())
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    use std::collections::HashMap;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Key under which the `example.com` fixture is stored.
    const EXAMPLE_URL: &str = "https://www.example.com";
    /// Key under which the `rust-lang.org` fixture is stored.
    const RUST_URL: &str = "https://www.rust-lang.org/";

    /// Builds the `example.com` search result as a `(key, value)` pair.
    fn example_entry() -> (String, SearchResult) {
        let result = SearchResult {
            title: "Example Domain".to_string(),
            url: EXAMPLE_URL.to_string(),
            description: "This domain is for use in illustrative examples in documents."
                .to_string(),
            engine: vec!["Google".to_string(), "Bing".to_string()],
        };
        (EXAMPLE_URL.to_string(), result)
    }

    /// Builds the `rust-lang.org` search result as a `(key, value)` pair.
    fn rust_entry() -> (String, SearchResult) {
        let result = SearchResult {
            title: "Rust Programming Language".to_string(),
            url: RUST_URL.to_string(),
            description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_string(),
            engine: vec!["Google".to_string(), "DuckDuckGo".to_string()],
        };
        (RUST_URL.to_string(), result)
    }

    /// Writes one pattern per line into a fresh temporary file and flushes it.
    fn pattern_file(patterns: &[&str]) -> std::io::Result<NamedTempFile> {
        let mut file = NamedTempFile::new()?;
        for pattern in patterns {
            writeln!(file, "{}", pattern)?;
        }
        file.flush()?;
        Ok(file)
    }

    /// Two plain patterns that together match both fixtures: everything is moved.
    #[test]
    fn test_filter_with_lists() -> Result<(), Box<dyn std::error::Error>> {
        let mut map_to_be_filtered = HashMap::from([example_entry(), rust_entry()]);
        let mut resultant_map = HashMap::new();
        let file = pattern_file(&["example", "rust"])?;

        filter_with_lists(
            &mut map_to_be_filtered,
            &mut resultant_map,
            file.path().to_str().unwrap(),
        )?;

        assert_eq!(resultant_map.len(), 2);
        assert!(resultant_map.contains_key(EXAMPLE_URL));
        assert!(resultant_map.contains_key(RUST_URL));
        assert_eq!(map_to_be_filtered.len(), 0);
        Ok(())
    }

    /// A single wildcard pattern that matches only the `example.com` fixture.
    #[test]
    fn test_filter_with_lists_wildcard() -> Result<(), Box<dyn std::error::Error>> {
        let mut map_to_be_filtered = HashMap::from([example_entry(), rust_entry()]);
        let mut resultant_map = HashMap::new();
        let file = pattern_file(&["ex.*le"])?;

        filter_with_lists(
            &mut map_to_be_filtered,
            &mut resultant_map,
            file.path().to_str().unwrap(),
        )?;

        assert_eq!(resultant_map.len(), 1);
        assert!(resultant_map.contains_key(EXAMPLE_URL));
        assert_eq!(map_to_be_filtered.len(), 1);
        assert!(map_to_be_filtered.contains_key(RUST_URL));
        Ok(())
    }

    /// A path that does not exist must surface as an error.
    #[test]
    fn test_filter_with_lists_file_not_found() {
        let mut map_to_be_filtered = HashMap::new();
        let mut resultant_map = HashMap::new();

        let outcome = filter_with_lists(
            &mut map_to_be_filtered,
            &mut resultant_map,
            "non-existent-file.txt",
        );

        assert!(outcome.is_err());
    }

    /// A pattern that is not a valid regex must surface as an error.
    #[test]
    fn test_filter_with_lists_invalid_regex() {
        let mut map_to_be_filtered = HashMap::from([example_entry()]);
        let mut resultant_map = HashMap::new();
        let file = pattern_file(&["example("]).unwrap();

        let outcome = filter_with_lists(
            &mut map_to_be_filtered,
            &mut resultant_map,
            file.path().to_str().unwrap(),
        );

        assert!(outcome.is_err());
    }
 }