mirror of
https://github.com/neon-mmd/websurfx.git
synced 2024-11-22 05:58:21 -05:00
Merge pull request #191 from xffxff/unit_tests
🧹 Unit tests for the function `filter_with_lists`
This commit is contained in:
commit
7a8bf022d4
3
Cargo.lock
generated
3
Cargo.lock
generated
@ -3543,7 +3543,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "websurfx"
|
name = "websurfx"
|
||||||
version = "0.18.0"
|
version = "0.18.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"actix-cors",
|
"actix-cors",
|
||||||
"actix-files",
|
"actix-files",
|
||||||
@ -3566,6 +3566,7 @@ dependencies = [
|
|||||||
"scraper",
|
"scraper",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
|
"tempfile",
|
||||||
"tokio 1.32.0",
|
"tokio 1.32.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "websurfx"
|
name = "websurfx"
|
||||||
version = "0.18.0"
|
version = "0.18.1"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
|
description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
|
||||||
repository = "https://github.com/neon-mmd/websurfx"
|
repository = "https://github.com/neon-mmd/websurfx"
|
||||||
@ -31,6 +31,7 @@ regex = {version="1.9.3", features=["perf"]}
|
|||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
rusty-hook = "^0.11.2"
|
rusty-hook = "^0.11.2"
|
||||||
criterion = "0.5.1"
|
criterion = "0.5.1"
|
||||||
|
tempfile = "3.8.0"
|
||||||
|
|
||||||
[profile.dev]
|
[profile.dev]
|
||||||
opt-level = 0
|
opt-level = 0
|
||||||
|
@ -175,22 +175,181 @@ pub async fn aggregate(
|
|||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn filter_with_lists(
|
/// Filters a map of search results using a list of regex patterns.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
///
|
||||||
|
/// * `map_to_be_filtered` - A mutable reference to a `HashMap` of search results to filter, where the filtered results will be removed from.
|
||||||
|
/// * `resultant_map` - A mutable reference to a `HashMap` to hold the filtered results.
|
||||||
|
/// * `file_path` - A `&str` representing the path to a file containing regex patterns to use for filtering.
|
||||||
|
///
|
||||||
|
/// # Errors
|
||||||
|
///
|
||||||
|
/// Returns an error if the file at `file_path` cannot be opened or read, or if a regex pattern is invalid.
|
||||||
|
pub fn filter_with_lists(
|
||||||
map_to_be_filtered: &mut HashMap<String, SearchResult>,
|
map_to_be_filtered: &mut HashMap<String, SearchResult>,
|
||||||
resultant_map: &mut HashMap<String, SearchResult>,
|
resultant_map: &mut HashMap<String, SearchResult>,
|
||||||
file_path: &str,
|
file_path: &str,
|
||||||
) -> Result<(), Box<dyn std::error::Error>> {
|
) -> Result<(), Box<dyn std::error::Error>> {
|
||||||
let mut reader = BufReader::new(File::open(file_path)?);
|
let mut reader = BufReader::new(File::open(file_path)?);
|
||||||
|
|
||||||
for line in reader.by_ref().lines() {
|
for line in reader.by_ref().lines() {
|
||||||
let re = Regex::new(&line?)?;
|
let re = Regex::new(&line?)?;
|
||||||
|
|
||||||
|
// Iterate over each search result in the map and check if it matches the regex pattern
|
||||||
for (url, search_result) in map_to_be_filtered.clone().into_iter() {
|
for (url, search_result) in map_to_be_filtered.clone().into_iter() {
|
||||||
if re.is_match(&url.to_lowercase())
|
if re.is_match(&url.to_lowercase())
|
||||||
|| re.is_match(&search_result.title.to_lowercase())
|
|| re.is_match(&search_result.title.to_lowercase())
|
||||||
|| re.is_match(&search_result.description.to_lowercase())
|
|| re.is_match(&search_result.description.to_lowercase())
|
||||||
{
|
{
|
||||||
|
// If the search result matches the regex pattern, move it from the original map to the resultant map
|
||||||
resultant_map.insert(url.clone(), map_to_be_filtered.remove(&url).unwrap());
|
resultant_map.insert(url.clone(), map_to_be_filtered.remove(&url).unwrap());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Unit tests for `filter_with_lists`.
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Key (and url) of the "Example Domain" fixture.
    const EXAMPLE_URL: &str = "https://www.example.com";

    /// Key (and url) of the "Rust Programming Language" fixture.
    const RUST_URL: &str = "https://www.rust-lang.org/";

    /// Returns the "Example Domain" fixture as a `(key, result)` pair ready for map insertion.
    fn example_entry() -> (String, SearchResult) {
        (
            EXAMPLE_URL.to_string(),
            SearchResult {
                title: "Example Domain".to_string(),
                url: EXAMPLE_URL.to_string(),
                description: "This domain is for use in illustrative examples in documents."
                    .to_string(),
                engine: vec!["Google".to_string(), "Bing".to_string()],
            },
        )
    }

    /// Returns the "Rust Programming Language" fixture as a `(key, result)` pair.
    fn rust_entry() -> (String, SearchResult) {
        (
            RUST_URL.to_string(),
            SearchResult {
                title: "Rust Programming Language".to_string(),
                url: RUST_URL.to_string(),
                description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_string(),
                engine: vec!["Google".to_string(), "DuckDuckGo".to_string()],
            },
        )
    }

    /// Creates a temporary file holding one pattern per line and flushes it to disk.
    ///
    /// The file is deleted when the returned handle is dropped, so callers must keep the handle
    /// alive for as long as the path is in use.
    fn pattern_file(patterns: &[&str]) -> std::io::Result<NamedTempFile> {
        let mut file = NamedTempFile::new()?;
        for pattern in patterns {
            writeln!(file, "{}", pattern)?;
        }
        file.flush()?;
        Ok(file)
    }

    /// Returns the path of a temporary file as a `&str`, as required by `filter_with_lists`.
    fn path_of(file: &NamedTempFile) -> &str {
        file.path()
            .to_str()
            .expect("temporary file path should be valid UTF-8")
    }

    #[test]
    fn test_filter_with_lists() -> Result<(), Box<dyn std::error::Error>> {
        // Create a map of search results to filter
        let mut map_to_be_filtered = HashMap::from([example_entry(), rust_entry()]);

        // Create a temporary file with regex patterns
        let file = pattern_file(&["example", "rust"])?;

        let mut resultant_map = HashMap::new();
        filter_with_lists(&mut map_to_be_filtered, &mut resultant_map, path_of(&file))?;

        assert_eq!(resultant_map.len(), 2);
        assert!(resultant_map.contains_key(EXAMPLE_URL));
        assert!(resultant_map.contains_key(RUST_URL));
        assert_eq!(map_to_be_filtered.len(), 0);

        Ok(())
    }

    #[test]
    fn test_filter_with_lists_wildcard() -> Result<(), Box<dyn std::error::Error>> {
        let mut map_to_be_filtered = HashMap::from([example_entry(), rust_entry()]);

        // Create a temporary file with a regex pattern containing a wildcard
        let file = pattern_file(&["ex.*le"])?;

        let mut resultant_map = HashMap::new();
        filter_with_lists(&mut map_to_be_filtered, &mut resultant_map, path_of(&file))?;

        assert_eq!(resultant_map.len(), 1);
        assert!(resultant_map.contains_key(EXAMPLE_URL));
        assert_eq!(map_to_be_filtered.len(), 1);
        assert!(map_to_be_filtered.contains_key(RUST_URL));

        Ok(())
    }

    #[test]
    fn test_filter_with_lists_file_not_found() {
        let mut map_to_be_filtered = HashMap::new();
        let mut resultant_map = HashMap::new();

        // Call the `filter_with_lists` function with a non-existent file path
        let result = filter_with_lists(
            &mut map_to_be_filtered,
            &mut resultant_map,
            "non-existent-file.txt",
        );

        assert!(result.is_err());
    }

    #[test]
    fn test_filter_with_lists_invalid_regex() -> Result<(), Box<dyn std::error::Error>> {
        let mut map_to_be_filtered = HashMap::from([example_entry()]);
        let mut resultant_map = HashMap::new();

        // Create a temporary file with an invalid regex pattern (unclosed group)
        let file = pattern_file(&["example("])?;

        let result =
            filter_with_lists(&mut map_to_be_filtered, &mut resultant_map, path_of(&file));

        assert!(result.is_err());

        Ok(())
    }
}
|
||||||
|
Loading…
Reference in New Issue
Block a user