0
0
mirror of https://github.com/neon-mmd/websurfx.git synced 2024-11-25 15:38:21 -05:00

⚙️ refactor: add several minor optimizations (#180)(#178)

This commit is contained in:
neon_arch 2023-08-27 21:04:41 +03:00
parent 2885f23ec9
commit 13ce420642

View File

@ -64,14 +64,14 @@ type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<Eng
/// function in either `searx` or `duckduckgo` or both, otherwise returns a `SearchResults` struct /// function in either `searx` or `duckduckgo` or both, otherwise returns a `SearchResults` struct
/// containing appropriate values. /// containing appropriate values.
pub async fn aggregate( pub async fn aggregate(
query: String, query: &str,
page: u32, page: u32,
random_delay: bool, random_delay: bool,
debug: bool, debug: bool,
upstream_search_engines: Vec<EngineHandler>, upstream_search_engines: &[EngineHandler],
request_timeout: u8, request_timeout: u8,
) -> Result<SearchResults, Box<dyn std::error::Error>> { ) -> Result<SearchResults, Box<dyn std::error::Error>> {
let user_agent: String = random_user_agent(); let user_agent: &str = random_user_agent();
// Add a random delay before making the request. // Add a random delay before making the request.
if random_delay || !debug { if random_delay || !debug {
@ -80,19 +80,18 @@ pub async fn aggregate(
tokio::time::sleep(Duration::from_secs(delay_secs)).await; tokio::time::sleep(Duration::from_secs(delay_secs)).await;
} }
let mut names: Vec<&str> = vec![]; let mut names: Vec<&str> = Vec::with_capacity(0);
// create tasks for upstream result fetching // create tasks for upstream result fetching
let mut tasks: FutureVec = FutureVec::new(); let mut tasks: FutureVec = FutureVec::new();
for engine_handler in upstream_search_engines { for engine_handler in upstream_search_engines {
let (name, search_engine) = engine_handler.into_name_engine(); let (name, search_engine) = engine_handler.to_owned().into_name_engine();
names.push(name); names.push(name);
let query: String = query.clone(); let query: String = query.to_owned();
let user_agent: String = user_agent.clone();
tasks.push(tokio::spawn(async move { tasks.push(tokio::spawn(async move {
search_engine search_engine
.results(query, page, user_agent.clone(), request_timeout) .results(&query, page, user_agent, request_timeout)
.await .await
})); }));
} }
@ -110,7 +109,7 @@ pub async fn aggregate(
let mut result_map: HashMap<String, SearchResult> = HashMap::new(); let mut result_map: HashMap<String, SearchResult> = HashMap::new();
let mut engine_errors_info: Vec<EngineErrorInfo> = Vec::new(); let mut engine_errors_info: Vec<EngineErrorInfo> = Vec::new();
let mut handle_error = |error: Report<EngineError>, engine_name: String| { let mut handle_error = |error: &Report<EngineError>, engine_name: &'static str| {
log::error!("Engine Error: {:?}", error); log::error!("Engine Error: {:?}", error);
engine_errors_info.push(EngineErrorInfo::new( engine_errors_info.push(EngineErrorInfo::new(
error.downcast_ref::<EngineError>().unwrap(), error.downcast_ref::<EngineError>().unwrap(),
@ -120,7 +119,7 @@ pub async fn aggregate(
for _ in 0..responses.len() { for _ in 0..responses.len() {
let response = responses.pop().unwrap(); let response = responses.pop().unwrap();
let engine = names.pop().unwrap().to_string(); let engine = names.pop().unwrap();
if result_map.is_empty() { if result_map.is_empty() {
match response { match response {
@ -128,7 +127,7 @@ pub async fn aggregate(
result_map = results.clone(); result_map = results.clone();
} }
Err(error) => { Err(error) => {
handle_error(error, engine); handle_error(&error, engine);
} }
} }
continue; continue;
@ -140,13 +139,13 @@ pub async fn aggregate(
result_map result_map
.entry(key) .entry(key)
.and_modify(|result| { .and_modify(|result| {
result.add_engines(engine.clone()); result.add_engines(engine);
}) })
.or_insert_with(|| -> SearchResult { value }); .or_insert_with(|| -> SearchResult { value });
}); });
} }
Err(error) => { Err(error) => {
handle_error(error, engine); handle_error(&error, engine);
} }
} }
} }
@ -155,24 +154,20 @@ pub async fn aggregate(
filter_with_lists( filter_with_lists(
&mut result_map, &mut result_map,
&mut blacklist_map, &mut blacklist_map,
&file_path(FileType::BlockList)?, file_path(FileType::BlockList)?,
)?; )?;
filter_with_lists( filter_with_lists(
&mut blacklist_map, &mut blacklist_map,
&mut result_map, &mut result_map,
&file_path(FileType::AllowList)?, file_path(FileType::AllowList)?,
)?; )?;
drop(blacklist_map); drop(blacklist_map);
let results: Vec<SearchResult> = result_map.into_values().collect(); let results: Vec<SearchResult> = result_map.into_values().collect();
Ok(SearchResults::new( Ok(SearchResults::new(results, query, &engine_errors_info))
results,
query.to_string(),
engine_errors_info,
))
} }
/// Filters a map of search results using a list of regex patterns. /// Filters a map of search results using a list of regex patterns.
@ -203,7 +198,10 @@ pub fn filter_with_lists(
|| re.is_match(&search_result.description.to_lowercase()) || re.is_match(&search_result.description.to_lowercase())
{ {
// If the search result matches the regex pattern, move it from the original map to the resultant map // If the search result matches the regex pattern, move it from the original map to the resultant map
resultant_map.insert(url.clone(), map_to_be_filtered.remove(&url).unwrap()); resultant_map.insert(
url.to_owned(),
map_to_be_filtered.remove(&url.to_owned()).unwrap(),
);
} }
} }
} }
@ -214,6 +212,7 @@ pub fn filter_with_lists(
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use smallvec::smallvec;
use std::collections::HashMap; use std::collections::HashMap;
use std::io::Write; use std::io::Write;
use tempfile::NamedTempFile; use tempfile::NamedTempFile;
@ -223,22 +222,22 @@ mod tests {
// Create a map of search results to filter // Create a map of search results to filter
let mut map_to_be_filtered = HashMap::new(); let mut map_to_be_filtered = HashMap::new();
map_to_be_filtered.insert( map_to_be_filtered.insert(
"https://www.example.com".to_string(), "https://www.example.com".to_owned(),
SearchResult { SearchResult {
title: "Example Domain".to_string(), title: "Example Domain".to_owned(),
url: "https://www.example.com".to_string(), url: "https://www.example.com".to_owned(),
description: "This domain is for use in illustrative examples in documents." description: "This domain is for use in illustrative examples in documents."
.to_string(), .to_owned(),
engine: vec!["Google".to_string(), "Bing".to_string()], engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
}, },
); );
map_to_be_filtered.insert( map_to_be_filtered.insert(
"https://www.rust-lang.org/".to_string(), "https://www.rust-lang.org/".to_owned(),
SearchResult { SearchResult {
title: "Rust Programming Language".to_string(), title: "Rust Programming Language".to_owned(),
url: "https://www.rust-lang.org/".to_string(), url: "https://www.rust-lang.org/".to_owned(),
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_string(), description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
engine: vec!["Google".to_string(), "DuckDuckGo".to_string()], engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
}, },
); );
@ -267,22 +266,22 @@ mod tests {
fn test_filter_with_lists_wildcard() -> Result<(), Box<dyn std::error::Error>> { fn test_filter_with_lists_wildcard() -> Result<(), Box<dyn std::error::Error>> {
let mut map_to_be_filtered = HashMap::new(); let mut map_to_be_filtered = HashMap::new();
map_to_be_filtered.insert( map_to_be_filtered.insert(
"https://www.example.com".to_string(), "https://www.example.com".to_owned(),
SearchResult { SearchResult {
title: "Example Domain".to_string(), title: "Example Domain".to_owned(),
url: "https://www.example.com".to_string(), url: "https://www.example.com".to_owned(),
description: "This domain is for use in illustrative examples in documents." description: "This domain is for use in illustrative examples in documents."
.to_string(), .to_owned(),
engine: vec!["Google".to_string(), "Bing".to_string()], engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
}, },
); );
map_to_be_filtered.insert( map_to_be_filtered.insert(
"https://www.rust-lang.org/".to_string(), "https://www.rust-lang.org/".to_owned(),
SearchResult { SearchResult {
title: "Rust Programming Language".to_string(), title: "Rust Programming Language".to_owned(),
url: "https://www.rust-lang.org/".to_string(), url: "https://www.rust-lang.org/".to_owned(),
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_string(), description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
engine: vec!["Google".to_string(), "DuckDuckGo".to_string()], engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
}, },
); );
@ -327,13 +326,13 @@ mod tests {
fn test_filter_with_lists_invalid_regex() { fn test_filter_with_lists_invalid_regex() {
let mut map_to_be_filtered = HashMap::new(); let mut map_to_be_filtered = HashMap::new();
map_to_be_filtered.insert( map_to_be_filtered.insert(
"https://www.example.com".to_string(), "https://www.example.com".to_owned(),
SearchResult { SearchResult {
title: "Example Domain".to_string(), title: "Example Domain".to_owned(),
url: "https://www.example.com".to_string(), url: "https://www.example.com".to_owned(),
description: "This domain is for use in illustrative examples in documents." description: "This domain is for use in illustrative examples in documents."
.to_string(), .to_owned(),
engine: vec!["Google".to_string(), "Bing".to_string()], engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
}, },
); );