mirror of
https://github.com/neon-mmd/websurfx.git
synced 2024-11-25 15:38:21 -05:00
parent
2885f23ec9
commit
13ce420642
@ -64,14 +64,14 @@ type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<Eng
|
|||||||
/// function in either `searx` or `duckduckgo` or both otherwise returns a `SearchResults struct`
|
/// function in either `searx` or `duckduckgo` or both otherwise returns a `SearchResults struct`
|
||||||
/// containing appropriate values.
|
/// containing appropriate values.
|
||||||
pub async fn aggregate(
|
pub async fn aggregate(
|
||||||
query: String,
|
query: &str,
|
||||||
page: u32,
|
page: u32,
|
||||||
random_delay: bool,
|
random_delay: bool,
|
||||||
debug: bool,
|
debug: bool,
|
||||||
upstream_search_engines: Vec<EngineHandler>,
|
upstream_search_engines: &[EngineHandler],
|
||||||
request_timeout: u8,
|
request_timeout: u8,
|
||||||
) -> Result<SearchResults, Box<dyn std::error::Error>> {
|
) -> Result<SearchResults, Box<dyn std::error::Error>> {
|
||||||
let user_agent: String = random_user_agent();
|
let user_agent: &str = random_user_agent();
|
||||||
|
|
||||||
// Add a random delay before making the request.
|
// Add a random delay before making the request.
|
||||||
if random_delay || !debug {
|
if random_delay || !debug {
|
||||||
@ -80,19 +80,18 @@ pub async fn aggregate(
|
|||||||
tokio::time::sleep(Duration::from_secs(delay_secs)).await;
|
tokio::time::sleep(Duration::from_secs(delay_secs)).await;
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut names: Vec<&str> = vec![];
|
let mut names: Vec<&str> = Vec::with_capacity(0);
|
||||||
|
|
||||||
// create tasks for upstream result fetching
|
// create tasks for upstream result fetching
|
||||||
let mut tasks: FutureVec = FutureVec::new();
|
let mut tasks: FutureVec = FutureVec::new();
|
||||||
|
|
||||||
for engine_handler in upstream_search_engines {
|
for engine_handler in upstream_search_engines {
|
||||||
let (name, search_engine) = engine_handler.into_name_engine();
|
let (name, search_engine) = engine_handler.to_owned().into_name_engine();
|
||||||
names.push(name);
|
names.push(name);
|
||||||
let query: String = query.clone();
|
let query: String = query.to_owned();
|
||||||
let user_agent: String = user_agent.clone();
|
|
||||||
tasks.push(tokio::spawn(async move {
|
tasks.push(tokio::spawn(async move {
|
||||||
search_engine
|
search_engine
|
||||||
.results(query, page, user_agent.clone(), request_timeout)
|
.results(&query, page, user_agent, request_timeout)
|
||||||
.await
|
.await
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
@ -110,7 +109,7 @@ pub async fn aggregate(
|
|||||||
let mut result_map: HashMap<String, SearchResult> = HashMap::new();
|
let mut result_map: HashMap<String, SearchResult> = HashMap::new();
|
||||||
let mut engine_errors_info: Vec<EngineErrorInfo> = Vec::new();
|
let mut engine_errors_info: Vec<EngineErrorInfo> = Vec::new();
|
||||||
|
|
||||||
let mut handle_error = |error: Report<EngineError>, engine_name: String| {
|
let mut handle_error = |error: &Report<EngineError>, engine_name: &'static str| {
|
||||||
log::error!("Engine Error: {:?}", error);
|
log::error!("Engine Error: {:?}", error);
|
||||||
engine_errors_info.push(EngineErrorInfo::new(
|
engine_errors_info.push(EngineErrorInfo::new(
|
||||||
error.downcast_ref::<EngineError>().unwrap(),
|
error.downcast_ref::<EngineError>().unwrap(),
|
||||||
@ -120,7 +119,7 @@ pub async fn aggregate(
|
|||||||
|
|
||||||
for _ in 0..responses.len() {
|
for _ in 0..responses.len() {
|
||||||
let response = responses.pop().unwrap();
|
let response = responses.pop().unwrap();
|
||||||
let engine = names.pop().unwrap().to_string();
|
let engine = names.pop().unwrap();
|
||||||
|
|
||||||
if result_map.is_empty() {
|
if result_map.is_empty() {
|
||||||
match response {
|
match response {
|
||||||
@ -128,7 +127,7 @@ pub async fn aggregate(
|
|||||||
result_map = results.clone();
|
result_map = results.clone();
|
||||||
}
|
}
|
||||||
Err(error) => {
|
Err(error) => {
|
||||||
handle_error(error, engine);
|
handle_error(&error, engine);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
@ -140,13 +139,13 @@ pub async fn aggregate(
|
|||||||
result_map
|
result_map
|
||||||
.entry(key)
|
.entry(key)
|
||||||
.and_modify(|result| {
|
.and_modify(|result| {
|
||||||
result.add_engines(engine.clone());
|
result.add_engines(engine);
|
||||||
})
|
})
|
||||||
.or_insert_with(|| -> SearchResult { value });
|
.or_insert_with(|| -> SearchResult { value });
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
Err(error) => {
|
Err(error) => {
|
||||||
handle_error(error, engine);
|
handle_error(&error, engine);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -155,24 +154,20 @@ pub async fn aggregate(
|
|||||||
filter_with_lists(
|
filter_with_lists(
|
||||||
&mut result_map,
|
&mut result_map,
|
||||||
&mut blacklist_map,
|
&mut blacklist_map,
|
||||||
&file_path(FileType::BlockList)?,
|
file_path(FileType::BlockList)?,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
filter_with_lists(
|
filter_with_lists(
|
||||||
&mut blacklist_map,
|
&mut blacklist_map,
|
||||||
&mut result_map,
|
&mut result_map,
|
||||||
&file_path(FileType::AllowList)?,
|
file_path(FileType::AllowList)?,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
drop(blacklist_map);
|
drop(blacklist_map);
|
||||||
|
|
||||||
let results: Vec<SearchResult> = result_map.into_values().collect();
|
let results: Vec<SearchResult> = result_map.into_values().collect();
|
||||||
|
|
||||||
Ok(SearchResults::new(
|
Ok(SearchResults::new(results, query, &engine_errors_info))
|
||||||
results,
|
|
||||||
query.to_string(),
|
|
||||||
engine_errors_info,
|
|
||||||
))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Filters a map of search results using a list of regex patterns.
|
/// Filters a map of search results using a list of regex patterns.
|
||||||
@ -203,7 +198,10 @@ pub fn filter_with_lists(
|
|||||||
|| re.is_match(&search_result.description.to_lowercase())
|
|| re.is_match(&search_result.description.to_lowercase())
|
||||||
{
|
{
|
||||||
// If the search result matches the regex pattern, move it from the original map to the resultant map
|
// If the search result matches the regex pattern, move it from the original map to the resultant map
|
||||||
resultant_map.insert(url.clone(), map_to_be_filtered.remove(&url).unwrap());
|
resultant_map.insert(
|
||||||
|
url.to_owned(),
|
||||||
|
map_to_be_filtered.remove(&url.to_owned()).unwrap(),
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -214,6 +212,7 @@ pub fn filter_with_lists(
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
use smallvec::smallvec;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::io::Write;
|
use std::io::Write;
|
||||||
use tempfile::NamedTempFile;
|
use tempfile::NamedTempFile;
|
||||||
@ -223,22 +222,22 @@ mod tests {
|
|||||||
// Create a map of search results to filter
|
// Create a map of search results to filter
|
||||||
let mut map_to_be_filtered = HashMap::new();
|
let mut map_to_be_filtered = HashMap::new();
|
||||||
map_to_be_filtered.insert(
|
map_to_be_filtered.insert(
|
||||||
"https://www.example.com".to_string(),
|
"https://www.example.com".to_owned(),
|
||||||
SearchResult {
|
SearchResult {
|
||||||
title: "Example Domain".to_string(),
|
title: "Example Domain".to_owned(),
|
||||||
url: "https://www.example.com".to_string(),
|
url: "https://www.example.com".to_owned(),
|
||||||
description: "This domain is for use in illustrative examples in documents."
|
description: "This domain is for use in illustrative examples in documents."
|
||||||
.to_string(),
|
.to_owned(),
|
||||||
engine: vec!["Google".to_string(), "Bing".to_string()],
|
engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
map_to_be_filtered.insert(
|
map_to_be_filtered.insert(
|
||||||
"https://www.rust-lang.org/".to_string(),
|
"https://www.rust-lang.org/".to_owned(),
|
||||||
SearchResult {
|
SearchResult {
|
||||||
title: "Rust Programming Language".to_string(),
|
title: "Rust Programming Language".to_owned(),
|
||||||
url: "https://www.rust-lang.org/".to_string(),
|
url: "https://www.rust-lang.org/".to_owned(),
|
||||||
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_string(),
|
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
|
||||||
engine: vec!["Google".to_string(), "DuckDuckGo".to_string()],
|
engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -267,22 +266,22 @@ mod tests {
|
|||||||
fn test_filter_with_lists_wildcard() -> Result<(), Box<dyn std::error::Error>> {
|
fn test_filter_with_lists_wildcard() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
let mut map_to_be_filtered = HashMap::new();
|
let mut map_to_be_filtered = HashMap::new();
|
||||||
map_to_be_filtered.insert(
|
map_to_be_filtered.insert(
|
||||||
"https://www.example.com".to_string(),
|
"https://www.example.com".to_owned(),
|
||||||
SearchResult {
|
SearchResult {
|
||||||
title: "Example Domain".to_string(),
|
title: "Example Domain".to_owned(),
|
||||||
url: "https://www.example.com".to_string(),
|
url: "https://www.example.com".to_owned(),
|
||||||
description: "This domain is for use in illustrative examples in documents."
|
description: "This domain is for use in illustrative examples in documents."
|
||||||
.to_string(),
|
.to_owned(),
|
||||||
engine: vec!["Google".to_string(), "Bing".to_string()],
|
engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
map_to_be_filtered.insert(
|
map_to_be_filtered.insert(
|
||||||
"https://www.rust-lang.org/".to_string(),
|
"https://www.rust-lang.org/".to_owned(),
|
||||||
SearchResult {
|
SearchResult {
|
||||||
title: "Rust Programming Language".to_string(),
|
title: "Rust Programming Language".to_owned(),
|
||||||
url: "https://www.rust-lang.org/".to_string(),
|
url: "https://www.rust-lang.org/".to_owned(),
|
||||||
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_string(),
|
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
|
||||||
engine: vec!["Google".to_string(), "DuckDuckGo".to_string()],
|
engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -327,13 +326,13 @@ mod tests {
|
|||||||
fn test_filter_with_lists_invalid_regex() {
|
fn test_filter_with_lists_invalid_regex() {
|
||||||
let mut map_to_be_filtered = HashMap::new();
|
let mut map_to_be_filtered = HashMap::new();
|
||||||
map_to_be_filtered.insert(
|
map_to_be_filtered.insert(
|
||||||
"https://www.example.com".to_string(),
|
"https://www.example.com".to_owned(),
|
||||||
SearchResult {
|
SearchResult {
|
||||||
title: "Example Domain".to_string(),
|
title: "Example Domain".to_owned(),
|
||||||
url: "https://www.example.com".to_string(),
|
url: "https://www.example.com".to_owned(),
|
||||||
description: "This domain is for use in illustrative examples in documents."
|
description: "This domain is for use in illustrative examples in documents."
|
||||||
.to_string(),
|
.to_owned(),
|
||||||
engine: vec!["Google".to_string(), "Bing".to_string()],
|
engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user