mirror of
https://github.com/neon-mmd/websurfx.git
synced 2024-11-24 23:18:22 -05:00
Merge pull request #195 from neon-mmd/optimize-and-make-code-idiomatic-3
⚙️ Optimize and make code more idiomatic (part - II)
This commit is contained in:
commit
b3b914d97c
1
.gitignore
vendored
1
.gitignore
vendored
@ -4,3 +4,4 @@ package-lock.json
|
||||
dump.rdb
|
||||
.vscode
|
||||
megalinter-reports/
|
||||
dhat-heap.json
|
||||
|
47
Cargo.lock
generated
47
Cargo.lock
generated
@ -830,6 +830,22 @@ dependencies = [
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dhat"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4f2aaf837aaf456f6706cb46386ba8dffd4013a757e36f4ea05c20dd46b209a3"
|
||||
dependencies = [
|
||||
"backtrace",
|
||||
"lazy_static",
|
||||
"mintex",
|
||||
"parking_lot 0.12.1",
|
||||
"rustc-hash",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"thousands",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "digest"
|
||||
version = "0.10.7"
|
||||
@ -1738,6 +1754,16 @@ dependencies = [
|
||||
"adler",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mintex"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fd7c5ba1c3b5a23418d7bbf98c71c3d4946a0125002129231da8d6b723d559cb"
|
||||
dependencies = [
|
||||
"once_cell",
|
||||
"sys-info",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mio"
|
||||
version = "0.6.23"
|
||||
@ -2891,6 +2917,9 @@ name = "smallvec"
|
||||
version = "1.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "socket2"
|
||||
@ -3032,6 +3061,16 @@ dependencies = [
|
||||
"unicode-xid 0.2.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sys-info"
|
||||
version = "0.9.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0b3a0d0aba8bf96a0e1ddfdc352fc53b3df7f39318c71854910c3c4b024ae52c"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tempfile"
|
||||
version = "3.8.0"
|
||||
@ -3085,6 +3124,12 @@ dependencies = [
|
||||
"syn 2.0.29",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thousands"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3bf63baf9f5039dadc247375c29eb13706706cfde997d0330d05aa63a77d8820"
|
||||
|
||||
[[package]]
|
||||
name = "time"
|
||||
version = "0.1.45"
|
||||
@ -3634,6 +3679,7 @@ dependencies = [
|
||||
"actix-web",
|
||||
"async-trait",
|
||||
"criterion",
|
||||
"dhat",
|
||||
"env_logger",
|
||||
"error-stack",
|
||||
"fake-useragent",
|
||||
@ -3651,6 +3697,7 @@ dependencies = [
|
||||
"scraper",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"smallvec 1.11.0",
|
||||
"tempfile",
|
||||
"tokio 1.32.0",
|
||||
]
|
||||
|
10
Cargo.toml
10
Cargo.toml
@ -8,7 +8,7 @@ license = "AGPL-3.0"
|
||||
|
||||
[dependencies]
|
||||
reqwest = {version="0.11.20",features=["json"]}
|
||||
tokio = {version="1.32.0",features=["full"]}
|
||||
tokio = {version="1.32.0",features=["rt-multi-thread","macros"]}
|
||||
serde = {version="1.0.188",features=["derive"]}
|
||||
handlebars = { version = "4.3.7", features = ["dir_source"] }
|
||||
scraper = {version="0.17.1"}
|
||||
@ -28,6 +28,8 @@ error-stack = {version="0.4.0"}
|
||||
async-trait = {version="0.1.73"}
|
||||
regex = {version="1.9.4", features=["perf"]}
|
||||
futures = {version="0.3.28"}
|
||||
dhat = {version="0.3.2", optional = true}
|
||||
smallvec = {version="1.11.0", features=["union", "serde"]}
|
||||
|
||||
[dev-dependencies]
|
||||
rusty-hook = "^0.11.2"
|
||||
@ -48,7 +50,8 @@ rpath = false
|
||||
|
||||
[profile.release]
|
||||
opt-level = 3
|
||||
debug = false
|
||||
debug = false # This should only be commented when testing with dhat profiler
|
||||
# debug = 1 # This should only be uncommented when testing with dhat profiler
|
||||
split-debuginfo = '...'
|
||||
debug-assertions = false
|
||||
overflow-checks = false
|
||||
@ -58,3 +61,6 @@ incremental = false
|
||||
codegen-units = 16
|
||||
rpath = false
|
||||
strip = "debuginfo"
|
||||
|
||||
[features]
|
||||
dhat-heap = ["dep:dhat"]
|
||||
|
@ -4,14 +4,14 @@
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
|
||||
use reqwest::header::HeaderMap;
|
||||
use scraper::{Html, Selector};
|
||||
|
||||
use crate::results::aggregation_models::SearchResult;
|
||||
|
||||
use super::engine_models::{EngineError, SearchEngine};
|
||||
|
||||
use error_stack::{IntoReport, Report, Result, ResultExt};
|
||||
use error_stack::{Report, Result, ResultExt};
|
||||
|
||||
/// A new DuckDuckGo engine type defined in-order to implement the `SearchEngine` trait which allows to
|
||||
/// reduce code duplication as well as allows to create vector of different search engines easily.
|
||||
@ -39,9 +39,9 @@ impl SearchEngine for DuckDuckGo {
|
||||
/// or HeaderMap fails to initialize.
|
||||
async fn results(
|
||||
&self,
|
||||
query: String,
|
||||
query: &str,
|
||||
page: u32,
|
||||
user_agent: String,
|
||||
user_agent: &str,
|
||||
request_timeout: u8,
|
||||
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
||||
// Page number can be missing or empty string and so appropriate handling is required
|
||||
@ -61,38 +61,19 @@ impl SearchEngine for DuckDuckGo {
|
||||
};
|
||||
|
||||
// initializing HeaderMap and adding appropriate headers.
|
||||
let mut header_map = HeaderMap::new();
|
||||
header_map.insert(
|
||||
USER_AGENT,
|
||||
user_agent
|
||||
.parse()
|
||||
.into_report()
|
||||
.change_context(EngineError::UnexpectedError)?,
|
||||
);
|
||||
header_map.insert(
|
||||
REFERER,
|
||||
"https://google.com/"
|
||||
.parse()
|
||||
.into_report()
|
||||
.change_context(EngineError::UnexpectedError)?,
|
||||
);
|
||||
header_map.insert(
|
||||
CONTENT_TYPE,
|
||||
"application/x-www-form-urlencoded"
|
||||
.parse()
|
||||
.into_report()
|
||||
.change_context(EngineError::UnexpectedError)?,
|
||||
);
|
||||
header_map.insert(
|
||||
COOKIE,
|
||||
"kl=wt-wt"
|
||||
.parse()
|
||||
.into_report()
|
||||
.change_context(EngineError::UnexpectedError)?,
|
||||
);
|
||||
let header_map = HeaderMap::try_from(&HashMap::from([
|
||||
("USER_AGENT".to_string(), user_agent.to_string()),
|
||||
("REFERER".to_string(), "https://google.com/".to_string()),
|
||||
(
|
||||
"CONTENT_TYPE".to_string(),
|
||||
"application/x-www-form-urlencoded".to_string(),
|
||||
),
|
||||
("COOKIE".to_string(), "kl=wt-wt".to_string()),
|
||||
]))
|
||||
.change_context(EngineError::UnexpectedError)?;
|
||||
|
||||
let document: Html = Html::parse_document(
|
||||
&DuckDuckGo::fetch_html_from_upstream(self, url, header_map, request_timeout).await?,
|
||||
&DuckDuckGo::fetch_html_from_upstream(self, &url, header_map, request_timeout).await?,
|
||||
);
|
||||
|
||||
let no_result: Selector = Selector::parse(".no-results")
|
||||
@ -126,8 +107,7 @@ impl SearchEngine for DuckDuckGo {
|
||||
.next()
|
||||
.unwrap()
|
||||
.inner_html()
|
||||
.trim()
|
||||
.to_string(),
|
||||
.trim(),
|
||||
format!(
|
||||
"https://{}",
|
||||
result
|
||||
@ -136,15 +116,15 @@ impl SearchEngine for DuckDuckGo {
|
||||
.unwrap()
|
||||
.inner_html()
|
||||
.trim()
|
||||
),
|
||||
)
|
||||
.as_str(),
|
||||
result
|
||||
.select(&result_desc)
|
||||
.next()
|
||||
.unwrap()
|
||||
.inner_html()
|
||||
.trim()
|
||||
.to_string(),
|
||||
vec!["duckduckgo".to_string()],
|
||||
.trim(),
|
||||
&["duckduckgo"],
|
||||
)
|
||||
})
|
||||
.map(|search_result| (search_result.url.clone(), search_result))
|
||||
|
@ -2,7 +2,7 @@
|
||||
//! the upstream search engines with the search query provided by the user.
|
||||
|
||||
use crate::results::aggregation_models::SearchResult;
|
||||
use error_stack::{IntoReport, Result, ResultExt};
|
||||
use error_stack::{Result, ResultExt};
|
||||
use std::{collections::HashMap, fmt, time::Duration};
|
||||
|
||||
/// A custom error type used for handle engine associated errors.
|
||||
@ -48,7 +48,7 @@ impl error_stack::Context for EngineError {}
|
||||
pub trait SearchEngine: Sync + Send {
|
||||
async fn fetch_html_from_upstream(
|
||||
&self,
|
||||
url: String,
|
||||
url: &str,
|
||||
header_map: reqwest::header::HeaderMap,
|
||||
request_timeout: u8,
|
||||
) -> Result<String, EngineError> {
|
||||
@ -59,19 +59,17 @@ pub trait SearchEngine: Sync + Send {
|
||||
.headers(header_map) // add spoofed headers to emulate human behavior
|
||||
.send()
|
||||
.await
|
||||
.into_report()
|
||||
.change_context(EngineError::RequestError)?
|
||||
.text()
|
||||
.await
|
||||
.into_report()
|
||||
.change_context(EngineError::RequestError)?)
|
||||
}
|
||||
|
||||
async fn results(
|
||||
&self,
|
||||
query: String,
|
||||
query: &str,
|
||||
page: u32,
|
||||
user_agent: String,
|
||||
user_agent: &str,
|
||||
request_timeout: u8,
|
||||
) -> Result<HashMap<String, SearchResult>, EngineError>;
|
||||
}
|
||||
|
@ -2,14 +2,14 @@
|
||||
//! by querying the upstream searx search engine instance with user provided query and with a page
|
||||
//! number if provided.
|
||||
|
||||
use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
|
||||
use reqwest::header::HeaderMap;
|
||||
use scraper::{Html, Selector};
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::results::aggregation_models::SearchResult;
|
||||
|
||||
use super::engine_models::{EngineError, SearchEngine};
|
||||
use error_stack::{IntoReport, Report, Result, ResultExt};
|
||||
use error_stack::{Report, Result, ResultExt};
|
||||
|
||||
/// A new Searx engine type defined in-order to implement the `SearchEngine` trait which allows to
|
||||
/// reduce code duplication as well as allows to create vector of different search engines easily.
|
||||
@ -38,9 +38,9 @@ impl SearchEngine for Searx {
|
||||
|
||||
async fn results(
|
||||
&self,
|
||||
query: String,
|
||||
query: &str,
|
||||
page: u32,
|
||||
user_agent: String,
|
||||
user_agent: &str,
|
||||
request_timeout: u8,
|
||||
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
||||
// Page number can be missing or empty string and so appropriate handling is required
|
||||
@ -51,32 +51,16 @@ impl SearchEngine for Searx {
|
||||
};
|
||||
|
||||
// initializing headers and adding appropriate headers.
|
||||
let mut header_map = HeaderMap::new();
|
||||
header_map.insert(
|
||||
USER_AGENT,
|
||||
user_agent
|
||||
.parse()
|
||||
.into_report()
|
||||
.change_context(EngineError::UnexpectedError)?,
|
||||
);
|
||||
header_map.insert(
|
||||
REFERER,
|
||||
"https://google.com/"
|
||||
.parse()
|
||||
.into_report()
|
||||
.change_context(EngineError::UnexpectedError)?,
|
||||
);
|
||||
header_map.insert(
|
||||
CONTENT_TYPE,
|
||||
"application/x-www-form-urlencoded"
|
||||
.parse()
|
||||
.into_report()
|
||||
.change_context(EngineError::UnexpectedError)?,
|
||||
);
|
||||
header_map.insert(COOKIE, "categories=general; language=auto; locale=en; autocomplete=duckduckgo; image_proxy=1; method=POST; safesearch=2; theme=simple; results_on_new_tab=1; doi_resolver=oadoi.org; simple_style=auto; center_alignment=1; query_in_title=1; infinite_scroll=0; disabled_engines=; enabled_engines=\"archive is__general\\054yep__general\\054curlie__general\\054currency__general\\054ddg definitions__general\\054wikidata__general\\054duckduckgo__general\\054tineye__general\\054lingva__general\\054startpage__general\\054yahoo__general\\054wiby__general\\054marginalia__general\\054alexandria__general\\054wikibooks__general\\054wikiquote__general\\054wikisource__general\\054wikiversity__general\\054wikivoyage__general\\054dictzone__general\\054seznam__general\\054mojeek__general\\054naver__general\\054wikimini__general\\054brave__general\\054petalsearch__general\\054goo__general\"; disabled_plugins=; enabled_plugins=\"searx.plugins.hostname_replace\\054searx.plugins.oa_doi_rewrite\\054searx.plugins.vim_hotkeys\"; tokens=; maintab=on; enginetab=on".parse().into_report().change_context(EngineError::UnexpectedError)?);
|
||||
let header_map = HeaderMap::try_from(&HashMap::from([
|
||||
("USER_AGENT".to_string(), user_agent.to_string()),
|
||||
("REFERER".to_string(), "https://google.com/".to_string()),
|
||||
("CONTENT_TYPE".to_string(), "application/x-www-form-urlencoded".to_string()),
|
||||
("COOKIE".to_string(), "categories=general; language=auto; locale=en; autocomplete=duckduckgo; image_proxy=1; method=POST; safesearch=2; theme=simple; results_on_new_tab=1; doi_resolver=oadoi.org; simple_style=auto; center_alignment=1; query_in_title=1; infinite_scroll=0; disabled_engines=; enabled_engines=\"archive is__general\\054yep__general\\054curlie__general\\054currency__general\\054ddg definitions__general\\054wikidata__general\\054duckduckgo__general\\054tineye__general\\054lingva__general\\054startpage__general\\054yahoo__general\\054wiby__general\\054marginalia__general\\054alexandria__general\\054wikibooks__general\\054wikiquote__general\\054wikisource__general\\054wikiversity__general\\054wikivoyage__general\\054dictzone__general\\054seznam__general\\054mojeek__general\\054naver__general\\054wikimini__general\\054brave__general\\054petalsearch__general\\054goo__general\"; disabled_plugins=; enabled_plugins=\"searx.plugins.hostname_replace\\054searx.plugins.oa_doi_rewrite\\054searx.plugins.vim_hotkeys\"; tokens=; maintab=on; enginetab=on".to_string())
|
||||
]))
|
||||
.change_context(EngineError::UnexpectedError)?;
|
||||
|
||||
let document: Html = Html::parse_document(
|
||||
&Searx::fetch_html_from_upstream(self, url, header_map, request_timeout).await?,
|
||||
&Searx::fetch_html_from_upstream(self, &url, header_map, request_timeout).await?,
|
||||
);
|
||||
|
||||
let no_result: Selector = Selector::parse("#urls>.dialog-error>p")
|
||||
@ -117,24 +101,21 @@ impl SearchEngine for Searx {
|
||||
.next()
|
||||
.unwrap()
|
||||
.inner_html()
|
||||
.trim()
|
||||
.to_string(),
|
||||
.trim(),
|
||||
result
|
||||
.select(&result_url)
|
||||
.next()
|
||||
.unwrap()
|
||||
.value()
|
||||
.attr("href")
|
||||
.unwrap()
|
||||
.to_string(),
|
||||
.unwrap(),
|
||||
result
|
||||
.select(&result_desc)
|
||||
.next()
|
||||
.unwrap()
|
||||
.inner_html()
|
||||
.trim()
|
||||
.to_string(),
|
||||
vec!["searx".to_string()],
|
||||
.trim(),
|
||||
&["searx"],
|
||||
)
|
||||
})
|
||||
.map(|search_result| (search_result.url.clone(), search_result))
|
||||
|
@ -2,6 +2,7 @@
|
||||
//! data scraped from the upstream search engines.
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use smallvec::SmallVec;
|
||||
|
||||
use crate::{config::parser_models::Style, engines::engine_models::EngineError};
|
||||
|
||||
@ -16,13 +17,13 @@ use crate::{config::parser_models::Style, engines::engine_models::EngineError};
|
||||
/// (href url in html in simple words).
|
||||
/// * `description` - The description of the search result.
|
||||
/// * `engine` - The names of the upstream engines from which this results were provided.
|
||||
#[derive(Clone, Serialize, Deserialize)]
|
||||
#[derive(Clone, Serialize, Deserialize, Debug)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct SearchResult {
|
||||
pub title: String,
|
||||
pub url: String,
|
||||
pub description: String,
|
||||
pub engine: Vec<String>,
|
||||
pub engine: SmallVec<[String; 0]>,
|
||||
}
|
||||
|
||||
impl SearchResult {
|
||||
@ -35,12 +36,12 @@ impl SearchResult {
|
||||
/// (href url in html in simple words).
|
||||
/// * `description` - The description of the search result.
|
||||
/// * `engine` - The names of the upstream engines from which this results were provided.
|
||||
pub fn new(title: String, url: String, description: String, engine: Vec<String>) -> Self {
|
||||
pub fn new(title: &str, url: &str, description: &str, engine: &[&str]) -> Self {
|
||||
SearchResult {
|
||||
title,
|
||||
url,
|
||||
description,
|
||||
engine,
|
||||
title: title.to_owned(),
|
||||
url: url.to_owned(),
|
||||
description: description.to_owned(),
|
||||
engine: engine.iter().map(|name| name.to_string()).collect(),
|
||||
}
|
||||
}
|
||||
|
||||
@ -49,8 +50,8 @@ impl SearchResult {
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `engine` - Takes an engine name provided as a String.
|
||||
pub fn add_engines(&mut self, engine: String) {
|
||||
self.engine.push(engine)
|
||||
pub fn add_engines(&mut self, engine: &str) {
|
||||
self.engine.push(engine.to_owned())
|
||||
}
|
||||
|
||||
/// A function which returns the engine name stored from the struct as a string.
|
||||
@ -58,13 +59,12 @@ impl SearchResult {
|
||||
/// # Returns
|
||||
///
|
||||
/// An engine name stored as a string from the struct.
|
||||
pub fn engine(self) -> String {
|
||||
self.engine.get(0).unwrap().to_string()
|
||||
pub fn engine(&mut self) -> String {
|
||||
std::mem::take(&mut self.engine[0])
|
||||
}
|
||||
}
|
||||
|
||||
///
|
||||
#[derive(Serialize, Deserialize)]
|
||||
#[derive(Serialize, Deserialize, Clone)]
|
||||
pub struct EngineErrorInfo {
|
||||
pub error: String,
|
||||
pub engine: String,
|
||||
@ -72,18 +72,18 @@ pub struct EngineErrorInfo {
|
||||
}
|
||||
|
||||
impl EngineErrorInfo {
|
||||
pub fn new(error: &EngineError, engine: String) -> Self {
|
||||
pub fn new(error: &EngineError, engine: &str) -> Self {
|
||||
Self {
|
||||
error: match error {
|
||||
EngineError::RequestError => String::from("RequestError"),
|
||||
EngineError::EmptyResultSet => String::from("EmptyResultSet"),
|
||||
EngineError::UnexpectedError => String::from("UnexpectedError"),
|
||||
EngineError::RequestError => "RequestError".to_owned(),
|
||||
EngineError::EmptyResultSet => "EmptyResultSet".to_owned(),
|
||||
EngineError::UnexpectedError => "UnexpectedError".to_owned(),
|
||||
},
|
||||
engine,
|
||||
engine: engine.to_owned(),
|
||||
severity_color: match error {
|
||||
EngineError::RequestError => String::from("green"),
|
||||
EngineError::EmptyResultSet => String::from("blue"),
|
||||
EngineError::UnexpectedError => String::from("red"),
|
||||
EngineError::RequestError => "green".to_owned(),
|
||||
EngineError::EmptyResultSet => "blue".to_owned(),
|
||||
EngineError::UnexpectedError => "red".to_owned(),
|
||||
},
|
||||
}
|
||||
}
|
||||
@ -108,7 +108,7 @@ pub struct SearchResults {
|
||||
pub results: Vec<SearchResult>,
|
||||
pub page_query: String,
|
||||
pub style: Style,
|
||||
pub engine_errors_info: Vec<EngineErrorInfo>,
|
||||
pub engine_errors_info: SmallVec<[EngineErrorInfo; 0]>,
|
||||
}
|
||||
|
||||
impl SearchResults {
|
||||
@ -124,19 +124,19 @@ impl SearchResults {
|
||||
/// given search query.
|
||||
pub fn new(
|
||||
results: Vec<SearchResult>,
|
||||
page_query: String,
|
||||
engine_errors_info: Vec<EngineErrorInfo>,
|
||||
page_query: &str,
|
||||
engine_errors_info: &[EngineErrorInfo],
|
||||
) -> Self {
|
||||
SearchResults {
|
||||
Self {
|
||||
results,
|
||||
page_query,
|
||||
style: Style::new("".to_string(), "".to_string()),
|
||||
engine_errors_info,
|
||||
page_query: page_query.to_owned(),
|
||||
style: Style::default(),
|
||||
engine_errors_info: SmallVec::from(engine_errors_info),
|
||||
}
|
||||
}
|
||||
|
||||
/// A setter function to add website style to the return search results.
|
||||
pub fn add_style(&mut self, style: Style) {
|
||||
self.style = style;
|
||||
pub fn add_style(&mut self, style: &Style) {
|
||||
self.style = style.to_owned();
|
||||
}
|
||||
}
|
||||
|
@ -64,11 +64,11 @@ type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<Eng
|
||||
/// function in either `searx` or `duckduckgo` or both otherwise returns a `SearchResults struct`
|
||||
/// containing appropriate values.
|
||||
pub async fn aggregate(
|
||||
query: String,
|
||||
query: &str,
|
||||
page: u32,
|
||||
random_delay: bool,
|
||||
debug: bool,
|
||||
upstream_search_engines: Vec<EngineHandler>,
|
||||
upstream_search_engines: &[EngineHandler],
|
||||
request_timeout: u8,
|
||||
) -> Result<SearchResults, Box<dyn std::error::Error>> {
|
||||
let user_agent: &str = random_user_agent();
|
||||
@ -80,18 +80,18 @@ pub async fn aggregate(
|
||||
tokio::time::sleep(Duration::from_secs(delay_secs)).await;
|
||||
}
|
||||
|
||||
let mut names: Vec<&str> = vec![];
|
||||
let mut names: Vec<&str> = Vec::with_capacity(0);
|
||||
|
||||
// create tasks for upstream result fetching
|
||||
let mut tasks: FutureVec = FutureVec::new();
|
||||
|
||||
for engine_handler in upstream_search_engines {
|
||||
let (name, search_engine) = engine_handler.into_name_engine();
|
||||
let (name, search_engine) = engine_handler.to_owned().into_name_engine();
|
||||
names.push(name);
|
||||
let query: String = query.clone();
|
||||
let query: String = query.to_owned();
|
||||
tasks.push(tokio::spawn(async move {
|
||||
search_engine
|
||||
.results(query, page, user_agent.to_owned(), request_timeout)
|
||||
.results(&query, page, user_agent, request_timeout)
|
||||
.await
|
||||
}));
|
||||
}
|
||||
@ -109,7 +109,7 @@ pub async fn aggregate(
|
||||
let mut result_map: HashMap<String, SearchResult> = HashMap::new();
|
||||
let mut engine_errors_info: Vec<EngineErrorInfo> = Vec::new();
|
||||
|
||||
let mut handle_error = |error: Report<EngineError>, engine_name: String| {
|
||||
let mut handle_error = |error: &Report<EngineError>, engine_name: &'static str| {
|
||||
log::error!("Engine Error: {:?}", error);
|
||||
engine_errors_info.push(EngineErrorInfo::new(
|
||||
error.downcast_ref::<EngineError>().unwrap(),
|
||||
@ -119,7 +119,7 @@ pub async fn aggregate(
|
||||
|
||||
for _ in 0..responses.len() {
|
||||
let response = responses.pop().unwrap();
|
||||
let engine = names.pop().unwrap().to_string();
|
||||
let engine = names.pop().unwrap();
|
||||
|
||||
if result_map.is_empty() {
|
||||
match response {
|
||||
@ -127,7 +127,7 @@ pub async fn aggregate(
|
||||
result_map = results.clone();
|
||||
}
|
||||
Err(error) => {
|
||||
handle_error(error, engine);
|
||||
handle_error(&error, engine);
|
||||
}
|
||||
}
|
||||
continue;
|
||||
@ -139,13 +139,13 @@ pub async fn aggregate(
|
||||
result_map
|
||||
.entry(key)
|
||||
.and_modify(|result| {
|
||||
result.add_engines(engine.clone());
|
||||
result.add_engines(engine);
|
||||
})
|
||||
.or_insert_with(|| -> SearchResult { value });
|
||||
});
|
||||
}
|
||||
Err(error) => {
|
||||
handle_error(error, engine);
|
||||
handle_error(&error, engine);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -167,11 +167,7 @@ pub async fn aggregate(
|
||||
|
||||
let results: Vec<SearchResult> = result_map.into_values().collect();
|
||||
|
||||
Ok(SearchResults::new(
|
||||
results,
|
||||
query.to_string(),
|
||||
engine_errors_info,
|
||||
))
|
||||
Ok(SearchResults::new(results, query, &engine_errors_info))
|
||||
}
|
||||
|
||||
/// Filters a map of search results using a list of regex patterns.
|
||||
@ -202,7 +198,10 @@ pub fn filter_with_lists(
|
||||
|| re.is_match(&search_result.description.to_lowercase())
|
||||
{
|
||||
// If the search result matches the regex pattern, move it from the original map to the resultant map
|
||||
resultant_map.insert(url.clone(), map_to_be_filtered.remove(&url).unwrap());
|
||||
resultant_map.insert(
|
||||
url.to_owned(),
|
||||
map_to_be_filtered.remove(&url.to_owned()).unwrap(),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -213,6 +212,7 @@ pub fn filter_with_lists(
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use smallvec::smallvec;
|
||||
use std::collections::HashMap;
|
||||
use std::io::Write;
|
||||
use tempfile::NamedTempFile;
|
||||
@ -222,22 +222,22 @@ mod tests {
|
||||
// Create a map of search results to filter
|
||||
let mut map_to_be_filtered = HashMap::new();
|
||||
map_to_be_filtered.insert(
|
||||
"https://www.example.com".to_string(),
|
||||
"https://www.example.com".to_owned(),
|
||||
SearchResult {
|
||||
title: "Example Domain".to_string(),
|
||||
url: "https://www.example.com".to_string(),
|
||||
title: "Example Domain".to_owned(),
|
||||
url: "https://www.example.com".to_owned(),
|
||||
description: "This domain is for use in illustrative examples in documents."
|
||||
.to_string(),
|
||||
engine: vec!["Google".to_string(), "Bing".to_string()],
|
||||
.to_owned(),
|
||||
engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
|
||||
},
|
||||
);
|
||||
map_to_be_filtered.insert(
|
||||
"https://www.rust-lang.org/".to_string(),
|
||||
"https://www.rust-lang.org/".to_owned(),
|
||||
SearchResult {
|
||||
title: "Rust Programming Language".to_string(),
|
||||
url: "https://www.rust-lang.org/".to_string(),
|
||||
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_string(),
|
||||
engine: vec!["Google".to_string(), "DuckDuckGo".to_string()],
|
||||
title: "Rust Programming Language".to_owned(),
|
||||
url: "https://www.rust-lang.org/".to_owned(),
|
||||
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
|
||||
engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
|
||||
},
|
||||
);
|
||||
|
||||
@ -266,22 +266,22 @@ mod tests {
|
||||
fn test_filter_with_lists_wildcard() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let mut map_to_be_filtered = HashMap::new();
|
||||
map_to_be_filtered.insert(
|
||||
"https://www.example.com".to_string(),
|
||||
"https://www.example.com".to_owned(),
|
||||
SearchResult {
|
||||
title: "Example Domain".to_string(),
|
||||
url: "https://www.example.com".to_string(),
|
||||
title: "Example Domain".to_owned(),
|
||||
url: "https://www.example.com".to_owned(),
|
||||
description: "This domain is for use in illustrative examples in documents."
|
||||
.to_string(),
|
||||
engine: vec!["Google".to_string(), "Bing".to_string()],
|
||||
.to_owned(),
|
||||
engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
|
||||
},
|
||||
);
|
||||
map_to_be_filtered.insert(
|
||||
"https://www.rust-lang.org/".to_string(),
|
||||
"https://www.rust-lang.org/".to_owned(),
|
||||
SearchResult {
|
||||
title: "Rust Programming Language".to_string(),
|
||||
url: "https://www.rust-lang.org/".to_string(),
|
||||
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_string(),
|
||||
engine: vec!["Google".to_string(), "DuckDuckGo".to_string()],
|
||||
title: "Rust Programming Language".to_owned(),
|
||||
url: "https://www.rust-lang.org/".to_owned(),
|
||||
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
|
||||
engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
|
||||
},
|
||||
);
|
||||
|
||||
@ -326,13 +326,13 @@ mod tests {
|
||||
fn test_filter_with_lists_invalid_regex() {
|
||||
let mut map_to_be_filtered = HashMap::new();
|
||||
map_to_be_filtered.insert(
|
||||
"https://www.example.com".to_string(),
|
||||
"https://www.example.com".to_owned(),
|
||||
SearchResult {
|
||||
title: "Example Domain".to_string(),
|
||||
url: "https://www.example.com".to_string(),
|
||||
title: "Example Domain".to_owned(),
|
||||
url: "https://www.example.com".to_owned(),
|
||||
description: "This domain is for use in illustrative examples in documents."
|
||||
.to_string(),
|
||||
engine: vec!["Google".to_string(), "Bing".to_string()],
|
||||
.to_owned(),
|
||||
engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
|
||||
},
|
||||
);
|
||||
|
||||
|
@ -62,10 +62,10 @@ pub async fn not_found(
|
||||
/// * `engines` - It stores the user selected upstream search engines selected from the UI.
|
||||
#[allow(dead_code)]
|
||||
#[derive(Deserialize)]
|
||||
struct Cookie {
|
||||
theme: String,
|
||||
colorscheme: String,
|
||||
engines: Vec<String>,
|
||||
struct Cookie<'a> {
|
||||
theme: &'a str,
|
||||
colorscheme: &'a str,
|
||||
engines: Vec<&'a str>,
|
||||
}
|
||||
|
||||
/// Handles the route of search page of the `websurfx` meta search engine website and it takes
|
||||
@ -111,9 +111,9 @@ pub async fn search(
|
||||
page - 1
|
||||
),
|
||||
&config,
|
||||
query.to_string(),
|
||||
query,
|
||||
page - 1,
|
||||
req.clone(),
|
||||
&req,
|
||||
),
|
||||
results(
|
||||
format!(
|
||||
@ -121,9 +121,9 @@ pub async fn search(
|
||||
config.binding_ip, config.port, query, page
|
||||
),
|
||||
&config,
|
||||
query.to_string(),
|
||||
query,
|
||||
page,
|
||||
req.clone(),
|
||||
&req,
|
||||
),
|
||||
results(
|
||||
format!(
|
||||
@ -134,9 +134,9 @@ pub async fn search(
|
||||
page + 1
|
||||
),
|
||||
&config,
|
||||
query.to_string(),
|
||||
query,
|
||||
page + 1,
|
||||
req.clone(),
|
||||
&req,
|
||||
)
|
||||
);
|
||||
|
||||
@ -154,9 +154,9 @@ pub async fn search(
|
||||
async fn results(
|
||||
url: String,
|
||||
config: &Config,
|
||||
query: String,
|
||||
query: &str,
|
||||
page: u32,
|
||||
req: HttpRequest,
|
||||
req: &HttpRequest,
|
||||
) -> Result<SearchResults, Box<dyn std::error::Error>> {
|
||||
//Initialize redis cache connection struct
|
||||
let mut redis_cache = RedisCache::new(&config.redis_url, 5).await?;
|
||||
@ -165,19 +165,17 @@ async fn results(
|
||||
// check if fetched cache results was indeed fetched or it was an error and if so
|
||||
// handle the data accordingly.
|
||||
match cached_results_json {
|
||||
Ok(results) => Ok(serde_json::from_str::<SearchResults>(&results).unwrap()),
|
||||
Ok(results) => Ok(serde_json::from_str::<SearchResults>(&results)?),
|
||||
Err(_) => {
|
||||
// check if the cookie value is empty or not if it is empty then use the
|
||||
// default selected upstream search engines from the config file otherwise
|
||||
// parse the non-empty cookie and grab the user selected engines from the
|
||||
// UI and use that.
|
||||
let mut results: crate::results::aggregation_models::SearchResults = match req
|
||||
.cookie("appCookie")
|
||||
{
|
||||
let mut results: SearchResults = match req.cookie("appCookie") {
|
||||
Some(cookie_value) => {
|
||||
let cookie_value: Cookie = serde_json::from_str(cookie_value.name_value().1)?;
|
||||
|
||||
let engines = cookie_value
|
||||
let engines: Vec<EngineHandler> = cookie_value
|
||||
.engines
|
||||
.iter()
|
||||
.filter_map(|name| EngineHandler::new(name))
|
||||
@ -188,7 +186,7 @@ async fn results(
|
||||
page,
|
||||
config.aggregator.random_delay,
|
||||
config.debug,
|
||||
engines,
|
||||
&engines,
|
||||
config.request_timeout,
|
||||
)
|
||||
.await?
|
||||
@ -199,13 +197,14 @@ async fn results(
|
||||
page,
|
||||
config.aggregator.random_delay,
|
||||
config.debug,
|
||||
config.upstream_search_engines.clone(),
|
||||
&config.upstream_search_engines,
|
||||
config.request_timeout,
|
||||
)
|
||||
.await?
|
||||
}
|
||||
};
|
||||
results.add_style(config.style.clone());
|
||||
|
||||
results.add_style(&config.style);
|
||||
redis_cache
|
||||
.cache_results(&serde_json::to_string(&results)?, &url)
|
||||
.await?;
|
||||
|
Loading…
Reference in New Issue
Block a user