From 2d47e8d73013d75fbeeb48647caaaddfb509ccc7 Mon Sep 17 00:00:00 2001 From: neon_arch Date: Sun, 30 Jul 2023 10:53:48 +0300 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20feat:=20provide=20the=20functionali?= =?UTF-8?q?ty=20to=20use=20the=20new=20config=20option?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/engines/duckduckgo.rs | 3 ++- src/engines/engine_models.rs | 4 +++- src/engines/searx.rs | 8 +++++--- src/results/aggregator.rs | 9 ++++++--- src/server/routes.rs | 2 ++ 5 files changed, 18 insertions(+), 8 deletions(-) diff --git a/src/engines/duckduckgo.rs b/src/engines/duckduckgo.rs index f8ad597..8059b90 100644 --- a/src/engines/duckduckgo.rs +++ b/src/engines/duckduckgo.rs @@ -41,6 +41,7 @@ impl SearchEngine for DuckDuckGo { query: String, page: u32, user_agent: String, + request_timeout: u8, ) -> Result, EngineError> { // Page number can be missing or empty string and so appropriate handling is required // so that upstream server recieves valid page number. @@ -90,7 +91,7 @@ impl SearchEngine for DuckDuckGo { ); let document: Html = Html::parse_document( - &DuckDuckGo::fetch_html_from_upstream(self, url, header_map).await?, + &DuckDuckGo::fetch_html_from_upstream(self, url, header_map, request_timeout).await?, ); let no_result: Selector = Selector::parse(".no-results") diff --git a/src/engines/engine_models.rs b/src/engines/engine_models.rs index f635ca9..b5051be 100644 --- a/src/engines/engine_models.rs +++ b/src/engines/engine_models.rs @@ -50,11 +50,12 @@ pub trait SearchEngine { &self, url: String, header_map: reqwest::header::HeaderMap, + request_timeout: u8, ) -> Result { // fetch the html from upstream search engine Ok(reqwest::Client::new() .get(url) - .timeout(Duration::from_secs(30)) // Add timeout to request to avoid DDOSing the server + .timeout(Duration::from_secs(request_timeout as u64)) // Add timeout to request to avoid DDOSing the server .headers(header_map) // add spoofed headers to emulate human behaviour .send() .await @@ -71,5 +72,6 @@ pub trait SearchEngine { query: String, page: u32, user_agent: String, + request_timeout: u8, ) -> Result, EngineError>; } diff --git a/src/engines/searx.rs b/src/engines/searx.rs index 145abf1..6274400 100644 --- a/src/engines/searx.rs +++ b/src/engines/searx.rs @@ -4,7 +4,7 @@ use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT}; use scraper::{Html, Selector}; -use std::collections::HashMap; +use std::{collections::HashMap}; use crate::results::aggregation_models::RawSearchResult; @@ -40,6 +40,7 @@ impl SearchEngine for Searx { query: String, page: u32, user_agent: String, + request_timeout: u8, ) -> Result, EngineError> { // Page number can be missing or empty string and so appropriate handling is required // so that upstream server recieves valid page number. @@ -70,8 +71,9 @@ impl SearchEngine for Searx { ); header_map.insert(COOKIE, "categories=general; language=auto; locale=en; autocomplete=duckduckgo; image_proxy=1; method=POST; safesearch=2; theme=simple; results_on_new_tab=1; doi_resolver=oadoi.org; simple_style=auto; center_alignment=1; query_in_title=1; infinite_scroll=0; disabled_engines=; enabled_engines=\"archive is__general\\054yep__general\\054curlie__general\\054currency__general\\054ddg definitions__general\\054wikidata__general\\054duckduckgo__general\\054tineye__general\\054lingva__general\\054startpage__general\\054yahoo__general\\054wiby__general\\054marginalia__general\\054alexandria__general\\054wikibooks__general\\054wikiquote__general\\054wikisource__general\\054wikiversity__general\\054wikivoyage__general\\054dictzone__general\\054seznam__general\\054mojeek__general\\054naver__general\\054wikimini__general\\054brave__general\\054petalsearch__general\\054goo__general\"; disabled_plugins=; enabled_plugins=\"searx.plugins.hostname_replace\\054searx.plugins.oa_doi_rewrite\\054searx.plugins.vim_hotkeys\"; tokens=; maintab=on; enginetab=on".parse().into_report().change_context(EngineError::UnexpectedError)?); - let document: Html = - Html::parse_document(&Searx::fetch_html_from_upstream(self, url, header_map).await?); + let document: Html = Html::parse_document( + &Searx::fetch_html_from_upstream(self, url, header_map, request_timeout).await?, + ); let no_result: Selector = Selector::parse("#urls>.dialog-error>p") .map_err(|_| Report::new(EngineError::UnexpectedError)) diff --git a/src/results/aggregator.rs b/src/results/aggregator.rs index 501b273..b7ad050 100644 --- a/src/results/aggregator.rs +++ b/src/results/aggregator.rs @@ -64,6 +64,7 @@ pub async fn aggregate( random_delay: bool, debug: bool, upstream_search_engines: Vec, + request_timeout: u8, ) -> Result> { let user_agent: String = random_user_agent(); let mut result_map: HashMap = HashMap::new(); @@ -92,9 +93,11 @@ pub async fn aggregate( .map(|search_engine| { let query: String = query.clone(); let user_agent: String = user_agent.clone(); - tokio::spawn( - async move { search_engine.results(query, page, user_agent.clone()).await }, - ) + tokio::spawn(async move { + search_engine + .results(query, page, user_agent.clone(), request_timeout) + .await + }) }) .collect(); diff --git a/src/server/routes.rs b/src/server/routes.rs index cb6999d..0b07115 100644 --- a/src/server/routes.rs +++ b/src/server/routes.rs @@ -146,6 +146,7 @@ async fn results( config.aggregator.random_delay, config.debug, cookie_value.engines, + config.request_timeout, ) .await? } @@ -156,6 +157,7 @@ async fn results( config.aggregator.random_delay, config.debug, config.upstream_search_engines.clone(), + config.request_timeout, ) .await? }