From 2803471821a323cce8a596c66563acafdb4e1d34 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 3 Oct 2023 03:32:57 +0000 Subject: [PATCH 1/8] build(deps): bump mlua from 0.8.10 to 0.9.1 Bumps [mlua](https://github.com/khvzak/mlua) from 0.8.10 to 0.9.1. - [Changelog](https://github.com/khvzak/mlua/blob/master/CHANGELOG.md) - [Commits](https://github.com/khvzak/mlua/compare/v0.8.10...v0.9.1) --- updated-dependencies: - dependency-name: mlua dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- Cargo.lock | 27 +++++++++++++++++++-------- Cargo.toml | 2 +- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d070cf8..21dce61 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -433,11 +433,12 @@ dependencies = [ [[package]] name = "bstr" -version = "0.2.17" +version = "1.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" +checksum = "4c2f7349907b712260e64b0afe2f84692af14a454be26187d9df565c7f69266a" dependencies = [ "memchr", + "serde", ] [[package]] @@ -1992,20 +1993,30 @@ dependencies = [ [[package]] name = "mlua" -version = "0.8.10" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bb37b0ba91f017aa7ca2b98ef99496827770cd635b4a932a6047c5b4bbe678e" +checksum = "6c3a7a7ff4481ec91b951a733390211a8ace1caba57266ccb5f4d4966704e560" dependencies = [ "bstr", - "cc", - "lua-src", - "luajit-src", + "mlua-sys", "num-traits", "once_cell", - "pkg-config", "rustc-hash", ] +[[package]] +name = "mlua-sys" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ec8b54eddb76093069cce9eeffb4c7b3a1a0fe66962d7bd44c4867928149ca3" +dependencies = [ + "cc", + "cfg-if 1.0.0", + "lua-src", + "luajit-src", + "pkg-config", +] + [[package]] name = "native-tls" version = "0.2.11" diff --git a/Cargo.toml b/Cargo.toml index a5a81d2..cf9df90 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,7 @@ serde_json = {version="1.0.105"} fake-useragent = {version="0.1.3"} env_logger = {version="0.10.0"} log = {version="0.4.20"} -mlua = {version="0.8.10", features=["luajit", "vendored"]} +mlua = {version="0.9.1", features=["luajit", "vendored"]} redis = {version="0.23.3", features=["tokio-comp","connection-manager"], optional = true} md5 = {version="0.7.0"} rand={version="0.8.5"} From 5e63f37d706e9182996fc42a4748914d3ca50f55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Mota?= Date: Wed, 4 Oct 2023 19:17:42 +0100 Subject: [PATCH 2/8] =?UTF-8?q?=F0=9F=93=9D=20Three=20points=20listed=20un?= =?UTF-8?q?der=20the=20`Features`=20section=20(#304)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add additional features to README * 🚀 chore: bump the app version (#304) * 🚀 chore: bump the app version (#304) --------- Co-authored-by: alamin655 --- Cargo.lock | 2 +- Cargo.toml | 2 +- README.md | 3 +++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6bcb939..1d45c38 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3954,7 +3954,7 @@ dependencies = [ [[package]] name = "websurfx" -version = "1.0.8" +version = "1.0.9" dependencies = [ "actix-cors", "actix-files", diff --git a/Cargo.toml b/Cargo.toml index cdeffa5..a291449 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "websurfx" -version = "1.0.8" +version = "1.0.9" edition = "2021" description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind." repository = "https://github.com/neon-mmd/websurfx" diff --git a/README.md b/README.md index 6161143..b5b7e3b 100644 --- a/README.md +++ b/README.md @@ -112,6 +112,9 @@ # Features 🚀 - 🎨 Make Websurfx uniquely yours with twelve color schemes provided by default. It also supports creation of custom themes and color schemes in a quick and easy way, so unleash your creativity! +- 🚀 Easy to setup with docker or on bare metal with various installation/deployement options. +- ⛔ Search filtering to filter search results based on four different levels. +- 💾 Different caching levels focusing on reliability, speed and resiliancy. - 🔐 Fast, private, and secure - 🆓 100% free and open source - 💨 Ad-free and clean results From 75a77d25f06f2dbf53494655f76719568d7adc9f Mon Sep 17 00:00:00 2001 From: Zsombor Gegesy Date: Sun, 24 Sep 2023 13:54:08 +0200 Subject: [PATCH 3/8] Create separate search_result_parser --- src/config/parser.rs | 2 +- src/engines/duckduckgo.rs | 49 ++++++++++++++------------- src/engines/mod.rs | 1 + src/engines/search_result_parser.rs | 38 +++++++++++++++++++++ src/engines/searx.rs | 52 ++++++++++++++--------------- src/models/aggregation_models.rs | 2 ++ src/models/engine_models.rs | 26 ++++++++++----- src/server/routes/search.rs | 2 +- 8 files changed, 110 insertions(+), 62 deletions(-) create mode 100644 src/engines/search_result_parser.rs diff --git a/src/config/parser.rs b/src/config/parser.rs index fb9f8b1..d7202a6 100644 --- a/src/config/parser.rs +++ b/src/config/parser.rs @@ -111,7 +111,7 @@ impl Config { .get::<_, HashMap>("upstream_search_engines")? .into_iter() .filter_map(|(key, value)| value.then_some(key)) - .filter_map(|engine| crate::models::engine_models::EngineHandler::new(&engine)) + .filter_map(|engine| crate::models::engine_models::EngineHandler::new(&engine).ok()) .collect(), request_timeout: globals.get::<_, u8>("request_timeout")?, threads, diff --git a/src/engines/duckduckgo.rs b/src/engines/duckduckgo.rs index 0f06ea4..318e764 100644 --- a/src/engines/duckduckgo.rs +++ b/src/engines/duckduckgo.rs @@ -5,7 +5,7 @@ use std::collections::HashMap; use reqwest::header::HeaderMap; -use scraper::{Html, Selector}; +use scraper::Html; use crate::models::aggregation_models::SearchResult; @@ -13,9 +13,27 @@ use crate::models::engine_models::{EngineError, SearchEngine}; use error_stack::{Report, Result, ResultExt}; +use super::search_result_parser::SearchResultParser; + /// A new DuckDuckGo engine type defined in-order to implement the `SearchEngine` trait which allows to /// reduce code duplication as well as allows to create vector of different search engines easily. -pub struct DuckDuckGo; +pub struct DuckDuckGo { + parser: SearchResultParser, +} + +impl DuckDuckGo { + pub fn new() -> Result { + Ok(Self { + parser: SearchResultParser::new( + ".no-results", + ".result", + ".result__a", + ".result__url", + ".result__snippet", + )?, + }) + } +} #[async_trait::async_trait] impl SearchEngine for DuckDuckGo { @@ -59,34 +77,17 @@ impl SearchEngine for DuckDuckGo { &DuckDuckGo::fetch_html_from_upstream(self, &url, header_map, request_timeout).await?, ); - let no_result: Selector = Selector::parse(".no-results") - .map_err(|_| Report::new(EngineError::UnexpectedError)) - .attach_printable_lazy(|| format!("invalid CSS selector: {}", ".no-results"))?; - - if document.select(&no_result).next().is_some() { + if document.select(&self.parser.no_result).next().is_some() { return Err(Report::new(EngineError::EmptyResultSet)); } - let results: Selector = Selector::parse(".result") - .map_err(|_| Report::new(EngineError::UnexpectedError)) - .attach_printable_lazy(|| format!("invalid CSS selector: {}", ".result"))?; - let result_title: Selector = Selector::parse(".result__a") - .map_err(|_| Report::new(EngineError::UnexpectedError)) - .attach_printable_lazy(|| format!("invalid CSS selector: {}", ".result__a"))?; - let result_url: Selector = Selector::parse(".result__url") - .map_err(|_| Report::new(EngineError::UnexpectedError)) - .attach_printable_lazy(|| format!("invalid CSS selector: {}", ".result__url"))?; - let result_desc: Selector = Selector::parse(".result__snippet") - .map_err(|_| Report::new(EngineError::UnexpectedError)) - .attach_printable_lazy(|| format!("invalid CSS selector: {}", ".result__snippet"))?; - // scrape all the results from the html Ok(document - .select(&results) + .select(&self.parser.results) .map(|result| { SearchResult::new( result - .select(&result_title) + .select(&self.parser.result_title) .next() .unwrap() .inner_html() @@ -94,7 +95,7 @@ impl SearchEngine for DuckDuckGo { format!( "https://{}", result - .select(&result_url) + .select(&self.parser.result_url) .next() .unwrap() .inner_html() @@ -102,7 +103,7 @@ impl SearchEngine for DuckDuckGo { ) .as_str(), result - .select(&result_desc) + .select(&self.parser.result_desc) .next() .unwrap() .inner_html() diff --git a/src/engines/mod.rs b/src/engines/mod.rs index 0016728..39b50c8 100644 --- a/src/engines/mod.rs +++ b/src/engines/mod.rs @@ -4,4 +4,5 @@ //! code. Moreover, it also provides a custom error for the upstream search engine handling code. pub mod duckduckgo; +pub mod search_result_parser; pub mod searx; diff --git a/src/engines/search_result_parser.rs b/src/engines/search_result_parser.rs new file mode 100644 index 0000000..6918917 --- /dev/null +++ b/src/engines/search_result_parser.rs @@ -0,0 +1,38 @@ +use crate::models::engine_models::EngineError; +use error_stack::{Report, Result, ResultExt}; +use scraper::{Html, Selector}; + +pub struct SearchResultParser { + pub no_result: Selector, + pub results: Selector, + pub result_title: Selector, + pub result_url: Selector, + pub result_desc: Selector, +} + +impl SearchResultParser { + pub fn new( + no_result_selector: &str, + results_selector: &str, + result_title_selector: &str, + result_url_selector: &str, + result_desc_selector: &str, + ) -> Result { + Ok(SearchResultParser { + no_result: new_selector(no_result_selector)?, + results: new_selector(results_selector)?, + result_title: new_selector(result_title_selector)?, + result_url: new_selector(result_url_selector)?, + result_desc: new_selector(result_desc_selector)?, + }) + } +} + +fn new_selector(selector: &str) -> Result { + Selector::parse(selector).map_err(|err| { + Report::new(EngineError::UnexpectedError).attach_printable(format!( + "invalid CSS selector: {}, err: {:?}", + selector, err + )) + }) +} diff --git a/src/engines/searx.rs b/src/engines/searx.rs index 6ab0469..32f286e 100644 --- a/src/engines/searx.rs +++ b/src/engines/searx.rs @@ -3,16 +3,34 @@ //! number if provided. use reqwest::header::HeaderMap; -use scraper::{Html, Selector}; +use scraper::Html; use std::collections::HashMap; +use super::search_result_parser::SearchResultParser; use crate::models::aggregation_models::SearchResult; use crate::models::engine_models::{EngineError, SearchEngine}; use error_stack::{Report, Result, ResultExt}; /// A new Searx engine type defined in-order to implement the `SearchEngine` trait which allows to /// reduce code duplication as well as allows to create vector of different search engines easily. -pub struct Searx; +pub struct Searx { + parser: SearchResultParser, +} + +impl Searx { + // new Searchx engine + pub fn new() -> Result { + Ok(Self { + parser: SearchResultParser::new( + "#urls>.dialog-error>p", + ".result", + "h3>a", + "h3>a", + ".content", + )?, + }) + } +} #[async_trait::async_trait] impl SearchEngine for Searx { @@ -52,13 +70,7 @@ impl SearchEngine for Searx { &Searx::fetch_html_from_upstream(self, &url, header_map, request_timeout).await?, ); - let no_result: Selector = Selector::parse("#urls>.dialog-error>p") - .map_err(|_| Report::new(EngineError::UnexpectedError)) - .attach_printable_lazy(|| { - format!("invalid CSS selector: {}", "#urls>.dialog-error>p") - })?; - - if let Some(no_result_msg) = document.select(&no_result).nth(1) { + if let Some(no_result_msg) = document.select(&self.parser.no_result).nth(1) { if no_result_msg.inner_html() == "we didn't find any results. Please use another query or search in more categories" { @@ -66,40 +78,26 @@ impl SearchEngine for Searx { } } - let results: Selector = Selector::parse(".result") - .map_err(|_| Report::new(EngineError::UnexpectedError)) - .attach_printable_lazy(|| format!("invalid CSS selector: {}", ".result"))?; - let result_title: Selector = Selector::parse("h3>a") - .map_err(|_| Report::new(EngineError::UnexpectedError)) - .attach_printable_lazy(|| format!("invalid CSS selector: {}", "h3>a"))?; - let result_url: Selector = Selector::parse("h3>a") - .map_err(|_| Report::new(EngineError::UnexpectedError)) - .attach_printable_lazy(|| format!("invalid CSS selector: {}", "h3>a"))?; - - let result_desc: Selector = Selector::parse(".content") - .map_err(|_| Report::new(EngineError::UnexpectedError)) - .attach_printable_lazy(|| format!("invalid CSS selector: {}", ".content"))?; - // scrape all the results from the html Ok(document - .select(&results) + .select(&self.parser.results) .map(|result| { SearchResult::new( result - .select(&result_title) + .select(&self.parser.result_title) .next() .unwrap() .inner_html() .trim(), result - .select(&result_url) + .select(&self.parser.result_url) .next() .unwrap() .value() .attr("href") .unwrap(), result - .select(&result_desc) + .select(&self.parser.result_desc) .next() .unwrap() .inner_html() diff --git a/src/models/aggregation_models.rs b/src/models/aggregation_models.rs index 72bbf08..6e4bddf 100644 --- a/src/models/aggregation_models.rs +++ b/src/models/aggregation_models.rs @@ -85,12 +85,14 @@ impl EngineErrorInfo { pub fn new(error: &EngineError, engine: &str) -> Self { Self { error: match error { + EngineError::EngineNotFound => "EngineNotFound".to_owned(), EngineError::RequestError => "RequestError".to_owned(), EngineError::EmptyResultSet => "EmptyResultSet".to_owned(), EngineError::UnexpectedError => "UnexpectedError".to_owned(), }, engine: engine.to_owned(), severity_color: match error { + EngineError::EngineNotFound => "red".to_owned(), EngineError::RequestError => "green".to_owned(), EngineError::EmptyResultSet => "blue".to_owned(), EngineError::UnexpectedError => "red".to_owned(), diff --git a/src/models/engine_models.rs b/src/models/engine_models.rs index d4a4e72..77ec4c4 100644 --- a/src/models/engine_models.rs +++ b/src/models/engine_models.rs @@ -2,12 +2,14 @@ //! the upstream search engines with the search query provided by the user. use super::aggregation_models::SearchResult; -use error_stack::{Result, ResultExt}; +use error_stack::{Report, Result, ResultExt}; use std::{collections::HashMap, fmt, time::Duration}; /// A custom error type used for handle engine associated errors. #[derive(Debug)] pub enum EngineError { + // No matching engine found + EngineNotFound, /// This variant handles all request related errors like forbidden, not found, /// etc. EmptyResultSet, @@ -24,6 +26,9 @@ pub enum EngineError { impl fmt::Display for EngineError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { + EngineError::EngineNotFound => { + write!(f, "Search engine not found") + } EngineError::EmptyResultSet => { write!(f, "The upstream search engine returned an empty result set") } @@ -134,18 +139,21 @@ impl EngineHandler { /// # Returns /// /// It returns an option either containing the value or a none if the engine is unknown - pub fn new(engine_name: &str) -> Option { + pub fn new(engine_name: &str) -> Result { let engine: (&'static str, Box) = match engine_name.to_lowercase().as_str() { - "duckduckgo" => ( - "duckduckgo", - Box::new(crate::engines::duckduckgo::DuckDuckGo), - ), - "searx" => ("searx", Box::new(crate::engines::searx::Searx)), - _ => return None, + "duckduckgo" => { + let engine = crate::engines::duckduckgo::DuckDuckGo::new()?; + ("duckduckgo", Box::new(engine)) + } + "searx" => { + let engine = crate::engines::searx::Searx::new()?; + ("searx", Box::new(engine)) + } + _ => return Err(Report::from(EngineError::EngineNotFound)), }; - Some(Self { + Ok(Self { engine: engine.1, name: engine.0, }) diff --git a/src/server/routes/search.rs b/src/server/routes/search.rs index 80db98f..9dbd1e1 100644 --- a/src/server/routes/search.rs +++ b/src/server/routes/search.rs @@ -191,7 +191,7 @@ async fn results( let engines: Vec = cookie_value .engines .iter() - .filter_map(|name| EngineHandler::new(name)) + .filter_map(|name| EngineHandler::new(name).ok()) .collect(); safe_search_level = match config.safe_search { From 57c73d38c80024bcf9daf19ed9b7e2af6b1190fd Mon Sep 17 00:00:00 2001 From: Zsombor Gegesy Date: Sun, 24 Sep 2023 15:09:03 +0200 Subject: [PATCH 4/8] Refactor the search result parsing --- src/engines/duckduckgo.rs | 40 ++++++----------------- src/engines/search_result_parser.rs | 49 ++++++++++++++++++++++++----- src/engines/searx.rs | 42 ++++++++----------------- src/models/engine_models.rs | 2 +- 4 files changed, 65 insertions(+), 68 deletions(-) diff --git a/src/engines/duckduckgo.rs b/src/engines/duckduckgo.rs index 318e764..8a28dda 100644 --- a/src/engines/duckduckgo.rs +++ b/src/engines/duckduckgo.rs @@ -18,10 +18,12 @@ use super::search_result_parser::SearchResultParser; /// A new DuckDuckGo engine type defined in-order to implement the `SearchEngine` trait which allows to /// reduce code duplication as well as allows to create vector of different search engines easily. pub struct DuckDuckGo { + // The parser, used to interpret the search result. parser: SearchResultParser, } impl DuckDuckGo { + /// Creates the DuckDuckGo parser. pub fn new() -> Result { Ok(Self { parser: SearchResultParser::new( @@ -77,41 +79,19 @@ impl SearchEngine for DuckDuckGo { &DuckDuckGo::fetch_html_from_upstream(self, &url, header_map, request_timeout).await?, ); - if document.select(&self.parser.no_result).next().is_some() { + if self.parser.parse_for_no_results(&document).next().is_some() { return Err(Report::new(EngineError::EmptyResultSet)); } // scrape all the results from the html - Ok(document - .select(&self.parser.results) - .map(|result| { - SearchResult::new( - result - .select(&self.parser.result_title) - .next() - .unwrap() - .inner_html() - .trim(), - format!( - "https://{}", - result - .select(&self.parser.result_url) - .next() - .unwrap() - .inner_html() - .trim() - ) - .as_str(), - result - .select(&self.parser.result_desc) - .next() - .unwrap() - .inner_html() - .trim(), + self.parser + .parse_for_results(&document, |title, url, desc| { + Some(SearchResult::new( + title.inner_html().trim(), + &format!("https://{}", url.inner_html().trim()), + desc.inner_html().trim(), &["duckduckgo"], - ) + )) }) - .map(|search_result| (search_result.url.clone(), search_result)) - .collect()) } } diff --git a/src/engines/search_result_parser.rs b/src/engines/search_result_parser.rs index 6918917..94fe0e8 100644 --- a/src/engines/search_result_parser.rs +++ b/src/engines/search_result_parser.rs @@ -1,16 +1,21 @@ -use crate::models::engine_models::EngineError; -use error_stack::{Report, Result, ResultExt}; -use scraper::{Html, Selector}; +//! This modules provides helper functionalities for parsing a html document into internal SearchResult. +use std::collections::HashMap; +use crate::models::{aggregation_models::SearchResult, engine_models::EngineError}; +use error_stack::{Report, Result}; +use scraper::{html::Select, ElementRef, Html, Selector}; + +/// A html search result parser, based on a predefined CSS selectors. pub struct SearchResultParser { - pub no_result: Selector, - pub results: Selector, - pub result_title: Selector, - pub result_url: Selector, - pub result_desc: Selector, + no_result: Selector, + results: Selector, + result_title: Selector, + result_url: Selector, + result_desc: Selector, } impl SearchResultParser { + /// Creates a new parser, if all the selectors are valid, otherwise it returns an EngineError pub fn new( no_result_selector: &str, results_selector: &str, @@ -26,8 +31,36 @@ impl SearchResultParser { result_desc: new_selector(result_desc_selector)?, }) } + + /// Parse the html and returns element representing the 'no result found' response. + pub fn parse_for_no_results<'a>(&'a self, document: &'a Html) -> Select<'a, 'a> { + document.select(&self.no_result) + } + + /// Parse the html, and convert the results to SearchResult with the help of the builder function + pub fn parse_for_results( + &self, + document: &Html, + builder: impl Fn(&ElementRef<'_>, &ElementRef<'_>, &ElementRef<'_>) -> Option, + ) -> Result, EngineError> { + let res = document + .select(&self.results) + .filter_map(|result| { + let title = result.select(&self.result_title).next(); + let url = result.select(&self.result_url).next(); + let desc = result.select(&self.result_desc).next(); + match (title, url, desc) { + (Some(ref t), Some(ref u), Some(ref d)) => builder(t, u, d), + _ => None, + } + }) + .map(|search_result| (search_result.url.clone(), search_result)) + .collect(); + Ok(res) + } } +/// Create a Selector struct, if the given parameter is a valid css expression, otherwise convert it into an EngineError. fn new_selector(selector: &str) -> Result { Selector::parse(selector).map_err(|err| { Report::new(EngineError::UnexpectedError).attach_printable(format!( diff --git a/src/engines/searx.rs b/src/engines/searx.rs index 32f286e..ca08b98 100644 --- a/src/engines/searx.rs +++ b/src/engines/searx.rs @@ -14,11 +14,12 @@ use error_stack::{Report, Result, ResultExt}; /// A new Searx engine type defined in-order to implement the `SearchEngine` trait which allows to /// reduce code duplication as well as allows to create vector of different search engines easily. pub struct Searx { + // The parser, used to interpret the search result. parser: SearchResultParser, } impl Searx { - // new Searchx engine + /// creates a Searx parser pub fn new() -> Result { Ok(Self { parser: SearchResultParser::new( @@ -70,7 +71,7 @@ impl SearchEngine for Searx { &Searx::fetch_html_from_upstream(self, &url, header_map, request_timeout).await?, ); - if let Some(no_result_msg) = document.select(&self.parser.no_result).nth(1) { + if let Some(no_result_msg) = self.parser.parse_for_no_results(&document).nth(1) { if no_result_msg.inner_html() == "we didn't find any results. Please use another query or search in more categories" { @@ -79,33 +80,16 @@ impl SearchEngine for Searx { } // scrape all the results from the html - Ok(document - .select(&self.parser.results) - .map(|result| { - SearchResult::new( - result - .select(&self.parser.result_title) - .next() - .unwrap() - .inner_html() - .trim(), - result - .select(&self.parser.result_url) - .next() - .unwrap() - .value() - .attr("href") - .unwrap(), - result - .select(&self.parser.result_desc) - .next() - .unwrap() - .inner_html() - .trim(), - &["searx"], - ) + self.parser + .parse_for_results(&document, |title, url, desc| { + url.value().attr("href").map(|url| { + SearchResult::new( + title.inner_html().trim(), + url, + desc.inner_html().trim(), + &["searx"], + ) + }) }) - .map(|search_result| (search_result.url.clone(), search_result)) - .collect()) } } diff --git a/src/models/engine_models.rs b/src/models/engine_models.rs index 77ec4c4..d53fc0c 100644 --- a/src/models/engine_models.rs +++ b/src/models/engine_models.rs @@ -8,7 +8,7 @@ use std::{collections::HashMap, fmt, time::Duration}; /// A custom error type used for handle engine associated errors. #[derive(Debug)] pub enum EngineError { - // No matching engine found + /// No matching engine found EngineNotFound, /// This variant handles all request related errors like forbidden, not found, /// etc. From 32abacb4c30d1c3756d2e5d5a15a7818445b1ddd Mon Sep 17 00:00:00 2001 From: Zsombor Gegesy Date: Mon, 25 Sep 2023 00:00:06 +0200 Subject: [PATCH 5/8] Fix missing doc errors --- src/engines/duckduckgo.rs | 2 +- src/engines/search_result_parser.rs | 5 +++++ src/engines/searx.rs | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/engines/duckduckgo.rs b/src/engines/duckduckgo.rs index 8a28dda..352a33b 100644 --- a/src/engines/duckduckgo.rs +++ b/src/engines/duckduckgo.rs @@ -18,7 +18,7 @@ use super::search_result_parser::SearchResultParser; /// A new DuckDuckGo engine type defined in-order to implement the `SearchEngine` trait which allows to /// reduce code duplication as well as allows to create vector of different search engines easily. pub struct DuckDuckGo { - // The parser, used to interpret the search result. + /// The parser, used to interpret the search result. parser: SearchResultParser, } diff --git a/src/engines/search_result_parser.rs b/src/engines/search_result_parser.rs index 94fe0e8..0512bdd 100644 --- a/src/engines/search_result_parser.rs +++ b/src/engines/search_result_parser.rs @@ -7,10 +7,15 @@ use scraper::{html::Select, ElementRef, Html, Selector}; /// A html search result parser, based on a predefined CSS selectors. pub struct SearchResultParser { + /// selector to locate the element which is displayed, if there were nothing found. no_result: Selector, + /// selector to locate the element which contains one item from the search result. results: Selector, + /// selector to locate the title relative to the search result item. result_title: Selector, + /// selector to locate the url relative to the search result item. result_url: Selector, + /// selector to locate the description relative to the search result item. result_desc: Selector, } diff --git a/src/engines/searx.rs b/src/engines/searx.rs index ca08b98..79c1e95 100644 --- a/src/engines/searx.rs +++ b/src/engines/searx.rs @@ -14,7 +14,7 @@ use error_stack::{Report, Result, ResultExt}; /// A new Searx engine type defined in-order to implement the `SearchEngine` trait which allows to /// reduce code duplication as well as allows to create vector of different search engines easily. pub struct Searx { - // The parser, used to interpret the search result. + /// The parser, used to interpret the search result. parser: SearchResultParser, } From 8ed4c9e206b10bdc0d8ae59d160cadc24ada8d53 Mon Sep 17 00:00:00 2001 From: Zsombor Gegesy Date: Sat, 7 Oct 2023 10:31:58 +0200 Subject: [PATCH 6/8] Propagate errors upward, if an engine can't be initialized --- src/config/parser.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/config/parser.rs b/src/config/parser.rs index d7202a6..0acdd25 100644 --- a/src/config/parser.rs +++ b/src/config/parser.rs @@ -3,6 +3,7 @@ use crate::handler::paths::{file_path, FileType}; +use crate::models::engine_models::{EngineError, EngineHandler}; use crate::models::parser_models::{AggregatorConfig, RateLimiter, Style}; use log::LevelFilter; use mlua::Lua; @@ -28,7 +29,7 @@ pub struct Config { /// It stores the option to whether enable or disable debug mode. pub debug: bool, /// It stores all the engine names that were enabled by the user. - pub upstream_search_engines: Vec, + pub upstream_search_engines: Vec, /// It stores the time (secs) which controls the server request timeout. pub request_timeout: u8, /// It stores the number of threads which controls the app will use to run. @@ -111,8 +112,8 @@ impl Config { .get::<_, HashMap>("upstream_search_engines")? .into_iter() .filter_map(|(key, value)| value.then_some(key)) - .filter_map(|engine| crate::models::engine_models::EngineHandler::new(&engine).ok()) - .collect(), + .map(|engine| EngineHandler::new(&engine)) + .collect::, error_stack::Report>>()?, request_timeout: globals.get::<_, u8>("request_timeout")?, threads, rate_limiter: RateLimiter { From f56002dca67a0213981f3948b850d3dba8343d69 Mon Sep 17 00:00:00 2001 From: Zsombor Gegesy Date: Sun, 8 Oct 2023 22:30:31 +0200 Subject: [PATCH 7/8] Rename the error to NoSuchEngineFound and add the name of missing engine to it --- src/models/aggregation_models.rs | 4 ++-- src/models/engine_models.rs | 12 ++++++++---- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/models/aggregation_models.rs b/src/models/aggregation_models.rs index 6e4bddf..660804e 100644 --- a/src/models/aggregation_models.rs +++ b/src/models/aggregation_models.rs @@ -85,14 +85,14 @@ impl EngineErrorInfo { pub fn new(error: &EngineError, engine: &str) -> Self { Self { error: match error { - EngineError::EngineNotFound => "EngineNotFound".to_owned(), + EngineError::NoSuchEngineFound(_) => "EngineNotFound".to_owned(), EngineError::RequestError => "RequestError".to_owned(), EngineError::EmptyResultSet => "EmptyResultSet".to_owned(), EngineError::UnexpectedError => "UnexpectedError".to_owned(), }, engine: engine.to_owned(), severity_color: match error { - EngineError::EngineNotFound => "red".to_owned(), + EngineError::NoSuchEngineFound(_) => "red".to_owned(), EngineError::RequestError => "green".to_owned(), EngineError::EmptyResultSet => "blue".to_owned(), EngineError::UnexpectedError => "red".to_owned(), diff --git a/src/models/engine_models.rs b/src/models/engine_models.rs index d53fc0c..05b5a11 100644 --- a/src/models/engine_models.rs +++ b/src/models/engine_models.rs @@ -9,7 +9,7 @@ use std::{collections::HashMap, fmt, time::Duration}; #[derive(Debug)] pub enum EngineError { /// No matching engine found - EngineNotFound, + NoSuchEngineFound(String), /// This variant handles all request related errors like forbidden, not found, /// etc. EmptyResultSet, @@ -26,8 +26,8 @@ pub enum EngineError { impl fmt::Display for EngineError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - EngineError::EngineNotFound => { - write!(f, "Search engine not found") + EngineError::NoSuchEngineFound(engine) => { + write!(f, "No such engine with the name '{engine}' found") } EngineError::EmptyResultSet => { write!(f, "The upstream search engine returned an empty result set") @@ -150,7 +150,11 @@ impl EngineHandler { let engine = crate::engines::searx::Searx::new()?; ("searx", Box::new(engine)) } - _ => return Err(Report::from(EngineError::EngineNotFound)), + _ => { + return Err(Report::from(EngineError::NoSuchEngineFound( + engine_name.to_string(), + ))) + } }; Ok(Self { From 8156f7ea79099d3c64739bf94840db14ab134aad Mon Sep 17 00:00:00 2001 From: Ananth B Prathap <136650032+KekmaTime@users.noreply.github.com> Date: Mon, 9 Oct 2023 10:09:12 -0700 Subject: [PATCH 8/8] =?UTF-8?q?=F0=9F=94=A7=20Remove=20the=20unused=20mapp?= =?UTF-8?q?ed=20ports=20for=20docker=20deployment=20(#309)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * removed-ports * 🚀 chore: bump the app version (#309) * 🚀 chore: bump the app version (#309) --------- Co-authored-by: alamin655 --- Cargo.lock | 2 +- Cargo.toml | 2 +- docker-compose.yml | 4 +--- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 560acdb..2074fd4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3965,7 +3965,7 @@ dependencies = [ [[package]] name = "websurfx" -version = "1.0.9" +version = "1.0.11" dependencies = [ "actix-cors", "actix-files", diff --git a/Cargo.toml b/Cargo.toml index 2e543b6..2b12cac 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "websurfx" -version = "1.0.9" +version = "1.0.11" edition = "2021" description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind." repository = "https://github.com/neon-mmd/websurfx" diff --git a/docker-compose.yml b/docker-compose.yml index 6b50b24..1bfa54e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -15,6 +15,4 @@ services: - ./websurfx/:/etc/xdg/websurfx/ # Uncomment the following lines if you are using the `hybrid` or `redis` caching feature. # redis: - # image: redis:latest - # ports: - # - 6379:6379 \ No newline at end of file + # image: redis:latest \ No newline at end of file