diff --git a/src/config_parser/parser.rs b/src/config_parser/parser.rs index fd94673..9bb0150 100644 --- a/src/config_parser/parser.rs +++ b/src/config_parser/parser.rs @@ -18,6 +18,10 @@ static CONFIG_FILE_NAME: &str = "config.lua"; /// * `style` - It stores the theming options for the website. /// * `redis_connection_url` - It stores the redis connection url address on which the redis /// client should connect. +/// * `aggregator` - It stores the option to whether enable or disable production use. +/// * `logging` - It stores the option to whether enable or disable logs. +/// * `debug` - It stores the option to whether enable or disable debug mode. +/// * `upstream_search_engines` - It stores all the engine names that were enabled by the user. #[derive(Clone)] pub struct Config { pub port: u16, @@ -31,9 +35,13 @@ pub struct Config { } /// Configuration options for the aggregator. +/// +/// # Fields +/// +/// * `random_delay` - It stores the option to whether enable or disable random delays between +/// requests. #[derive(Clone)] pub struct AggreatorConfig { - /// Whether to introduce a random delay before sending the request to the search engine. pub random_delay: bool, } diff --git a/src/engines/duckduckgo.rs b/src/engines/duckduckgo.rs index 61ff9cb..bbe561f 100644 --- a/src/engines/duckduckgo.rs +++ b/src/engines/duckduckgo.rs @@ -13,28 +13,29 @@ use super::engine_models::{EngineError, SearchEngine}; use error_stack::{IntoReport, Report, Result, ResultExt}; -/// This function scrapes results from the upstream engine duckduckgo and puts all the scraped -/// results like title, visiting_url (href in html),engine (from which engine it was fetched from) -/// and description in a RawSearchResult and then adds that to HashMap whose keys are url and -/// values are RawSearchResult struct and then returns it within a Result enum. -/// -/// # Arguments -/// -/// * `query` - Takes the user provided query to query to the upstream search engine with. 
-/// * `page` - Takes an u32 as an argument. -/// * `user_agent` - Takes a random user agent string as an argument. -/// -/// # Errors -/// -/// Returns an `EngineErrorKind` if the user is not connected to the internet or if their is failure to -/// reach the above `upstream search engine` page or if the `upstream search engine` is unable to -/// provide results for the requested search query and also returns error if the scraping selector -/// or HeaderMap fails to initialize. - +/// A new DuckDuckGo engine type defined in order to implement the `SearchEngine` trait which allows to +/// reduce code duplication as well as allows to create vector of different search engines easily. pub struct DuckDuckGo; #[async_trait::async_trait] impl SearchEngine for DuckDuckGo { + /// This function scrapes results from the upstream engine duckduckgo and puts all the scraped + /// results like title, visiting_url (href in html),engine (from which engine it was fetched from) + /// and description in a RawSearchResult and then adds that to HashMap whose keys are url and + /// values are RawSearchResult struct and then returns it within a Result enum. + /// + /// # Arguments + /// + /// * `query` - Takes the user provided query to query to the upstream search engine with. + /// * `page` - Takes an u32 as an argument. + /// * `user_agent` - Takes a random user agent string as an argument. + /// + /// # Errors + /// + /// Returns an `EngineErrorKind` if the user is not connected to the internet or if there is a failure to + /// reach the above `upstream search engine` page or if the `upstream search engine` is unable to + /// provide results for the requested search query and also returns an error if the scraping selector + /// or HeaderMap fails to initialize. 
async fn results( &self, query: String, diff --git a/src/engines/engine_models.rs b/src/engines/engine_models.rs index bf4b6be..e6eb50f 100644 --- a/src/engines/engine_models.rs +++ b/src/engines/engine_models.rs @@ -43,6 +43,7 @@ impl fmt::Display for EngineError { impl error_stack::Context for EngineError {} +/// A trait to define common behaviour for all search engines. #[async_trait::async_trait] pub trait SearchEngine { async fn fetch_html_from_upstream( @@ -53,7 +54,7 @@ pub trait SearchEngine { // fetch the html from upstream search engine Ok(reqwest::Client::new() .get(url) - .timeout(Duration::from_secs(30)) + .timeout(Duration::from_secs(30)) // Add timeout to request to avoid DDOSing the server .headers(header_map) // add spoofed headers to emulate human behaviour .send() .await diff --git a/src/engines/searx.rs b/src/engines/searx.rs index 71bc80d..6706e3c 100644 --- a/src/engines/searx.rs +++ b/src/engines/searx.rs @@ -11,28 +11,30 @@ use crate::search_results_handler::aggregation_models::RawSearchResult; use super::engine_models::{EngineError, SearchEngine}; use error_stack::{IntoReport, Report, Result, ResultExt}; -/// This function scrapes results from the upstream engine duckduckgo and puts all the scraped -/// results like title, visiting_url (href in html),engine (from which engine it was fetched from) -/// and description in a RawSearchResult and then adds that to HashMap whose keys are url and -/// values are RawSearchResult struct and then returns it within a Result enum. -/// -/// # Arguments -/// -/// * `query` - Takes the user provided query to query to the upstream search engine with. -/// * `page` - Takes an u32 as an argument. -/// * `user_agent` - Takes a random user agent string as an argument. 
-/// -/// # Errors -/// -/// Returns an `EngineErrorKind` if the user is not connected to the internet or if their is failure to -/// reach the above `upstream search engine` page or if the `upstream search engine` is unable to -/// provide results for the requested search query and also returns error if the scraping selector -/// or HeaderMap fails to initialize. - +/// A new Searx engine type defined in order to implement the `SearchEngine` trait which allows to +/// reduce code duplication as well as allows to create vector of different search engines easily. pub struct Searx; #[async_trait::async_trait] impl SearchEngine for Searx { + /// This function scrapes results from the upstream engine searx and puts all the scraped + /// results like title, visiting_url (href in html),engine (from which engine it was fetched from) + /// and description in a RawSearchResult and then adds that to HashMap whose keys are url and + /// values are RawSearchResult struct and then returns it within a Result enum. + /// + /// # Arguments + /// + /// * `query` - Takes the user provided query to query to the upstream search engine with. + /// * `page` - Takes an u32 as an argument. + /// * `user_agent` - Takes a random user agent string as an argument. + /// + /// # Errors + /// + /// Returns an `EngineErrorKind` if the user is not connected to the internet or if there is a failure to + /// reach the above `upstream search engine` page or if the `upstream search engine` is unable to + /// provide results for the requested search query and also returns an error if the scraping selector + /// or HeaderMap fails to initialize. 
+ async fn results( &self, query: String, diff --git a/src/search_results_handler/aggregation_models.rs b/src/search_results_handler/aggregation_models.rs index 31a43c2..4177cc3 100644 --- a/src/search_results_handler/aggregation_models.rs +++ b/src/search_results_handler/aggregation_models.rs @@ -143,6 +143,11 @@ impl EngineErrorInfo { /// * `results` - Stores the individual serializable `SearchResult` struct into a vector of /// `SearchResult` structs. /// * `page_query` - Stores the current pages search query `q` provided in the search url. +/// * `style` - Stores the theming options for the website. +/// * `engine_errors_info` - Stores the information on which engines failed with their engine name +/// and the type of error that caused it. +/// * `empty_result_set` - Stores a boolean which indicates that no engines gave a result for the +/// given search query. #[derive(Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct SearchResults { @@ -162,6 +167,8 @@ impl SearchResults { /// and stores it into a vector of `SearchResult` structs. /// * `page_query` - Takes an argument of current page`s search query `q` provided in /// the search url. + /// * `empty_result_set` - Takes a boolean which indicates that no engines gave a result for the + /// given search query. pub fn new( results: Vec, page_query: String, @@ -176,14 +183,17 @@ impl SearchResults { } } + /// A setter function to add website style to the return search results. pub fn add_style(&mut self, style: Style) { self.style = style; } + /// A function which checks whether the results stored are empty or not. pub fn is_empty_result_set(&self) -> bool { self.results.is_empty() } + /// A setter function which sets the empty_result_set to true. 
pub fn set_empty_result_set(&mut self) { self.empty_result_set = true; } diff --git a/src/search_results_handler/aggregator.rs b/src/search_results_handler/aggregator.rs index 197c3d7..ec5c065 100644 --- a/src/search_results_handler/aggregator.rs +++ b/src/search_results_handler/aggregator.rs @@ -18,14 +18,21 @@ use crate::engines::{ searx, }; +/// Alias for a long type annotation type FutureVec = Vec, Report>>>; -/// A function that aggregates all the scraped results from the above upstream engines and -/// then removes duplicate results and if two results are found to be from two or more engines -/// then puts their names together to show the results are fetched from these upstream engines -/// and then removes all data from the HashMap and puts into a struct of all results aggregated -/// into a vector and also adds the query used into the struct this is neccessory because -/// otherwise the search bar in search remains empty if searched from the query url +/// A function that aggregates all the scraped results from the above user selected upstream +/// search engines either selected from the UI or from the config file which is handled by the code +/// by matching over the selected search engines and adding the selected ones to the vector which +/// is then used to create an async task vector with `tokio::spawn` which returns a future which +/// is then awaited on in another loop and then all the collected results are filtered for errors +/// and proper results and if an error is found it is then sent to the UI with the engine name and the +/// error type that caused it by putting them finally in the returned `SearchResults` struct. 
Also +/// the same process also removes duplicate results and if two results are found to be from two or +/// more engines then puts their names together to show the results are fetched from these upstream +/// engines and then removes all data from the HashMap and puts into a struct of all results aggregated +/// into a vector and also adds the query used into the struct this is necessary because otherwise the +/// search bar in search remains empty if searched from the query url. /// /// # Example: /// @@ -37,6 +44,9 @@ type FutureVec = Vec, Report< /// * `query` - Accepts a string to query with the above upstream search engines. /// * `page` - Accepts an u32 page number. /// * `random_delay` - Accepts a boolean value to add a random delay before making the request. +/// * `debug` - Accepts a boolean value to enable or disable the debug mode option. +/// * `upstream_search_engines` - Accepts a vector of search engine names which were selected by the +/// user through the UI or the config file. /// /// # Error /// diff --git a/src/server/routes.rs b/src/server/routes.rs index c614139..ada2ef2 100644 --- a/src/server/routes.rs +++ b/src/server/routes.rs @@ -51,6 +51,13 @@ pub async fn not_found( .body(page_content)) } +/// A named struct which is used to deserialize the cookies fetched from the client side. +/// +/// # Fields +/// +/// * `theme` - It stores the theme name used in the website. +/// * `colorscheme` - It stores the colorscheme name used for the website theme. +/// * `engines` - It stores the user selected upstream search engines selected from the UI. #[allow(dead_code)] #[derive(Deserialize)] struct Cookie { @@ -126,7 +133,7 @@ pub async fn search( // fetch the cached results json. let cached_results_json = redis_cache.cached_results_json(&page_url); - // check if fetched results was indeed fetched or it was an error and if so + // check if fetched cache results was indeed fetched or it was an error and if so // handle the data accordingly. 
match cached_results_json { Ok(results_json) => { @@ -135,6 +142,10 @@ pub async fn search( Ok(HttpResponse::Ok().body(page_content)) } Err(_) => { + // check if the cookie value is empty or not if it is empty then use the + // default selected upstream search engines from the config file otherwise + // parse the non-empty cookie and grab the user selected engines from the + // UI and use that. let mut results_json: crate::search_results_handler::aggregation_models::SearchResults = match req.cookie("appCookie") { Some(cookie_value) => { let cookie_value:Cookie = serde_json::from_str(cookie_value.name_value().1)?; @@ -143,6 +154,9 @@ pub async fn search( None => aggregate(query.clone(), page, config.aggregator.random_delay, config.debug, config.upstream_search_engines.clone()).await?, }; results_json.add_style(config.style.clone()); + // check whether the results grabbed from the upstream engines are empty or + // not if they are empty then set the empty_result_set option to true in + // the result json. if results_json.is_empty_result_set() { results_json.set_empty_result_set(); }