diff --git a/Cargo.lock b/Cargo.lock index bacfd7e..29a28c5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3797,7 +3797,7 @@ dependencies = [ [[package]] name = "websurfx" -version = "0.20.0" +version = "0.20.1" dependencies = [ "actix-cors", "actix-files", diff --git a/Cargo.toml b/Cargo.toml index 0747fb7..7d2ef7d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "websurfx" -version = "0.20.0" +version = "0.20.1" edition = "2021" description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind." repository = "https://github.com/neon-mmd/websurfx" diff --git a/src/cache/cacher.rs b/src/cache/cacher.rs index b2508b5..57351cd 100644 --- a/src/cache/cacher.rs +++ b/src/cache/cacher.rs @@ -10,17 +10,14 @@ use super::error::PoolError; /// A named struct which stores the redis Connection url address to which the client will /// connect to. -/// -/// # Fields -/// -/// * `connection_pool` - It stores a pool of connections ready to be used. -/// * `pool_size` - It stores the size of the connection pool (in other words the number of -/// connections that should be stored in the pool). -/// * `current_connection` - It stores the index of which connection is being used at the moment. #[derive(Clone)] pub struct RedisCache { + /// It stores a pool of connections ready to be used. connection_pool: Vec, + /// It stores the size of the connection pool (in other words the number of + /// connections that should be stored in the pool). pool_size: u8, + /// It stores the index of which connection is being used at the moment. current_connection: u8, } diff --git a/src/cache/error.rs b/src/cache/error.rs index efd87c9..8bdb977 100644 --- a/src/cache/error.rs +++ b/src/cache/error.rs @@ -5,15 +5,12 @@ use std::fmt; use redis::RedisError; /// A custom error type used for handling redis async pool associated errors. 
-/// -/// This enum provides variants three different categories of errors: -/// * `RedisError` - This variant handles all errors related to `RedisError`, -/// * `PoolExhaustionWithConnectionDropError` - This variant handles the error -/// which occurs when all the connections in the connection pool return a connection -/// dropped redis error. #[derive(Debug)] pub enum PoolError { + /// This variant handles all errors related to `RedisError`, RedisError(RedisError), + /// This variant handles the errors which occurs when all the connections + /// in the connection pool return a connection dropped redis error. PoolExhaustionWithConnectionDropError, } diff --git a/src/cache/mod.rs b/src/cache/mod.rs index 03c4155..f40369f 100644 --- a/src/cache/mod.rs +++ b/src/cache/mod.rs @@ -1,2 +1,5 @@ +//! This module provides the modules which provide the functionality to cache the aggregated +//! results fetched and aggregated from the upstream search engines in a json format. + pub mod cacher; pub mod error; diff --git a/src/config/mod.rs b/src/config/mod.rs index 11ce559..331a3d7 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -1,2 +1,5 @@ +//! This module provides the modules which handles the functionality to parse the lua config +//! and convert the config options into rust readable form. + pub mod parser; pub mod parser_models; diff --git a/src/config/parser.rs b/src/config/parser.rs index bce9f05..4490bc6 100644 --- a/src/config/parser.rs +++ b/src/config/parser.rs @@ -9,33 +9,33 @@ use mlua::Lua; use std::{collections::HashMap, fs, thread::available_parallelism}; /// A named struct which stores the parsed config file options. -/// -/// # Fields -// -/// * `port` - It stores the parsed port number option on which the server should launch. -/// * `binding_ip` - It stores the parsed ip address option on which the server should launch -/// * `style` - It stores the theming options for the website. 
-/// * `redis_url` - It stores the redis connection url address on which the redis -/// client should connect. -/// * `aggregator` - It stores the option to whether enable or disable production use. -/// * `logging` - It stores the option to whether enable or disable logs. -/// * `debug` - It stores the option to whether enable or disable debug mode. -/// * `upstream_search_engines` - It stores all the engine names that were enabled by the user. -/// * `request_timeout` - It stores the time (secs) which controls the server request timeout. -/// * `threads` - It stores the number of threads which controls the app will use to run. #[derive(Clone)] pub struct Config { + /// It stores the parsed port number option on which the server should launch. pub port: u16, + /// It stores the parsed ip address option on which the server should launch pub binding_ip: String, + /// It stores the theming options for the website. pub style: Style, + /// It stores the redis connection url address on which the redis + /// client should connect. pub redis_url: String, + /// It stores the option to whether enable or disable production use. pub aggregator: AggregatorConfig, + /// It stores the option to whether enable or disable logs. pub logging: bool, + /// It stores the option to whether enable or disable debug mode. pub debug: bool, + /// It stores all the engine names that were enabled by the user. pub upstream_search_engines: Vec, + /// It stores the time (secs) which controls the server request timeout. pub request_timeout: u8, + /// It stores the number of threads which controls the app will use to run. pub threads: u8, + /// It stores configuration options for the ratelimiting middleware. pub rate_limiter: RateLimiter, + /// It stores the level of safe search to be used for restricting content in the + /// search results. 
pub safe_search: u8, } @@ -123,6 +123,11 @@ impl Config { } /// a helper function that sets the proper logging level +/// +/// # Arguments +/// +/// * `debug` - It takes the option to whether enable or disable debug mode. +/// * `logging` - It takes the option to whether enable or disable logs. fn set_logging_level(debug: bool, logging: bool) { if let Ok(pkg_env_var) = std::env::var("PKG_ENV") { if pkg_env_var.to_lowercase() == "dev" { diff --git a/src/config/parser_models.rs b/src/config/parser_models.rs index 21140dd..9dad348 100644 --- a/src/config/parser_models.rs +++ b/src/config/parser_models.rs @@ -12,15 +12,12 @@ use serde::{Deserialize, Serialize}; /// order to allow the deserializing the json back to struct in aggregate function in /// aggregator.rs and create a new struct out of it and then serialize it back to json and pass /// it to the template files. -/// -/// # Fields -// -/// * `theme` - It stores the parsed theme option used to set a theme for the website. -/// * `colorscheme` - It stores the parsed colorscheme option used to set a colorscheme for the -/// theme being used. #[derive(Serialize, Deserialize, Clone, Default)] pub struct Style { + /// It stores the parsed theme option used to set a theme for the website. pub theme: String, + /// It stores the parsed colorscheme option used to set a colorscheme for the + /// theme being used. pub colorscheme: String, } @@ -38,24 +35,18 @@ impl Style { } /// Configuration options for the aggregator. -/// -/// # Fields -/// -/// * `random_delay` - It stores the option to whether enable or disable random delays between -/// requests. #[derive(Clone)] pub struct AggregatorConfig { + /// It stores the option to whether enable or disable random delays between + /// requests. pub random_delay: bool, } /// Configuration options for the rate limiter middleware. -/// -/// # Fields -/// -/// * `number_of_requests` - The number of request that are allowed within a provided time limit. 
-/// * `time_limit` - The time limit in which the quantity of requests that should be accepted. #[derive(Clone)] pub struct RateLimiter { + /// The number of request that are allowed within a provided time limit. pub number_of_requests: u8, + /// The time limit in which the quantity of requests that should be accepted. pub time_limit: u8, } diff --git a/src/engines/duckduckgo.rs b/src/engines/duckduckgo.rs index 7b9f7d6..edca35a 100644 --- a/src/engines/duckduckgo.rs +++ b/src/engines/duckduckgo.rs @@ -19,24 +19,6 @@ pub struct DuckDuckGo; #[async_trait::async_trait] impl SearchEngine for DuckDuckGo { - /// This function scrapes results from the upstream engine duckduckgo and puts all the scraped - /// results like title, visiting_url (href in html),engine (from which engine it was fetched from) - /// and description in a RawSearchResult and then adds that to HashMap whose keys are url and - /// values are RawSearchResult struct and then returns it within a Result enum. - /// - /// # Arguments - /// - /// * `query` - Takes the user provided query to query to the upstream search engine with. - /// * `page` - Takes an u32 as an argument. - /// * `user_agent` - Takes a random user agent string as an argument. - /// * `request_timeout` - Takes a time (secs) as a value which controls the server request timeout. - /// - /// # Errors - /// - /// Returns an `EngineErrorKind` if the user is not connected to the internet or if their is failure to - /// reach the above `upstream search engine` page or if the `upstream search engine` is unable to - /// provide results for the requested search query and also returns error if the scraping selector - /// or HeaderMap fails to initialize. 
async fn results( &self, query: &str, diff --git a/src/engines/engine_models.rs b/src/engines/engine_models.rs index f4e7e5a..2bd50c6 100644 --- a/src/engines/engine_models.rs +++ b/src/engines/engine_models.rs @@ -6,19 +6,18 @@ use error_stack::{Result, ResultExt}; use std::{collections::HashMap, fmt, time::Duration}; /// A custom error type used for handle engine associated errors. -/// -/// This enum provides variants three different categories of errors: -/// * `RequestError` - This variant handles all request related errors like forbidden, not found, -/// etc. -/// * `EmptyResultSet` - This variant handles the not results found error provide by the upstream -/// search engines. -/// * `UnexpectedError` - This variant handles all the errors which are unexpected or occur rarely -/// and are errors mostly related to failure in initialization of HeaderMap, Selector errors and -/// all other errors occurring within the code handling the `upstream search engines`. #[derive(Debug)] pub enum EngineError { + /// This variant handles the no results found error provided by the upstream + /// search engines. EmptyResultSet, + /// This variant handles all request related errors like forbidden, not found, + /// etc. RequestError, + /// This variant handles all the errors which are unexpected or occur rarely + /// and are errors mostly related to failure in initialization of HeaderMap, + /// Selector errors and all other errors occurring within the code handling + /// the `upstream search engines`. UnexpectedError, } @@ -46,6 +45,23 @@ impl error_stack::Context for EngineError {} /// A trait to define common behavior for all search engines. #[async_trait::async_trait] pub trait SearchEngine: Sync + Send { + /// This helper function fetches/requests the search results from the upstream search engine in + /// an html form. 
+ /// + /// # Arguments + /// + /// * `url` - It takes the url of the upstream search engine with the user requested search + /// query appended in the search parameters. + /// * `header_map` - It takes the http request headers to be sent to the upstream engine in + /// order to prevent being detected as a bot. It takes the header as a HeaderMap type. + /// * `request_timeout` - It takes the request timeout value as seconds which is used to limit + /// the amount of time for each request to remain connected until the results can be provided + /// by the upstream engine. + /// + /// # Errors + /// + /// It returns the html data as a string if the upstream engine provides the data as expected + /// otherwise it returns a custom `EngineError`. async fn fetch_html_from_upstream( &self, url: &str, @@ -65,6 +81,24 @@ pub trait SearchEngine: Sync + Send { .change_context(EngineError::RequestError)?) } + /// This function scrapes results from the upstream engine and puts all the scraped results like + /// title, visiting_url (href in html), engine (from which engine it was fetched from) and description + /// in a RawSearchResult and then adds that to HashMap whose keys are url and values are RawSearchResult + /// struct and then returns it within a Result enum. + /// + /// # Arguments + /// + /// * `query` - Takes the user provided query to query to the upstream search engine with. + /// * `page` - Takes an u32 as an argument. + /// * `user_agent` - Takes a random user agent string as an argument. + /// * `request_timeout` - Takes a time (secs) as a value which controls the server request timeout. + /// + /// # Errors + /// + /// Returns an `EngineError` if the user is not connected to the internet or if there is a failure to + /// reach the above `upstream search engine` page or if the `upstream search engine` is unable to + /// provide results for the requested search query and also returns error if the scraping selector + /// or HeaderMap fails to initialize. 
async fn results( &self, query: &str, @@ -75,8 +109,12 @@ pub trait SearchEngine: Sync + Send { ) -> Result, EngineError>; } +/// A named struct which stores the engine struct with the name of the associated engine. pub struct EngineHandler { + /// It stores the engine struct wrapped in a box smart pointer as the engine struct implements + /// the `SearchEngine` trait. engine: Box, + /// It stores the name of the engine with which the struct is associated. name: &'static str, } @@ -87,7 +125,15 @@ impl Clone for EngineHandler { } impl EngineHandler { - /// parses an engine name into an engine handler, returns none if the engine is unknown + /// Parses an engine name into an engine handler. + /// + /// # Arguments + /// + /// * `engine_name` - It takes the name of the engine with which the struct is to be associated. + /// + /// # Returns + /// + /// It returns an option either containing the value or `None` if the engine is unknown. pub fn new(engine_name: &str) -> Option { let engine: (&'static str, Box) = match engine_name.to_lowercase().as_str() { @@ -102,6 +148,8 @@ impl EngineHandler { }) } + /// This function converts the EngineHandler type into a tuple containing the engine name and + /// the associated engine struct. pub fn into_name_engine(self) -> (&'static str, Box) { (self.name, self.engine) } diff --git a/src/engines/mod.rs b/src/engines/mod.rs index f9bb8ad..8267c93 100644 --- a/src/engines/mod.rs +++ b/src/engines/mod.rs @@ -1,3 +1,8 @@ +//! This module provides different modules which handle the functionality to fetch results from the +//! upstream search engines based on user requested queries. Also provides different models to +//! provide standard functions to be implemented for all the upstream search engine handling +//! code. Moreover, it also provides a custom error for the upstream search engine handling code. 
+ pub mod duckduckgo; pub mod engine_models; pub mod searx; diff --git a/src/engines/searx.rs b/src/engines/searx.rs index 4eb22c5..170364c 100644 --- a/src/engines/searx.rs +++ b/src/engines/searx.rs @@ -17,25 +17,6 @@ pub struct Searx; #[async_trait::async_trait] impl SearchEngine for Searx { - /// This function scrapes results from the upstream engine duckduckgo and puts all the scraped - /// results like title, visiting_url (href in html),engine (from which engine it was fetched from) - /// and description in a RawSearchResult and then adds that to HashMap whose keys are url and - /// values are RawSearchResult struct and then returns it within a Result enum. - /// - /// # Arguments - /// - /// * `query` - Takes the user provided query to query to the upstream search engine with. - /// * `page` - Takes an u32 as an argument. - /// * `user_agent` - Takes a random user agent string as an argument. - /// * `request_timeout` - Takes a time (secs) as a value which controls the server request timeout. - /// - /// # Errors - /// - /// Returns an `EngineErrorKind` if the user is not connected to the internet or if their is failure to - /// reach the above `upstream search engine` page or if the `upstream search engine` is unable to - /// provide results for the requested search query and also returns error if the scraping selector - /// or HeaderMap fails to initialize. - async fn results( &self, query: &str, diff --git a/src/handler/mod.rs b/src/handler/mod.rs index 8118b29..188767d 100644 --- a/src/handler/mod.rs +++ b/src/handler/mod.rs @@ -1 +1,5 @@ +//! This module provides modules which provide the functionality to handle paths for different +//! files present on different paths and provide one appropriate path on which it is present and +//! can be used. 
+ pub mod paths; diff --git a/src/handler/paths.rs b/src/handler/paths.rs index 91f7f94..9ea5fff 100644 --- a/src/handler/paths.rs +++ b/src/handler/paths.rs @@ -7,42 +7,46 @@ use std::path::Path; use std::sync::OnceLock; // ------- Constants -------- -static PUBLIC_DIRECTORY_NAME: &str = "public"; -static COMMON_DIRECTORY_NAME: &str = "websurfx"; -static CONFIG_FILE_NAME: &str = "config.lua"; -static ALLOWLIST_FILE_NAME: &str = "allowlist.txt"; -static BLOCKLIST_FILE_NAME: &str = "blocklist.txt"; +/// The constant holding the name of the theme folder. +const PUBLIC_DIRECTORY_NAME: &str = "public"; +/// The constant holding the name of the common folder. +const COMMON_DIRECTORY_NAME: &str = "websurfx"; +/// The constant holding the name of the config file. +const CONFIG_FILE_NAME: &str = "config.lua"; +/// The constant holding the name of the AllowList text file. +const ALLOWLIST_FILE_NAME: &str = "allowlist.txt"; +/// The constant holding the name of the BlockList text file. +const BLOCKLIST_FILE_NAME: &str = "blocklist.txt"; +/// An enum type which provides different variants to handle paths for various files/folders. #[derive(Hash, PartialEq, Eq, Debug)] pub enum FileType { + /// This variant handles all the paths associated with the config file. Config, + /// This variant handles all the paths associated with the Allowlist text file. AllowList, + /// This variant handles all the paths associated with the BlockList text file. BlockList, + /// This variant handles all the paths associated with the public folder (Theme folder). Theme, } +/// A static variable which stores the different filesystem paths for various file/folder types. static FILE_PATHS_FOR_DIFF_FILE_TYPES: OnceLock>> = OnceLock::new(); -/// A helper function which returns an appropriate config file path checking if the config -/// file exists on that path. 
+/// A function which returns an appropriate path for the provided file type by checking if the path +/// for the given file type exists on that path. /// /// # Error /// -/// Returns a `config file not found!!` error if the config file is not present under following -/// paths which are: -/// 1. `~/.config/websurfx/` if it not present here then it fallbacks to the next one (2) -/// 2. `/etc/xdg/websurfx/config.lua` if it is not present here then it fallbacks to the next -/// one (3). -/// 3. `websurfx/` (under project folder ( or codebase in other words)) if it is not present -/// here then it returns an error as mentioned above. - -/// A function which returns an appropriate theme directory path checking if the theme -/// directory exists on that path. +/// Returns a `file/folder not found!!` error (prefixed with the file type) if the given file_type folder/file is not +/// present on the path on which it is being tested. /// -/// # Error +/// # Example +/// +/// If this function is given the file_type of Theme variant then the theme folder is checked by the +/// following steps: /// -/// Returns a `Theme (public) folder not found!!` error if the theme folder is not present under following -/// paths which are: /// 1. `/opt/websurfx` if it not present here then it fallbacks to the next one (2) /// 2. Under project folder ( or codebase in other words) if it is not present /// here then it returns an error as mentioned above. @@ -110,6 +114,6 @@ pub fn file_path(file_type: FileType) -> Result<&'static str, Error> { // if no of the configs above exist, return error Err(Error::new( std::io::ErrorKind::NotFound, - format!("{:?} file not found!!", file_type), + format!("{:?} file/folder not found!!", file_type), )) } diff --git a/src/lib.rs b/src/lib.rs index 6c63270..b33ace3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,10 @@ //! This main library module provides the functionality to provide and handle the Tcp server //! 
and register all the routes for the `websurfx` meta search engine website. +#![forbid(unsafe_code, clippy::panic)] +#![deny(missing_docs, clippy::missing_docs_in_private_items, clippy::perf)] +#![warn(clippy::cognitive_complexity, rust_2018_idioms)] + pub mod cache; pub mod config; pub mod engines; @@ -41,7 +45,7 @@ use handler::paths::{file_path, FileType}; /// let server = run(listener,config).expect("Failed to start server"); /// ``` pub fn run(listener: TcpListener, config: Config) -> std::io::Result { - let mut handlebars: Handlebars = Handlebars::new(); + let mut handlebars: Handlebars<'_> = Handlebars::new(); let public_folder_path: &str = file_path(FileType::Theme)?; @@ -49,7 +53,7 @@ pub fn run(listener: TcpListener, config: Config) -> std::io::Result { .register_templates_directory(".html", format!("{}/templates", public_folder_path)) .unwrap(); - let handlebars_ref: web::Data = web::Data::new(handlebars); + let handlebars_ref: web::Data> = web::Data::new(handlebars); let cloned_config_threads_opt: u8 = config.threads; diff --git a/src/results/aggregation_models.rs b/src/results/aggregation_models.rs index 280767c..30316e2 100644 --- a/src/results/aggregation_models.rs +++ b/src/results/aggregation_models.rs @@ -9,20 +9,17 @@ use crate::{config::parser_models::Style, engines::engine_models::EngineError}; /// A named struct to store the raw scraped search results scraped search results from the /// upstream search engines before aggregating it.It derives the Clone trait which is needed /// to write idiomatic rust using `Iterators`. -/// -/// # Fields -/// -/// * `title` - The title of the search result. -/// * `url` - The url which is accessed when clicked on it /// (href url in html in simple words). -/// * `description` - The description of the search result. -/// * `engine` - The names of the upstream engines from which this results were provided. 
-#[derive(Clone, Serialize, Deserialize, Debug)] +#[derive(Clone, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct SearchResult { + /// The title of the search result. pub title: String, + /// The url which is accessed when clicked on it. pub url: String, + /// The description of the search result. pub description: String, + /// The names of the upstream engines from which these results were provided. pub engine: SmallVec<[String; 0]>, } @@ -64,14 +61,27 @@ impl SearchResult { } } +/// A named struct that stores the error info related to the upstream search engines. #[derive(Serialize, Deserialize, Clone)] pub struct EngineErrorInfo { + /// It stores the error type which occurred while fetching the result from a particular search + /// engine. pub error: String, + /// It stores the name of the engine that failed to provide the requested search results. pub engine: String, + /// It stores the name of the color to indicate how severe the particular error is (In + /// other words it indicates the severity of the error/issue). pub severity_color: String, } impl EngineErrorInfo { + /// Constructs a new `EngineErrorInfo` with the given arguments needed for the struct. + /// + /// # Arguments + /// + /// * `error` - It takes the error type which occurred while fetching the result from a particular + /// search engine. + /// * `engine` - It takes the name of the engine that failed to provide the requested search results. pub fn new(error: &EngineError, engine: &str) -> Self { Self { error: match error { @@ -91,25 +101,26 @@ impl EngineErrorInfo { /// A named struct to store, serialize, deserialize the all the search results scraped and /// aggregated from the upstream search engines. -/// -/// # Fields -/// -/// * `results` - Stores the individual serializable `SearchResult` struct into a vector of /// `SearchResult` structs. -/// * `page_query` - Stores the current pages search query `q` provided in the search url. 
-/// * `style` - Stores the theming options for the website. -/// * `engine_errors_info` - Stores the information on which engines failed with their engine name -/// and the type of error that caused it. -/// * `empty_result_set` - Stores a boolean which indicates that no engines gave a result for the -/// given search query. #[derive(Serialize, Deserialize, Default)] #[serde(rename_all = "camelCase")] pub struct SearchResults { + /// Stores the individual serializable `SearchResult` structs in a vector. pub results: Vec, + /// Stores the current page's search query `q` provided in the search url. pub page_query: String, + /// Stores the theming options for the website. pub style: Style, + /// Stores the information on which engines failed with their engine name + /// and the type of error that caused it. pub engine_errors_info: Vec, + /// Stores the flag option which holds the check value that the following + /// search query was disallowed when the safe search level is set to 4 and it + /// was present in the `Blocklist` file. pub disallowed: bool, + /// Stores the flag option which holds the check value that the following + /// search query was filtered when the safe search level is set to 3 and it + /// was present in the `Blocklist` file. pub filtered: bool, } @@ -122,9 +133,8 @@ impl SearchResults { /// and stores it into a vector of `SearchResult` structs. /// * `page_query` - Takes an argument of current page`s search query `q` provided in /// the search url. - /// * `empty_result_set` - Takes a boolean which indicates that no engines gave a result for the - /// given search query. - /// * `` + /// * `engine_errors_info` - Takes an array of structs which contains information regarding + /// which engines failed with their names, reason and their severity color name. pub fn new( results: Vec, page_query: &str, diff --git a/src/results/mod.rs b/src/results/mod.rs index 0c13442..b08eec0 100644 --- a/src/results/mod.rs +++ b/src/results/mod.rs @@ -1,3 +1,7 @@ +//! 
This module provides modules that handle the functionality to aggregate the fetched search +//! results from the upstream search engines and filter them if safe search is set to 3 or 4. Also, +//! provides various models to aggregate search results into a standardized form. + pub mod aggregation_models; pub mod aggregator; pub mod user_agent; diff --git a/src/results/user_agent.rs b/src/results/user_agent.rs index 3bfa05b..ab2811b 100644 --- a/src/results/user_agent.rs +++ b/src/results/user_agent.rs @@ -4,6 +4,8 @@ use std::sync::OnceLock; use fake_useragent::{Browsers, UserAgents, UserAgentsBuilder}; +/// A static variable which stores the initially built `UserAgents` struct, so that it can be reused +/// again and again without the need of reinitializing the `UserAgents` struct. static USER_AGENTS: OnceLock = OnceLock::new(); /// A function to generate random user agent to improve privacy of the user. diff --git a/src/server/mod.rs b/src/server/mod.rs index 6a664ab..f5d2ce9 100644 --- a/src/server/mod.rs +++ b/src/server/mod.rs @@ -1 +1,6 @@ +//! This module provides modules that handle the functionality of handling different routes/paths +//! for the `websurfx` search engine website. Also it handles the parsing of search parameters in +//! the search route. Also, caches the next, current and previous search results in the search +//! routes with the help of the redis server. + pub mod routes; diff --git a/src/server/routes.rs b/src/server/routes.rs index 3d69e78..57aa413 100644 --- a/src/server/routes.rs +++ b/src/server/routes.rs @@ -25,17 +25,16 @@ use tokio::join; static REDIS_CACHE: async_once_cell::OnceCell = async_once_cell::OnceCell::new(); /// A named struct which deserializes all the user provided search parameters and stores them. -/// -/// # Fields -/// -/// * `q` - It stores the search parameter option `q` (or query in simple words) -/// of the search url. 
-/// * `page` - It stores the search parameter `page` (or pageno in simple words) -/// of the search url. #[derive(Deserialize)] struct SearchParams { + /// It stores the search parameter option `q` (or query in simple words) + /// of the search url. q: Option, + /// It stores the search parameter `page` (or pageno in simple words) + /// of the search url. page: Option, + /// It stores the search parameter `safesearch` (or safe search level in simple words) of the + /// search url. safesearch: Option, } @@ -63,17 +62,14 @@ pub async fn not_found( } /// A named struct which is used to deserialize the cookies fetched from the client side. -/// -/// # Fields -/// -/// * `theme` - It stores the theme name used in the website. -/// * `colorscheme` - It stores the colorscheme name used for the website theme. -/// * `engines` - It stores the user selected upstream search engines selected from the UI. #[allow(dead_code)] #[derive(Deserialize)] struct Cookie<'a> { + /// It stores the theme name used in the website. theme: &'a str, + /// It stores the colorscheme name used for the website theme. colorscheme: &'a str, + /// It stores the user selected upstream search engines selected from the UI. engines: Vec<&'a str>, } @@ -174,8 +170,21 @@ pub async fn search( } } -/// Fetches the results for a query and page. -/// First checks the redis cache, if that fails it gets proper results +/// Fetches the results for a query and page. It first checks the redis cache, if that +/// fails it gets proper results by requesting from the upstream search engines. +/// +/// # Arguments +/// +/// * `url` - It takes the url of the current page that requested the search results for a +/// particular search query. +/// * `config` - It takes a parsed config struct. +/// * `query` - It takes the user provided search query to fetch the results for. +/// * `req` - It takes the `HttpRequest` struct as a value. 
+/// +/// # Error +/// +/// It returns the `SearchResults` struct if the search results could be successfully fetched from +/// the cache or from the upstream search engines otherwise it returns an appropriate error. async fn results( url: String, config: &Config, @@ -184,6 +193,7 @@ async fn results( req: HttpRequest, safe_search: u8, ) -> Result> { + // Initialize redis cache connection struct let mut redis_cache: RedisCache = REDIS_CACHE .get_or_init(async { // Initialize redis cache connection pool only one and store it in the heap. @@ -191,7 +201,6 @@ async fn results( }) .await .clone(); - // fetch the cached results json. let cached_results_json: Result> = redis_cache.clone().cached_json(&url).await; @@ -223,7 +232,8 @@ async fn results( // UI and use that. let mut results: SearchResults = match req.cookie("appCookie") { Some(cookie_value) => { - let cookie_value: Cookie = serde_json::from_str(cookie_value.name_value().1)?; + let cookie_value: Cookie<'_> = + serde_json::from_str(cookie_value.name_value().1)?; let engines: Vec = cookie_value .engines @@ -267,6 +277,8 @@ async fn results( } } +/// A helper function which checks whether the search query contains any keywords which should be +/// disallowed/allowed based on the regex based rules present in the blocklist and allowlist files. fn is_match_from_filter_list( file_path: &str, query: &str,