Mirror of https://github.com/neon-mmd/websurfx.git (synced 2024-11-22 14:08:23 -05:00)
⚙️ refactor: change & add documentation to the code based on the lints (#205)
Commit 049b1c1ddd (parent 0d2d449889)
src/cache/cacher.rs (vendored, 5 changes)

@@ -6,11 +6,8 @@ use redis::{Client, Commands, Connection};
 /// A named struct which stores the redis Connection url address to which the client will
 /// connect to.
-///
-/// # Fields
-///
-/// * `redis_connection_url` - It stores the redis Connection url address.
 pub struct RedisCache {
+    /// It stores the redis Connection url address.
     connection: Connection,
 }
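This first hunk sets the pattern for the whole commit: the struct-level `# Fields` list moves onto the fields themselves, the only form a documentation lint can verify mechanically. A minimal sketch of the kind of lint setup that motivates this (the exact lints websurfx enables are an assumption here, not stated in the diff):

```rust
//! Toy crate root. `missing_docs` requires docs on public items; clippy's
//! `missing_docs_in_private_items` extends that to private fields, which is
//! why the commit gives even the private `connection` field its own doc line.
#![deny(missing_docs)]
#![deny(clippy::missing_docs_in_private_items)]

/// A named struct which stores the redis connection url to which the client connects.
pub struct RedisCache {
    /// Documented per field: removing this line fails the clippy lint above,
    /// while a struct-level `# Fields` list would not count.
    connection_url: String,
}
```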
src/cache/mod.rs (vendored, 3 changes)

@@ -1 +1,4 @@
+//! This module provides the modules which provide the functionality to cache the aggregated
+//! results fetched and aggregated from the upstream search engines in a json format.
+
 pub mod cacher;
src/config/mod.rs

@@ -1,2 +1,5 @@
+//! This module provides the modules which handles the functionality to parse the lua config
+//! and convert the config options into rust readable form.
+
 pub mod parser;
 pub mod parser_models;
src/config/parser.rs

@@ -9,42 +9,36 @@ use rlua::Lua;
 use std::{collections::HashMap, fs, thread::available_parallelism};

 /// A named struct which stores the parsed config file options.
-///
-/// # Fields
-//
-/// * `port` - It stores the parsed port number option on which the server should launch.
-/// * `binding_ip` - It stores the parsed ip address option on which the server should launch
-/// * `style` - It stores the theming options for the website.
-/// * `redis_url` - It stores the redis connection url address on which the redis
-/// client should connect.
-/// * `aggregator` - It stores the option to whether enable or disable production use.
-/// * `logging` - It stores the option to whether enable or disable logs.
-/// * `debug` - It stores the option to whether enable or disable debug mode.
-/// * `upstream_search_engines` - It stores all the engine names that were enabled by the user.
-/// * `request_timeout` - It stores the time (secs) which controls the server request timeout.
-/// * `threads` - It stores the number of threads which controls the app will use to run.
 #[derive(Clone)]
 pub struct Config {
+    /// It stores the parsed port number option on which the server should launch.
     pub port: u16,
+    /// It stores the parsed ip address option on which the server should launch
     pub binding_ip: String,
+    /// It stores the theming options for the website.
     pub style: Style,
+    /// It stores the redis connection url address on which the redis
+    /// client should connect.
     pub redis_url: String,
+    /// It stores the option to whether enable or disable production use.
     pub aggregator: AggregatorConfig,
+    /// It stores the option to whether enable or disable logs.
     pub logging: bool,
+    /// It stores the option to whether enable or disable debug mode.
     pub debug: bool,
+    /// It stores all the engine names that were enabled by the user.
     pub upstream_search_engines: Vec<crate::engines::engine_models::EngineHandler>,
+    /// It stores the time (secs) which controls the server request timeout.
     pub request_timeout: u8,
+    /// It stores the number of threads which controls the app will use to run.
     pub threads: u8,
 }

 /// Configuration options for the aggregator.
-///
-/// # Fields
-///
-/// * `random_delay` - It stores the option to whether enable or disable random delays between
-/// requests.
 #[derive(Clone)]
 pub struct AggregatorConfig {
+    /// It stores the option to whether enable or disable random delays between
+    /// requests.
     pub random_delay: bool,
 }
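Every `Config` field now carries the doc for the option it holds. For background, a hedged sketch of the rlua pattern a parser like this typically uses to pull such options out of `config.lua` (the Lua global names are assumed to mirror the field names, and the real parser reads many more options than these two):

```rust
use rlua::Lua;

/// Minimal sketch: evaluate the user's config.lua, then read documented options
/// out of the Lua globals. Only two of the fields listed above are shown.
fn parse_port_and_ip(config_source: &str) -> rlua::Result<(u16, String)> {
    let lua = Lua::new();
    lua.context(|ctx| {
        // Run the config script so its assignments land in the global table.
        ctx.load(config_source).exec()?;
        let globals = ctx.globals();
        let port: u16 = globals.get("port")?;
        let binding_ip: String = globals.get("binding_ip")?;
        Ok((port, binding_ip))
    })
}
```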
@@ -115,6 +109,11 @@ impl Config {
 }

 /// a helper function that sets the proper logging level
+///
+/// # Arguments
+///
+/// * `debug` - It takes the option to whether enable or disable debug mode.
+/// * `logging` - It takes the option to whether enable or disable logs.
 fn set_logging_level(debug: bool, logging: bool) {
     if let Ok(pkg_env_var) = std::env::var("PKG_ENV") {
         if pkg_env_var.to_lowercase() == "dev" {
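The newly documented arguments, together with the `PKG_ENV` check visible in the context lines, suggest the helper's shape. A hedged sketch (the use of `env_logger` and the exact flag-to-level mapping are assumptions, not taken from the diff):

```rust
use log::LevelFilter;

fn set_logging_level(debug: bool, logging: bool) {
    // Dev package environment: log everything, regardless of the config flags.
    if let Ok(pkg_env_var) = std::env::var("PKG_ENV") {
        if pkg_env_var.to_lowercase() == "dev" {
            env_logger::Builder::new()
                .filter(None, LevelFilter::Trace)
                .init();
            return;
        }
    }
    // Otherwise map the two documented options onto a log level (assumed mapping).
    let level = match (debug, logging) {
        (true, _) => LevelFilter::Debug,
        (false, true) => LevelFilter::Info,
        (false, false) => LevelFilter::Error,
    };
    env_logger::Builder::new().filter(None, level).init();
}
```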
src/config/parser_models.rs

@@ -12,15 +12,12 @@ use serde::{Deserialize, Serialize};
 /// order to allow the deserializing the json back to struct in aggregate function in
 /// aggregator.rs and create a new struct out of it and then serialize it back to json and pass
 /// it to the template files.
-///
-/// # Fields
-//
-/// * `theme` - It stores the parsed theme option used to set a theme for the website.
-/// * `colorscheme` - It stores the parsed colorscheme option used to set a colorscheme for the
-/// theme being used.
 #[derive(Serialize, Deserialize, Clone)]
 pub struct Style {
+    /// It stores the parsed theme option used to set a theme for the website.
     pub theme: String,
+    /// It stores the parsed colorscheme option used to set a colorscheme for the
+    /// theme being used.
     pub colorscheme: String,
 }
src/engines/duckduckgo.rs

@@ -19,25 +19,7 @@ pub struct DuckDuckGo;

 #[async_trait::async_trait]
 impl SearchEngine for DuckDuckGo {
-    /// This function scrapes results from the upstream engine duckduckgo and puts all the scraped
-    /// results like title, visiting_url (href in html),engine (from which engine it was fetched from)
-    /// and description in a RawSearchResult and then adds that to HashMap whose keys are url and
-    /// values are RawSearchResult struct and then returns it within a Result enum.
-    ///
-    /// # Arguments
-    ///
-    /// * `query` - Takes the user provided query to query to the upstream search engine with.
-    /// * `page` - Takes an u32 as an argument.
-    /// * `user_agent` - Takes a random user agent string as an argument.
-    /// * `request_timeout` - Takes a time (secs) as a value which controls the server request timeout.
-    ///
-    /// # Errors
-    ///
-    /// Returns an `EngineErrorKind` if the user is not connected to the internet or if their is failure to
-    /// reach the above `upstream search engine` page or if the `upstream search engine` is unable to
-    /// provide results for the requested search query and also returns error if the scraping selector
-    /// or HeaderMap fails to initialize.
     async fn results(
         &self,
         query: String,
         page: u32,
src/engines/engine_models.rs

@@ -6,19 +6,18 @@ use error_stack::{IntoReport, Result, ResultExt};
 use std::{collections::HashMap, fmt, time::Duration};

 /// A custom error type used for handle engine associated errors.
-///
-/// This enum provides variants three different categories of errors:
-/// * `RequestError` - This variant handles all request related errors like forbidden, not found,
-/// etc.
-/// * `EmptyResultSet` - This variant handles the not results found error provide by the upstream
-/// search engines.
-/// * `UnexpectedError` - This variant handles all the errors which are unexpected or occur rarely
-/// and are errors mostly related to failure in initialization of HeaderMap, Selector errors and
-/// all other errors occurring within the code handling the `upstream search engines`.
 #[derive(Debug)]
 pub enum EngineError {
+    /// This variant handles all request related errors like forbidden, not found,
+    /// etc.
     EmptyResultSet,
+    /// This variant handles the not results found error provide by the upstream
+    /// search engines.
     RequestError,
+    /// This variant handles all the errors which are unexpected or occur rarely
+    /// and are errors mostly related to failure in initialization of HeaderMap,
+    /// Selector errors and all other errors occurring within the code handling
+    /// the `upstream search engines`.
     UnexpectedError,
 }
@@ -46,6 +45,23 @@ impl error_stack::Context for EngineError {}
 /// A trait to define common behavior for all search engines.
 #[async_trait::async_trait]
 pub trait SearchEngine: Sync + Send {
+    /// This helper function fetches/requests the search results from the upstream search engine in
+    /// an html form.
+    ///
+    /// # Arguments
+    ///
+    /// * `url` - It takes the url of the upstream search engine with the user requested search
+    /// query appended in the search parameters.
+    /// * `header_map` - It takes the http request headers to be sent to the upstream engine in
+    /// order to prevent being detected as a bot. It takes the header as a HeaderMap type.
+    /// * `request_timeout` - It takes the request timeout value as seconds which is used to limit
+    /// the amount of time for each request to remain connected when until the results can be provided
+    /// by the upstream engine.
+    ///
+    /// # Error
+    ///
+    /// It returns the html data as a string if the upstream engine provides the data as expected
+    /// otherwise it returns a custom `EngineError`.
     async fn fetch_html_from_upstream(
         &self,
         url: String,
@@ -67,6 +83,24 @@ pub trait SearchEngine: Sync + Send {
             .change_context(EngineError::RequestError)?)
     }

+    /// This function scrapes results from the upstream engine and puts all the scraped results like
+    /// title, visiting_url (href in html),engine (from which engine it was fetched from) and description
+    /// in a RawSearchResult and then adds that to HashMap whose keys are url and values are RawSearchResult
+    /// struct and then returns it within a Result enum.
+    ///
+    /// # Arguments
+    ///
+    /// * `query` - Takes the user provided query to query to the upstream search engine with.
+    /// * `page` - Takes an u32 as an argument.
+    /// * `user_agent` - Takes a random user agent string as an argument.
+    /// * `request_timeout` - Takes a time (secs) as a value which controls the server request timeout.
+    ///
+    /// # Errors
+    ///
+    /// Returns an `EngineErrorKind` if the user is not connected to the internet or if their is failure to
+    /// reach the above `upstream search engine` page or if the `upstream search engine` is unable to
+    /// provide results for the requested search query and also returns error if the scraping selector
+    /// or HeaderMap fails to initialize.
     async fn results(
         &self,
         query: String,
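Hoisting these docs onto the trait means rustdoc now shows one canonical description for every implementor, which is why the per-impl copies in duckduckgo.rs and searx.rs could be deleted above. A reduced sketch of the pattern (the signature is abridged: per the docs, the real method also takes `user_agent` and `request_timeout`):

```rust
use std::collections::HashMap;

// Stand-ins for the crate's real types, for illustration only.
struct SearchResult;
#[derive(Debug)]
enum EngineError {
    UnexpectedError,
}

#[async_trait::async_trait]
trait SearchEngine: Sync + Send {
    /// Documented once here; every implementor inherits this text in rustdoc.
    async fn results(
        &self,
        query: String,
        page: u32,
    ) -> Result<HashMap<String, SearchResult>, EngineError>;
}

struct DuckDuckGo;

#[async_trait::async_trait]
impl SearchEngine for DuckDuckGo {
    // No doc comment needed on the impl; the trait-level docs apply.
    async fn results(
        &self,
        _query: String,
        _page: u32,
    ) -> Result<HashMap<String, SearchResult>, EngineError> {
        Err(EngineError::UnexpectedError) // scraping elided in this sketch
    }
}
```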
@@ -76,8 +110,12 @@ pub trait SearchEngine: Sync + Send {
     ) -> Result<HashMap<String, SearchResult>, EngineError>;
 }

+/// A named struct which stores the engine struct with the name of the associated engine.
 pub struct EngineHandler {
+    /// It stores the engine struct wrapped in a box smart pointer as the engine struct implements
+    /// the `SearchEngine` trait.
     engine: Box<dyn SearchEngine>,
+    /// It stores the name of the engine to which the struct is associated to.
     name: &'static str,
 }
@@ -88,7 +126,15 @@ impl Clone for EngineHandler {
 }

 impl EngineHandler {
-    /// parses an engine name into an engine handler, returns none if the engine is unknown
+    /// Parses an engine name into an engine handler.
+    ///
+    /// # Arguments
+    ///
+    /// * `engine_name` - It takes the name of the engine to which the struct was associated to.
+    ///
+    /// # Returns
+    ///
+    /// It returns an option either containing the value or a none if the engine is unknown
     pub fn new(engine_name: &str) -> Option<Self> {
         let engine: (&'static str, Box<dyn SearchEngine>) =
             match engine_name.to_lowercase().as_str() {
@@ -103,6 +149,8 @@ impl EngineHandler {
         })
     }

+    /// This function converts the EngineHandler type into a tuple containing the engine name and
+    /// the associated engine struct.
    pub fn into_name_engine(self) -> (&'static str, Box<dyn SearchEngine>) {
        (self.name, self.engine)
    }
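Taken together, the documented `new` and `into_name_engine` form a small pipeline from user-supplied engine names to usable engine instances. A brief usage sketch of that documented API (the engine list in the comment is illustrative):

```rust
/// Unknown engine names yield `None` from `EngineHandler::new` and are skipped.
fn select_engines(names: &[&str]) -> Vec<(&'static str, Box<dyn SearchEngine>)> {
    names
        .iter()
        .filter_map(|name| EngineHandler::new(name))
        .map(EngineHandler::into_name_engine)
        .collect()
}

// e.g. select_engines(&["duckduckgo", "searx", "unknown-engine"]) keeps the first two.
```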
src/engines/mod.rs

@@ -1,3 +1,8 @@
+//! This module provides different modules which handles the functionlity to fetch results from the
+//! upstream search engines based on user requested queries. Also provides different models to
+//! provide a standard functions to be implemented for all the upstream search engine handling
+//! code. Moreover, it also provides a custom error for the upstream search engine handling code.
+
 pub mod duckduckgo;
 pub mod engine_models;
 pub mod searx;
src/engines/searx.rs

@@ -17,25 +17,6 @@ pub struct Searx;

 #[async_trait::async_trait]
 impl SearchEngine for Searx {
-    /// This function scrapes results from the upstream engine duckduckgo and puts all the scraped
-    /// results like title, visiting_url (href in html),engine (from which engine it was fetched from)
-    /// and description in a RawSearchResult and then adds that to HashMap whose keys are url and
-    /// values are RawSearchResult struct and then returns it within a Result enum.
-    ///
-    /// # Arguments
-    ///
-    /// * `query` - Takes the user provided query to query to the upstream search engine with.
-    /// * `page` - Takes an u32 as an argument.
-    /// * `user_agent` - Takes a random user agent string as an argument.
-    /// * `request_timeout` - Takes a time (secs) as a value which controls the server request timeout.
-    ///
-    /// # Errors
-    ///
-    /// Returns an `EngineErrorKind` if the user is not connected to the internet or if their is failure to
-    /// reach the above `upstream search engine` page or if the `upstream search engine` is unable to
-    /// provide results for the requested search query and also returns error if the scraping selector
-    /// or HeaderMap fails to initialize.
-
     async fn results(
         &self,
         query: String,
src/handler/mod.rs

@@ -1 +1,5 @@
+//! This module provides modules which provide the functionality to handle paths for different
+//! files present on different paths and provide one appropriate path on which it is present and
+//! can be used.
+
 pub mod paths;
src/handler/paths.rs

@@ -6,20 +6,31 @@ use std::io::Error;
 use std::path::Path;

 // ------- Constants --------
-static PUBLIC_DIRECTORY_NAME: &str = "public";
-static COMMON_DIRECTORY_NAME: &str = "websurfx";
-static CONFIG_FILE_NAME: &str = "config.lua";
-static ALLOWLIST_FILE_NAME: &str = "allowlist.txt";
-static BLOCKLIST_FILE_NAME: &str = "blocklist.txt";
+/// The constant holding the name of the theme folder.
+const PUBLIC_DIRECTORY_NAME: &str = "public";
+/// The constant holding the name of the common folder.
+const COMMON_DIRECTORY_NAME: &str = "websurfx";
+/// The constant holding the name of the config file.
+const CONFIG_FILE_NAME: &str = "config.lua";
+/// The constant holding the name of the AllowList text file.
+const ALLOWLIST_FILE_NAME: &str = "allowlist.txt";
+/// The constant holding the name of the BlockList text file.
+const BLOCKLIST_FILE_NAME: &str = "blocklist.txt";

+/// An enum type which provides different variants to handle paths for various files/folders.
 #[derive(Hash, PartialEq, Eq, Debug)]
 pub enum FileType {
+    /// This variant handles all the paths associated with the config file.
     Config,
+    /// This variant handles all the paths associated with the Allowlist text file.
     AllowList,
+    /// This variant handles all the paths associated with the BlockList text file.
     BlockList,
+    /// This variant handles all the paths associated with the public folder (Theme folder).
     Theme,
 }

+/// A static variable which stores the different filesystem paths for various file/folder types.
 static FILE_PATHS_FOR_DIFF_FILE_TYPES: once_cell::sync::Lazy<HashMap<FileType, Vec<String>>> =
     once_cell::sync::Lazy::new(|| {
         HashMap::from([
@@ -72,26 +83,19 @@ static FILE_PATHS_FOR_DIFF_FILE_TYPES: once_cell::sync::Lazy<HashMap<FileType, Vec<String>>> =
     ])
 });

-/// A helper function which returns an appropriate config file path checking if the config
-/// file exists on that path.
+/// A function which returns an appropriate path for thr provided file type by checking if the path
+/// for the given file type exists on that path.
 ///
 /// # Error
 ///
-/// Returns a `config file not found!!` error if the config file is not present under following
-/// paths which are:
-/// 1. `~/.config/websurfx/` if it not present here then it fallbacks to the next one (2)
-/// 2. `/etc/xdg/websurfx/config.lua` if it is not present here then it fallbacks to the next
-/// one (3).
-/// 3. `websurfx/` (under project folder ( or codebase in other words)) if it is not present
-/// here then it returns an error as mentioned above.
-
-/// A function which returns an appropriate theme directory path checking if the theme
-/// directory exists on that path.
+/// Returns a `<File Name> folder/file not found!!` error if the give file_type folder/file is not
+/// present on the path on which it is being tested.
 ///
-/// # Error
+/// # Example
+///
+/// If this function is give the file_type of Theme variant then the theme folder is checked by the
+/// following steps:
 ///
-/// Returns a `Theme (public) folder not found!!` error if the theme folder is not present under following
-/// paths which are:
 /// 1. `/opt/websurfx` if it not present here then it fallbacks to the next one (2)
 /// 2. Under project folder ( or codebase in other words) if it is not present
 /// here then it returns an error as mentioned above.
@@ -106,6 +110,6 @@ pub fn file_path(file_type: FileType) -> Result<String, Error> {
     // if no of the configs above exist, return error
     Err(Error::new(
         std::io::ErrorKind::NotFound,
-        format!("{:?} file not found!!", file_type),
+        format!("{:?} file/folder not found!!", file_type),
     ))
 }
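Pulling the three hunks together: the constants name the files, `FileType` selects a candidate list from the lazy map, and `file_path` returns the first candidate that exists or the `NotFound` error shown above. A reduced sketch of that flow, keyed by `&str` instead of the real `FileType` enum; the candidate paths are the ones listed in the old doc comments:

```rust
use once_cell::sync::Lazy;
use std::collections::HashMap;
use std::io::{Error, ErrorKind};
use std::path::Path;

/// Candidate locations per file type, built once on first access.
static CANDIDATE_PATHS: Lazy<HashMap<&'static str, Vec<String>>> = Lazy::new(|| {
    HashMap::from([(
        "config",
        vec![
            format!(
                "{}/.config/websurfx/config.lua",
                std::env::var("HOME").unwrap_or_default()
            ),
            "/etc/xdg/websurfx/config.lua".to_owned(),
            "websurfx/config.lua".to_owned(),
        ],
    )])
});

/// Returns the first existing candidate, else the error shape the last hunk settles on.
fn file_path(file_type: &'static str) -> Result<String, Error> {
    for path in CANDIDATE_PATHS.get(file_type).into_iter().flatten() {
        if Path::new(path).exists() {
            return Ok(path.clone());
        }
    }
    Err(Error::new(
        ErrorKind::NotFound,
        format!("{:?} file/folder not found!!", file_type),
    ))
}
```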
src/results/aggregation_models.rs

@@ -8,20 +8,17 @@ use crate::{config::parser_models::Style, engines::engine_models::EngineError};
 /// A named struct to store the raw scraped search results scraped search results from the
 /// upstream search engines before aggregating it.It derives the Clone trait which is needed
 /// to write idiomatic rust using `Iterators`.
-///
-/// # Fields
-///
-/// * `title` - The title of the search result.
-/// * `url` - The url which is accessed when clicked on it
 /// (href url in html in simple words).
-/// * `description` - The description of the search result.
-/// * `engine` - The names of the upstream engines from which this results were provided.
 #[derive(Clone, Serialize, Deserialize)]
 #[serde(rename_all = "camelCase")]
 pub struct SearchResult {
+    /// The title of the search result.
     pub title: String,
+    /// The url which is accessed when clicked on it
     pub url: String,
+    /// The description of the search result.
     pub description: String,
+    /// The names of the upstream engines from which this results were provided.
     pub engine: Vec<String>,
 }
@@ -63,15 +60,27 @@ impl SearchResult {
     }
 }

-///
+/// A named struct that stores the error info related to the upstream search engines.
 #[derive(Serialize, Deserialize)]
 pub struct EngineErrorInfo {
+    /// It stores the error type which occured while fetching the result from a particular search
+    /// engine.
     pub error: String,
+    /// It stores the name of the engine that failed to provide the requested search results.
     pub engine: String,
+    /// It stores the name of the color to indicate whether how severe the particular error is (In
+    /// other words it indicates the severity of the error/issue).
     pub severity_color: String,
 }

 impl EngineErrorInfo {
+    /// Constructs a new `SearchResult` with the given arguments needed for the struct.
+    ///
+    /// # Arguments
+    ///
+    /// * `error` - It takes the error type which occured while fetching the result from a particular
+    /// search engine.
+    /// * `engine` - It takes the name of the engine that failed to provide the requested search results.
     pub fn new(error: &EngineError, engine: String) -> Self {
         Self {
             error: match error {
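The `error: match error {` context line at the end of the hunk shows the constructor dispatching on the error variant. A hedged sketch of how it plausibly fills all three documented fields (the severity colors are assumptions; the diff truncates before them):

```rust
impl EngineErrorInfo {
    pub fn new(error: &EngineError, engine: String) -> Self {
        Self {
            // Stringify the variant for display in the UI.
            error: match error {
                EngineError::RequestError => String::from("RequestError"),
                EngineError::EmptyResultSet => String::from("EmptyResultSet"),
                EngineError::UnexpectedError => String::from("UnexpectedError"),
            },
            engine,
            // Assumed mapping from variant to severity color.
            severity_color: match error {
                EngineError::RequestError => String::from("green"),
                EngineError::EmptyResultSet => String::from("blue"),
                EngineError::UnexpectedError => String::from("red"),
            },
        }
    }
}
```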
@@ -91,23 +100,18 @@ impl EngineErrorInfo {

 /// A named struct to store, serialize, deserialize the all the search results scraped and
 /// aggregated from the upstream search engines.
-///
-/// # Fields
-///
-/// * `results` - Stores the individual serializable `SearchResult` struct into a vector of
 /// `SearchResult` structs.
-/// * `page_query` - Stores the current pages search query `q` provided in the search url.
-/// * `style` - Stores the theming options for the website.
-/// * `engine_errors_info` - Stores the information on which engines failed with their engine name
-/// and the type of error that caused it.
-/// * `empty_result_set` - Stores a boolean which indicates that no engines gave a result for the
-/// given search query.
 #[derive(Serialize, Deserialize)]
 #[serde(rename_all = "camelCase")]
 pub struct SearchResults {
+    /// Stores the individual serializable `SearchResult` struct into a vector of
     pub results: Vec<SearchResult>,
+    /// Stores the current pages search query `q` provided in the search url.
     pub page_query: String,
+    /// Stores the theming options for the website.
     pub style: Style,
+    /// Stores the information on which engines failed with their engine name
+    /// and the type of error that caused it.
     pub engine_errors_info: Vec<EngineErrorInfo>,
 }
src/results/mod.rs

@@ -1,3 +1,7 @@
+//! This module provides modules that handle the functionality to aggregate the fetched search
+//! results from the upstream search engines and filters it if safe search is set to 3 or 4. Also,
+//! provides various models to aggregate search results into a standardized form.
+
 pub mod aggregation_models;
 pub mod aggregator;
 pub mod user_agent;
src/results/user_agent.rs

@@ -2,6 +2,8 @@

 use fake_useragent::{Browsers, UserAgents, UserAgentsBuilder};

+/// A static variable which stores the initially build `UserAgents` struct. So as it can be resused
+/// again and again without the need of reinitializing the `UserAgents` struct.
 static USER_AGENTS: once_cell::sync::Lazy<UserAgents> = once_cell::sync::Lazy::new(|| {
     UserAgentsBuilder::new()
         .cache(false)
src/server/mod.rs

@@ -1 +1,6 @@
+//! This module provides modules that handle the functionality of handling different routes/paths
+//! for the `websurfx` search engine website. Also it handles the parsing of search parameters in
+//! the search route. Also, caches the next, current and previous search results in the search
+//! routes with the help of the redis server.
+
 pub mod routes;
src/server/routes.rs

@@ -17,16 +17,13 @@ use serde::Deserialize;
 use tokio::join;

 /// A named struct which deserializes all the user provided search parameters and stores them.
-///
-/// # Fields
-///
-/// * `q` - It stores the search parameter option `q` (or query in simple words)
-/// of the search url.
-/// * `page` - It stores the search parameter `page` (or pageno in simple words)
-/// of the search url.
 #[derive(Deserialize)]
 struct SearchParams {
+    /// It stores the search parameter option `q` (or query in simple words)
+    /// of the search url.
     q: Option<String>,
+    /// It stores the search parameter `page` (or pageno in simple words)
+    /// of the search url.
     page: Option<u32>,
 }
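For context, a hedged sketch of how actix-web consumes a struct like `SearchParams` through its `Query` extractor, which is what makes both fields naturally `Option`-typed (the route path and handler body are illustrative, not from this commit):

```rust
use actix_web::{get, web, HttpResponse};

#[get("/search")]
async fn search(params: web::Query<SearchParams>) -> HttpResponse {
    // Missing url parameters deserialize to None, hence the defaults here.
    let query = params.q.clone().unwrap_or_default();
    let page = params.page.unwrap_or(1);
    HttpResponse::Ok().body(format!("q={query}, page={page}"))
}
```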
@@ -54,17 +51,14 @@ pub async fn not_found(
 }

 /// A named struct which is used to deserialize the cookies fetched from the client side.
-///
-/// # Fields
-///
-/// * `theme` - It stores the theme name used in the website.
-/// * `colorscheme` - It stores the colorscheme name used for the website theme.
-/// * `engines` - It stores the user selected upstream search engines selected from the UI.
 #[allow(dead_code)]
 #[derive(Deserialize)]
 struct Cookie {
+    /// It stores the theme name used in the website.
     theme: String,
+    /// It stores the colorscheme name used for the website theme.
     colorscheme: String,
+    /// It stores the user selected upstream search engines selected from the UI.
     engines: Vec<String>,
 }
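A short sketch of how such a `Cookie` struct is typically recovered in a handler, assuming the UI serializes the settings as json into a single cookie (the cookie name here is hypothetical):

```rust
use actix_web::HttpRequest;

fn read_settings(req: &HttpRequest) -> Option<Cookie> {
    // "appCookie" is a hypothetical name; deserialization uses the derive above.
    let cookie = req.cookie("appCookie")?;
    serde_json::from_str::<Cookie>(cookie.value()).ok()
}
```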
@@ -149,8 +143,21 @@ pub async fn search(
     }
 }

-/// Fetches the results for a query and page.
-/// First checks the redis cache, if that fails it gets proper results
+/// Fetches the results for a query and page. It First checks the redis cache, if that
+/// fails it gets proper results by requesting from the upstream search engines.
+///
+/// # Arguments
+///
+/// * `url` - It takes the url of the current page that requested the search results for a
+/// particular search query.
+/// * `config` - It takes a parsed config struct.
+/// * `query` - It takes the page number as u32 value.
+/// * `req` - It takes the `HttpRequest` struct as a value.
+///
+/// # Error
+///
+/// It returns the `SearchResults` struct if the search results could be successfully fetched from
+/// the cache or from the upstream search engines otherwise it returns an appropriate error.
 async fn results(
     url: String,
     config: &Config,
@@ -158,7 +165,7 @@ async fn results(
     page: u32,
     req: HttpRequest,
 ) -> Result<SearchResults, Box<dyn std::error::Error>> {
-    //Initialize redis cache connection struct
+    // Initialize redis cache connection struct
     let mut redis_cache = RedisCache::new(config.redis_url.clone())?;
     // fetch the cached results json.
     let cached_results_json = redis_cache.cached_json(&url);
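The new doc comment plus the two cache lines in this hunk outline a cache-first flow. A reduced sketch of it (the `aggregate` helper and the `cache_results` write-back call are assumptions standing in for the crate's real code):

```rust
async fn results(
    url: String,
    config: &Config,
    query: &str,
    page: u32,
) -> Result<SearchResults, Box<dyn std::error::Error>> {
    // Initialize redis cache connection struct.
    let mut redis_cache = RedisCache::new(config.redis_url.clone())?;
    // On a cache hit, answer straight from redis and skip the upstream engines.
    if let Ok(json) = redis_cache.cached_json(&url) {
        return Ok(serde_json::from_str::<SearchResults>(&json)?);
    }
    // Cache miss: aggregate fresh results, then write them back for next time.
    let results = aggregate(query, page, config).await?; // hypothetical helper
    redis_cache.cache_results(&serde_json::to_string(&results)?, &url)?; // assumed API
    Ok(results)
}
```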