websurfx/src/models/aggregation_models.rs

//! This module provides public models for handling, storing and serializing of search results
//! data scraped from the upstream search engines.

use serde::{Deserialize, Serialize};
use smallvec::SmallVec;

use super::{engine_models::EngineError, parser_models::Style};

/// A named struct to store the raw search results scraped from the upstream search engines
/// before aggregating them. It derives the `Clone` trait, which is needed to write idiomatic
/// Rust using `Iterators`.
///
/// Field names are serialized and deserialized in `camelCase` form, as requested by the
/// `serde(rename_all)` attribute on the struct.
#[derive(Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SearchResult {
    /// The title of the search result.
    pub title: String,
    /// The url which is accessed when clicked on it
    /// (href url in html in simple words).
    pub url: String,
    /// The description of the search result.
    pub description: String,
    /// The names of the upstream engines from which this result was provided.
    pub engine: SmallVec<[String; 0]>,
}

impl SearchResult {
    /// Constructs a new `SearchResult`, copying every borrowed argument into owned storage.
    ///
    /// # Arguments
    ///
    /// * `title` - The title of the search result.
    /// * `url` - The url which is accessed when clicked on it
    /// (href url in html in simple words).
    /// * `description` - The description of the search result.
    /// * `engine` - The names of the upstream engines from which this result was provided.
    /// May be empty.
    pub fn new(title: &str, url: &str, description: &str, engine: &[&str]) -> Self {
        SearchResult {
            title: title.to_owned(),
            url: url.to_owned(),
            description: description.to_owned(),
            // Destructure the `&&str` so `to_owned` copies the `str` directly instead of
            // going through the `Display` machinery of `to_string`.
            engine: engine.iter().map(|&name| name.to_owned()).collect(),
        }
    }

    /// Appends the provided engine name to the list of engines stored in the struct.
    ///
    /// # Arguments
    ///
    /// * `engine` - Takes an engine name provided as a string slice.
    pub fn add_engines(&mut self, engine: &str) {
        self.engine.push(engine.to_owned())
    }

    /// Takes the first engine name out of the struct and returns it.
    ///
    /// The name is moved out rather than cloned, so the first slot of the engine list is left
    /// holding an empty string afterwards.
    ///
    /// # Returns
    ///
    /// The first engine name stored in the struct, or an empty string when no engine name has
    /// been stored (instead of panicking on an out-of-bounds index).
    pub fn engine(&mut self) -> String {
        self.engine
            .first_mut()
            .map(std::mem::take)
            .unwrap_or_default()
    }
}

/// A named struct that stores the error info related to the upstream search engines.
#[derive(Serialize, Deserialize, Clone)]
pub struct EngineErrorInfo {
    /// It stores the name of the error type which occurred while fetching the results from a
    /// particular search engine.
    pub error: String,
    /// It stores the name of the engine that failed to provide the requested search results.
    pub engine: String,
    /// It stores the name of the color used to indicate how severe the particular error is
    /// (in other words, it indicates the severity of the error/issue).
    pub severity_color: String,
}

impl EngineErrorInfo {
    /// Builds an `EngineErrorInfo` describing a failure reported by an upstream engine.
    ///
    /// The error variant is translated into both its textual name and the name of the color
    /// used to signal its severity.
    ///
    /// # Arguments
    ///
    /// * `error` - The error which occurred while fetching the results from a particular
    /// search engine.
    /// * `engine` - The name of the engine that failed to provide the requested search results.
    pub fn new(error: &EngineError, engine: &str) -> Self {
        // Resolve the error name and its severity color together so both stay in sync
        // for every variant.
        let (error_name, color_name) = match error {
            EngineError::RequestError => ("RequestError", "green"),
            EngineError::EmptyResultSet => ("EmptyResultSet", "blue"),
            EngineError::UnexpectedError => ("UnexpectedError", "red"),
        };

        Self {
            error: error_name.to_owned(),
            engine: engine.to_owned(),
            severity_color: color_name.to_owned(),
        }
    }
}

/// A named struct to store, serialize and deserialize all the search results scraped and
/// aggregated from the upstream search engines.
///
/// Field names are serialized and deserialized in `camelCase` form, as requested by the
/// `serde(rename_all)` attribute on the struct.
#[derive(Serialize, Deserialize, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct SearchResults {
    /// Stores the individual serializable `SearchResult` structs in a vector.
    pub results: Vec<SearchResult>,
    /// Stores the current page's search query `q` provided in the search url.
    pub page_query: String,
    /// Stores the theming options for the website.
    pub style: Style,
    /// Stores the information on which engines failed with their engine name
    /// and the type of error that caused it.
    pub engine_errors_info: Vec<EngineErrorInfo>,
    /// Stores the flag option which holds the check value that the following
    /// search query was disallowed when the safe search level set to 4 and it
    /// was present in the `Blocklist` file.
    pub disallowed: bool,
    /// Stores the flag option which holds the check value that the following
    /// search query was filtered when the safe search level set to 3 and it
    /// was present in the `Blocklist` file.
    pub filtered: bool,
    /// Stores the safe search level `safesearch` provided in the search url.
    pub safe_search_level: u8,
    /// Stores the flag option which holds the check value of whether no search engines
    /// were selected.
    pub no_engines_selected: bool,
}

impl SearchResults {
    /// Builds a `SearchResults` from the aggregated results, the query and the engine errors.
    ///
    /// The style, the `disallowed`, `filtered` and `no_engines_selected` flags and the safe
    /// search level all start out at their default values and can be changed afterwards
    /// through the setter functions.
    ///
    /// # Arguments
    ///
    /// * `results` - The vector of individual serializable `SearchResult` structs to store.
    /// * `page_query` - The current page's search query `q` provided in the search url.
    /// * `engine_errors_info` - A slice of structs which contain information regarding
    /// which engines failed with their names, reason and their severity color name. The
    /// slice is copied into the new struct.
    pub fn new(
        results: Vec<SearchResult>,
        page_query: &str,
        engine_errors_info: &[EngineErrorInfo],
    ) -> Self {
        Self {
            results,
            page_query: String::from(page_query),
            engine_errors_info: engine_errors_info.to_vec(),
            // Every remaining field keeps the value provided by the derived `Default`.
            ..Self::default()
        }
    }

    /// Replaces the stored website style with a copy of the provided one.
    pub fn add_style(&mut self, style: &Style) {
        self.style = Style::clone(style);
    }

    /// Marks the search query as disallowed.
    pub fn set_disallowed(&mut self) {
        self.disallowed = true;
    }

    /// Replaces the stored search query of the current page with the provided one.
    pub fn set_page_query(&mut self, page: &str) {
        self.page_query = String::from(page);
    }

    /// Marks the search query as filtered.
    pub fn set_filtered(&mut self) {
        self.filtered = true;
    }

    /// Moves the stored `engine_errors_info` out of the struct and returns it, leaving an
    /// empty vector in its place.
    pub fn engine_errors_info(&mut self) -> Vec<EngineErrorInfo> {
        let taken_errors = std::mem::take(&mut self.engine_errors_info);
        taken_errors
    }

    /// Returns a copy of the stored `results`; the struct keeps its own results untouched.
    pub fn results(&mut self) -> Vec<SearchResult> {
        self.results.to_vec()
    }

    /// Stores the provided safe search level for the current page.
    pub fn set_safe_search_level(&mut self, safe_search_level: u8) {
        self.safe_search_level = safe_search_level;
    }

    /// Returns the current value of the `no_engines_selected` flag.
    pub fn no_engines_selected(&self) -> bool {
        self.no_engines_selected
    }

    /// Marks that no search engines were selected by setting `no_engines_selected` to true.
    pub fn set_no_engines_selected(&mut self) {
        self.no_engines_selected = true;
    }
}
Improving source code documentation. 2023-04-27 10:53:28 -04:00			`//! This module provides public models for handling, storing and serializing of search results`
			`//! data scraped from the upstream search engines.`

add code to evade ip blocking, improve pagination code and fix documentation 2023-05-02 04:58:21 -04:00			`use serde::{Deserialize, Serialize};`
⚙️ refactor: replace vecs with smallvecs for smaller data sizes & replace to_strings with to_owned (#180)(#178) 2023-08-27 14:02:23 -04:00			`use smallvec::SmallVec;`
initial commit 2023-04-22 07:35:07 -04:00
⚙️ refactor: reorganize code & restructure codebase for better maintainability (#207) 2023-09-03 13:50:50 -04:00			`use super::{engine_models::EngineError, parser_models::Style};`
replace commandline arguments with config.lua & add support for changing themes & coloschemes 2023-04-30 11:16:08 -04:00
Improving source code documentation. 2023-04-27 10:53:28 -04:00			`/// A named struct to store the raw scraped search results scraped search results from the`
			`/// upstream search engines before aggregating it.It derives the Clone trait which is needed`
			/// to write idiomatic rust using `Iterators`.
			`/// (href url in html in simple words).`
Improve aggregation Adds the EngineHandler struct Removes vulnerability where an attacker could send requests cookies with fake engine names and crash the server. Merged RawSearchResult and SearchResult, as they were functionally identical. 2023-08-18 04:43:53 -04:00			`#[derive(Clone, Serialize, Deserialize)]`
initial commit 2023-04-22 07:35:07 -04:00			`#[serde(rename_all = "camelCase")]`
			`pub struct SearchResult {`
⚙️ refactor: change & add documentation to the code based on the lints (#205) 2023-09-03 12:23:34 -04:00			`/// The title of the search result.`
initial commit 2023-04-22 07:35:07 -04:00			`pub title: String,`
⚙️ refactor: change & add documentation to the code based on the lints (#205) 2023-09-03 12:23:34 -04:00			`/// The url which is accessed when clicked on it`
initial commit 2023-04-22 07:35:07 -04:00			`pub url: String,`
⚙️ refactor: change & add documentation to the code based on the lints (#205) 2023-09-03 12:23:34 -04:00			`/// The description of the search result.`
initial commit 2023-04-22 07:35:07 -04:00			`pub description: String,`
⚙️ refactor: change & add documentation to the code based on the lints (#205) 2023-09-03 12:23:34 -04:00			`/// The names of the upstream engines from which this results were provided.`
⚙️ refactor: replace vecs with smallvecs for smaller data sizes & replace to_strings with to_owned (#180)(#178) 2023-08-27 14:02:23 -04:00			`pub engine: SmallVec<[String; 0]>,`
initial commit 2023-04-22 07:35:07 -04:00			`}`

Refactoring code and separating code into files for better maintainability 2023-04-25 09:30:04 -04:00			`impl SearchResult {`
Improving source code documentation. 2023-04-27 10:53:28 -04:00			/// Constructs a new `RawSearchResult` with the given arguments needed for the struct.
			`///`
			`/// # Arguments`
			`///`
			/// * `title` - The title of the search result.
Improve Aggregation function & config parser Refactor aggregation function Rename visiting_url to url, as they are always the same (see upstream engine scalping). Refactor parsing function to be more readable. 2023-08-17 16:48:20 -04:00			/// * `url` - The url which is accessed when clicked on it
Improving source code documentation. 2023-04-27 10:53:28 -04:00			`/// (href url in html in simple words).`
			/// * `description` - The description of the search result.
			/// * `engine` - The names of the upstream engines from which this results were provided.
⚙️ refactor: replace vecs with smallvecs for smaller data sizes & replace to_strings with to_owned (#180)(#178) 2023-08-27 14:02:23 -04:00			`pub fn new(title: &str, url: &str, description: &str, engine: &[&str]) -> Self {`
Refactoring code and separating code into files for better maintainability 2023-04-25 09:30:04 -04:00			`SearchResult {`
⚙️ refactor: replace vecs with smallvecs for smaller data sizes & replace to_strings with to_owned (#180)(#178) 2023-08-27 14:02:23 -04:00			`title: title.to_owned(),`
			`url: url.to_owned(),`
			`description: description.to_owned(),`
			`engine: engine.iter().map(\|name\| name.to_string()).collect(),`
Refactoring code and separating code into files for better maintainability 2023-04-25 09:30:04 -04:00			`}`
			`}`
Improving source code documentation. 2023-04-27 10:53:28 -04:00
			`/// A function which adds the engine name provided as a string into a vector of strings.`
			`///`
			`/// # Arguments`
			`///`
			/// * `engine` - Takes an engine name provided as a String.
⚙️ refactor: replace vecs with smallvecs for smaller data sizes & replace to_strings with to_owned (#180)(#178) 2023-08-27 14:02:23 -04:00			`pub fn add_engines(&mut self, engine: &str) {`
			`self.engine.push(engine.to_owned())`
Refactoring code and separating code into files for better maintainability 2023-04-25 09:30:04 -04:00			`}`
Adding minor change and implementing Cloning ability. 2023-04-26 10:46:49 -04:00
add missing documentation in config.lua and application source code 2023-04-30 12:24:16 -04:00			`/// A function which returns the engine name stored from the struct as a string.`
			`///`
			`/// # Returns`
			`///`
			`/// An engine name stored as a string from the struct.`
⚙️ refactor: replace vecs with smallvecs for smaller data sizes & replace to_strings with to_owned (#180)(#178) 2023-08-27 14:02:23 -04:00			`pub fn engine(&mut self) -> String {`
			`std::mem::take(&mut self.engine[0])`
Adding minor change and implementing Cloning ability. 2023-04-26 10:46:49 -04:00			`}`
Refactoring code and separating code into files for better maintainability 2023-04-25 09:30:04 -04:00			`}`

⚙️ refactor: change & add documentation to the code based on the lints (#205) 2023-09-03 12:23:34 -04:00			`/// A named struct that stores the error info related to the upstream search engines.`
⚙️ refactor: replace vecs with smallvecs for smaller data sizes & replace to_strings with to_owned (#180)(#178) 2023-08-27 14:02:23 -04:00			`#[derive(Serialize, Deserialize, Clone)]`
✨ feat: add setter functions to SearchResults struct 2023-07-14 14:26:29 -04:00			`pub struct EngineErrorInfo {`
⚙️ refactor: change & add documentation to the code based on the lints (#205) 2023-09-03 12:23:34 -04:00			`/// It stores the error type which occured while fetching the result from a particular search`
			`/// engine.`
✨ feat: add setter functions to SearchResults struct 2023-07-14 14:26:29 -04:00			`pub error: String,`
⚙️ refactor: change & add documentation to the code based on the lints (#205) 2023-09-03 12:23:34 -04:00			`/// It stores the name of the engine that failed to provide the requested search results.`
✨ feat: add setter functions to SearchResults struct 2023-07-14 14:26:29 -04:00			`pub engine: String,`
⚙️ refactor: change & add documentation to the code based on the lints (#205) 2023-09-03 12:23:34 -04:00			`/// It stores the name of the color to indicate whether how severe the particular error is (In`
			`/// other words it indicates the severity of the error/issue).`
✨ feat: add images, error_box & new message when no results are provided (#185) 2023-08-09 21:32:47 -04:00			`pub severity_color: String,`
✨ feat: add setter functions to SearchResults struct 2023-07-14 14:26:29 -04:00			`}`

			`impl EngineErrorInfo {`
⚙️ refactor: change & add documentation to the code based on the lints (#205) 2023-09-03 12:23:34 -04:00			/// Constructs a new `SearchResult` with the given arguments needed for the struct.
			`///`
			`/// # Arguments`
			`///`
			/// * `error` - It takes the error type which occured while fetching the result from a particular
			`/// search engine.`
			/// * `engine` - It takes the name of the engine that failed to provide the requested search results.
⚙️ refactor: replace vecs with smallvecs for smaller data sizes & replace to_strings with to_owned (#180)(#178) 2023-08-27 14:02:23 -04:00			`pub fn new(error: &EngineError, engine: &str) -> Self {`
✨ feat: add setter functions to SearchResults struct 2023-07-14 14:26:29 -04:00			`Self {`
			`error: match error {`
⚙️ refactor: replace vecs with smallvecs for smaller data sizes & replace to_strings with to_owned (#180)(#178) 2023-08-27 14:02:23 -04:00			`EngineError::RequestError => "RequestError".to_owned(),`
			`EngineError::EmptyResultSet => "EmptyResultSet".to_owned(),`
			`EngineError::UnexpectedError => "UnexpectedError".to_owned(),`
✨ feat: add setter functions to SearchResults struct 2023-07-14 14:26:29 -04:00			`},`
⚙️ refactor: replace vecs with smallvecs for smaller data sizes & replace to_strings with to_owned (#180)(#178) 2023-08-27 14:02:23 -04:00			`engine: engine.to_owned(),`
✨ feat: add images, error_box & new message when no results are provided (#185) 2023-08-09 21:32:47 -04:00			`severity_color: match error {`
⚙️ refactor: replace vecs with smallvecs for smaller data sizes & replace to_strings with to_owned (#180)(#178) 2023-08-27 14:02:23 -04:00			`EngineError::RequestError => "green".to_owned(),`
			`EngineError::EmptyResultSet => "blue".to_owned(),`
			`EngineError::UnexpectedError => "red".to_owned(),`
✨ feat: add images, error_box & new message when no results are provided (#185) 2023-08-09 21:32:47 -04:00			`},`
✨ feat: add setter functions to SearchResults struct 2023-07-14 14:26:29 -04:00			`}`
			`}`
			`}`

make format happy 2023-05-23 05:34:46 -04:00			`/// A named struct to store, serialize, deserialize the all the search results scraped and`
add code to evade ip blocking, improve pagination code and fix documentation 2023-05-02 04:58:21 -04:00			`/// aggregated from the upstream search engines.`
Improving source code documentation. 2023-04-27 10:53:28 -04:00			/// `SearchResult` structs.
Instead of caching jsons, we can cache the original structure 2023-09-11 17:20:05 -04:00			`#[derive(Serialize, Deserialize, Default, Clone)]`
initial commit 2023-04-22 07:35:07 -04:00			`#[serde(rename_all = "camelCase")]`
			`pub struct SearchResults {`
⚙️ refactor: change & add documentation to the code based on the lints (#205) 2023-09-03 12:23:34 -04:00			/// Stores the individual serializable `SearchResult` struct into a vector of
initial commit 2023-04-22 07:35:07 -04:00			`pub results: Vec<SearchResult>,`
⚙️ refactor: change & add documentation to the code based on the lints (#205) 2023-09-03 12:23:34 -04:00			/// Stores the current pages search query `q` provided in the search url.
initial commit 2023-04-22 07:35:07 -04:00			`pub page_query: String,`
⚙️ refactor: change & add documentation to the code based on the lints (#205) 2023-09-03 12:23:34 -04:00			`/// Stores the theming options for the website.`
replace commandline arguments with config.lua & add support for changing themes & coloschemes 2023-04-30 11:16:08 -04:00			`pub style: Style,`
⚙️ refactor: change & add documentation to the code based on the lints (#205) 2023-09-03 12:23:34 -04:00			`/// Stores the information on which engines failed with their engine name`
			`/// and the type of error that caused it.`
✨ feat: add setter functions to SearchResults struct 2023-07-14 14:26:29 -04:00			`pub engine_errors_info: Vec<EngineErrorInfo>,`
🧹 chore: make github actions happy (#205) 2023-09-12 10:59:33 -04:00			`/// Stores the flag option which holds the check value that the following`
			`/// search query was disallowed when the safe search level set to 4 and it`
Merge branch 'rolling' into change-document-style-with-linter-warnings 2023-09-12 10:49:46 -04:00			/// was present in the `Blocklist` file.
✨ feat: implement new fields, traits and functions (#201) 2023-09-02 10:48:27 -04:00			`pub disallowed: bool,`
🧹 chore: make github actions happy (#205) 2023-09-12 10:59:33 -04:00			`/// Stores the flag option which holds the check value that the following`
			`/// search query was filtered when the safe search level set to 3 and it`
Merge branch 'rolling' into change-document-style-with-linter-warnings 2023-09-12 10:49:46 -04:00			/// was present in the `Blocklist` file.
✨ feat: implement new fields, traits and functions (#201) 2023-09-02 10:48:27 -04:00			`pub filtered: bool,`
✨ feat: add new check value for no engine selected (#227) 2023-09-23 05:48:01 -04:00			/// Stores the safe search level `safesearch` provided in the search url.
			`pub safe_search_level: u8,`
			`/// Stores the flag option which holds the check value that whether any search engines were`
			`/// selected or not.`
			`pub no_engines_selected: bool,`
initial commit 2023-04-22 07:35:07 -04:00			`}`
Refactoring code and separating code into files for better maintainability 2023-04-25 09:30:04 -04:00
			`impl SearchResults {`
Improving source code documentation. 2023-04-27 10:53:28 -04:00			/// Constructs a new `SearchResult` with the given arguments needed for the struct.
			`///`
			`/// # Arguments`
			`///`
			/// * `results` - Takes an argument of individual serializable `SearchResult` struct
			/// and stores it into a vector of `SearchResult` structs.
			/// * `page_query` - Takes an argument of current page`s search query `q` provided in
			`/// the search url.`
🧹 chore: make github actions happy (#205) 2023-09-12 10:59:33 -04:00			/// * `engine_errors_info` - Takes an array of structs which contains information regarding
			`/// which engines failed with their names, reason and their severity color name.`
✨ feat: add setter functions to SearchResults struct 2023-07-14 14:26:29 -04:00			`pub fn new(`
			`results: Vec<SearchResult>,`
⚙️ refactor: replace vecs with smallvecs for smaller data sizes & replace to_strings with to_owned (#180)(#178) 2023-08-27 14:02:23 -04:00			`page_query: &str,`
			`engine_errors_info: &[EngineErrorInfo],`
✨ feat: add setter functions to SearchResults struct 2023-07-14 14:26:29 -04:00			`) -> Self {`
⚙️ refactor: replace vecs with smallvecs for smaller data sizes & replace to_strings with to_owned (#180)(#178) 2023-08-27 14:02:23 -04:00			`Self {`
Refactoring code and separating code into files for better maintainability 2023-04-25 09:30:04 -04:00			`results,`
🧹 chore: make clippy happy (#201) 2023-09-10 11:56:54 -04:00			`page_query: page_query.to_owned(),`
✨ feat: implement new fields, traits and functions (#201) 2023-09-02 10:48:27 -04:00			`style: Style::default(),`
🧹 chore: make clippy happy (#201) 2023-09-10 11:56:54 -04:00			`engine_errors_info: engine_errors_info.to_owned(),`
✨ feat: implement new fields, traits and functions (#201) 2023-09-02 10:48:27 -04:00			`disallowed: Default::default(),`
			`filtered: Default::default(),`
✨ feat: add new check value for no engine selected (#227) 2023-09-23 05:48:01 -04:00			`safe_search_level: Default::default(),`
			`no_engines_selected: Default::default(),`
Refactoring code and separating code into files for better maintainability 2023-04-25 09:30:04 -04:00			`}`
			`}`
replace commandline arguments with config.lua & add support for changing themes & coloschemes 2023-04-30 11:16:08 -04:00
✨ feat: add documentation to code 2023-07-15 06:36:46 -04:00			`/// A setter function to add website style to the return search results.`
✨ feat: implement new fields, traits and functions (#201) 2023-09-02 10:48:27 -04:00			`pub fn add_style(&mut self, style: &Style) {`
			`self.style = style.clone();`
			`}`

			`/// A setter function that sets disallowed to true.`
			`pub fn set_disallowed(&mut self) {`
			`self.disallowed = true;`
			`}`

			`/// A setter function to set the current page search query.`
			`pub fn set_page_query(&mut self, page: &str) {`
			`self.page_query = page.to_owned();`
			`}`

			`/// A setter function that sets the filtered to true.`
			`pub fn set_filtered(&mut self) {`
			`self.filtered = true;`
			`}`

			/// A getter function that gets the value of `engine_errors_info`.
			`pub fn engine_errors_info(&mut self) -> Vec<EngineErrorInfo> {`
			`std::mem::take(&mut self.engine_errors_info)`
			`}`
			/// A getter function that gets the value of `results`.
			`pub fn results(&mut self) -> Vec<SearchResult> {`
			`self.results.clone()`
replace commandline arguments with config.lua & add support for changing themes & coloschemes 2023-04-30 11:16:08 -04:00			`}`
✨ feat: add new check value for no engine selected (#227) 2023-09-23 05:48:01 -04:00
			`/// A setter function to set the current page safe search level.`
			`pub fn set_safe_search_level(&mut self, safe_search_level: u8) {`
			`self.safe_search_level = safe_search_level;`
			`}`

			/// A getter function that gets the value of `no_engines_selected`.
			`pub fn no_engines_selected(&self) -> bool {`
			`self.no_engines_selected`
			`}`

			/// A setter function to set the `no_engines_selected` to true.
			`pub fn set_no_engines_selected(&mut self) {`
			`self.no_engines_selected = true;`
			`}`
Refactoring code and separating code into files for better maintainability 2023-04-25 09:30:04 -04:00			`}`