0
0
mirror of https://github.com/neon-mmd/websurfx.git synced 2024-12-22 20:38:22 -05:00

feat: add documentation to code

This commit is contained in:
neon_arch 2023-07-15 13:36:46 +03:00
parent 8133de1758
commit 94ef62eec9
7 changed files with 91 additions and 45 deletions

View File

@ -18,6 +18,10 @@ static CONFIG_FILE_NAME: &str = "config.lua";
/// * `style` - It stores the theming options for the website.
/// * `redis_connection_url` - It stores the redis connection url address on which the redis
/// client should connect.
/// * `aggregator` - It stores the option to whether enable or disable production use.
/// * `logging` - It stores the option to whether enable or disable logs.
/// * `debug` - It stores the option to whether enable or disable debug mode.
/// * `upstream_search_engines` - It stores all the engine names that were enabled by the user.
#[derive(Clone)]
pub struct Config {
pub port: u16,
@ -31,9 +35,13 @@ pub struct Config {
}
/// Configuration options for the aggregator.
///
/// # Fields
///
/// * `random_delay` - It stores the option to whether enable or disable random delays between
/// requests.
#[derive(Clone)]
pub struct AggreatorConfig {
/// Whether to introduce a random delay before sending the request to the search engine.
pub random_delay: bool,
}

View File

@ -13,28 +13,29 @@ use super::engine_models::{EngineError, SearchEngine};
use error_stack::{IntoReport, Report, Result, ResultExt};
/// This function scrapes results from the upstream engine duckduckgo and puts all the scraped
/// results like title, visiting_url (href in html),engine (from which engine it was fetched from)
/// and description in a RawSearchResult and then adds that to HashMap whose keys are url and
/// values are RawSearchResult struct and then returns it within a Result enum.
///
/// # Arguments
///
/// * `query` - Takes the user provided query to query to the upstream search engine with.
/// * `page` - Takes an u32 as an argument.
/// * `user_agent` - Takes a random user agent string as an argument.
///
/// # Errors
///
/// Returns an `EngineErrorKind` if the user is not connected to the internet or if their is failure to
/// reach the above `upstream search engine` page or if the `upstream search engine` is unable to
/// provide results for the requested search query and also returns error if the scraping selector
/// or HeaderMap fails to initialize.
/// A new DuckDuckGo engine type defined in-order to implement the `SearchEngine` trait which allows to
/// reduce code duplication as well as allows to create vector of different search engines easily.
pub struct DuckDuckGo;
#[async_trait::async_trait]
impl SearchEngine for DuckDuckGo {
/// This function scrapes results from the upstream engine duckduckgo and puts all the scraped
/// results like title, visiting_url (href in html),engine (from which engine it was fetched from)
/// and description in a RawSearchResult and then adds that to HashMap whose keys are url and
/// values are RawSearchResult struct and then returns it within a Result enum.
///
/// # Arguments
///
/// * `query` - Takes the user provided query to query to the upstream search engine with.
/// * `page` - Takes an u32 as an argument.
/// * `user_agent` - Takes a random user agent string as an argument.
///
/// # Errors
///
/// Returns an `EngineErrorKind` if the user is not connected to the internet or if their is failure to
/// reach the above `upstream search engine` page or if the `upstream search engine` is unable to
/// provide results for the requested search query and also returns error if the scraping selector
/// or HeaderMap fails to initialize.
async fn results(
&self,
query: String,

View File

@ -43,6 +43,7 @@ impl fmt::Display for EngineError {
impl error_stack::Context for EngineError {}
/// A trait to define common behaviour for all search engines.
#[async_trait::async_trait]
pub trait SearchEngine {
async fn fetch_html_from_upstream(
@ -53,7 +54,7 @@ pub trait SearchEngine {
// fetch the html from upstream search engine
Ok(reqwest::Client::new()
.get(url)
.timeout(Duration::from_secs(30))
.timeout(Duration::from_secs(30)) // Add timeout to request to avoid DDOSing the server
.headers(header_map) // add spoofed headers to emulate human behaviour
.send()
.await

View File

@ -11,28 +11,30 @@ use crate::search_results_handler::aggregation_models::RawSearchResult;
use super::engine_models::{EngineError, SearchEngine};
use error_stack::{IntoReport, Report, Result, ResultExt};
/// This function scrapes results from the upstream engine duckduckgo and puts all the scraped
/// results like title, visiting_url (href in html),engine (from which engine it was fetched from)
/// and description in a RawSearchResult and then adds that to HashMap whose keys are url and
/// values are RawSearchResult struct and then returns it within a Result enum.
///
/// # Arguments
///
/// * `query` - Takes the user provided query to query to the upstream search engine with.
/// * `page` - Takes an u32 as an argument.
/// * `user_agent` - Takes a random user agent string as an argument.
///
/// # Errors
///
/// Returns an `EngineErrorKind` if the user is not connected to the internet or if their is failure to
/// reach the above `upstream search engine` page or if the `upstream search engine` is unable to
/// provide results for the requested search query and also returns error if the scraping selector
/// or HeaderMap fails to initialize.
/// A new Searx engine type defined in-order to implement the `SearchEngine` trait which allows to
/// reduce code duplication as well as allows to create vector of different search engines easily.
pub struct Searx;
#[async_trait::async_trait]
impl SearchEngine for Searx {
/// This function scrapes results from the upstream engine duckduckgo and puts all the scraped
/// results like title, visiting_url (href in html),engine (from which engine it was fetched from)
/// and description in a RawSearchResult and then adds that to HashMap whose keys are url and
/// values are RawSearchResult struct and then returns it within a Result enum.
///
/// # Arguments
///
/// * `query` - Takes the user provided query to query to the upstream search engine with.
/// * `page` - Takes an u32 as an argument.
/// * `user_agent` - Takes a random user agent string as an argument.
///
/// # Errors
///
/// Returns an `EngineErrorKind` if the user is not connected to the internet or if their is failure to
/// reach the above `upstream search engine` page or if the `upstream search engine` is unable to
/// provide results for the requested search query and also returns error if the scraping selector
/// or HeaderMap fails to initialize.
async fn results(
&self,
query: String,

View File

@ -143,6 +143,11 @@ impl EngineErrorInfo {
/// * `results` - Stores the individual serializable `SearchResult` struct into a vector of
/// `SearchResult` structs.
/// * `page_query` - Stores the current pages search query `q` provided in the search url.
/// * `style` - Stores the theming options for the website.
/// * `engine_errors_info` - Stores the information on which engines failed with their engine name
/// and the type of error that caused it.
/// * `empty_result_set` - Stores a boolean which indicates that no engines gave a result for the
/// given search query.
#[derive(Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SearchResults {
@ -162,6 +167,8 @@ impl SearchResults {
/// and stores it into a vector of `SearchResult` structs.
/// * `page_query` - Takes an argument of current page`s search query `q` provided in
/// the search url.
/// * `empty_result_set` - Takes a boolean which indicates that no engines gave a result for the
/// given search query.
pub fn new(
results: Vec<SearchResult>,
page_query: String,
@ -176,14 +183,17 @@ impl SearchResults {
}
}
/// A setter function to add website style to the return search results.
pub fn add_style(&mut self, style: Style) {
self.style = style;
}
/// A function which checks whether the results stored are empty or not.
pub fn is_empty_result_set(&self) -> bool {
self.results.is_empty()
}
/// A setter function which sets the empty_result_set to true.
pub fn set_empty_result_set(&mut self) {
self.empty_result_set = true;
}

View File

@ -18,14 +18,21 @@ use crate::engines::{
searx,
};
/// Aliases for long type annotations
type FutureVec = Vec<JoinHandle<Result<HashMap<String, RawSearchResult>, Report<EngineError>>>>;
/// A function that aggregates all the scraped results from the above upstream engines and
/// then removes duplicate results and if two results are found to be from two or more engines
/// then puts their names together to show the results are fetched from these upstream engines
/// and then removes all data from the HashMap and puts into a struct of all results aggregated
/// into a vector and also adds the query used into the struct this is neccessory because
/// otherwise the search bar in search remains empty if searched from the query url
/// A function that aggregates all the scraped results from the above user selected upstream
/// search engines either selected from the UI or from the config file which is handled by the code
/// by matching over the selected search engines and adding the selected ones to the vector which
/// is then used to create an async task vector with `tokio::spawn` which returns a future which
/// is then awaited on in another loop and then all the collected results is filtered for errors
/// and proper results and if an error is found is then sent to the UI with the engine name and the
/// error type that caused it by putting them finallt in the returned `SearchResults` struct. Also
/// the same process also removes duplicate results and if two results are found to be from two or
/// more engines then puts their names together to show the results are fetched from these upstream
/// engines and then removes all data from the HashMap and puts into a struct of all results aggregated
/// into a vector and also adds the query used into the struct this is neccessory because otherwise the
/// search bar in search remains empty if searched from the query url.
///
/// # Example:
///
@ -37,6 +44,9 @@ type FutureVec = Vec<JoinHandle<Result<HashMap<String, RawSearchResult>, Report<
/// * `query` - Accepts a string to query with the above upstream search engines.
/// * `page` - Accepts an u32 page number.
/// * `random_delay` - Accepts a boolean value to add a random delay before making the request.
/// * `debug` - Accepts a boolean value to enable or disable debug mode option.
/// * `upstream_search_engines` - Accepts a vector of search engine names which was selected by the
/// user through the UI or the config file.
///
/// # Error
///

View File

@ -51,6 +51,13 @@ pub async fn not_found(
.body(page_content))
}
/// A named struct which is used to deserialize the cookies fetched from the client side.
///
/// # Fields
///
/// * `theme` - It stores the theme name used in the website.
/// * `colorscheme` - It stores the colorscheme name used for the website theme.
/// * `engines` - It stores the user selected upstream search engines selected from the UI.
#[allow(dead_code)]
#[derive(Deserialize)]
struct Cookie {
@ -126,7 +133,7 @@ pub async fn search(
// fetch the cached results json.
let cached_results_json = redis_cache.cached_results_json(&page_url);
// check if fetched results was indeed fetched or it was an error and if so
// check if fetched catch results was indeed fetched or it was an error and if so
// handle the data accordingly.
match cached_results_json {
Ok(results_json) => {
@ -135,6 +142,10 @@ pub async fn search(
Ok(HttpResponse::Ok().body(page_content))
}
Err(_) => {
// check if the cookie value is empty or not if it is empty then use the
// default selected upstream search engines from the config file otherwise
// parse the non-empty cookie and grab the user selected engines from the
// UI and use that.
let mut results_json: crate::search_results_handler::aggregation_models::SearchResults = match req.cookie("appCookie") {
Some(cookie_value) => {
let cookie_value:Cookie = serde_json::from_str(cookie_value.name_value().1)?;
@ -143,6 +154,9 @@ pub async fn search(
None => aggregate(query.clone(), page, config.aggregator.random_delay, config.debug, config.upstream_search_engines.clone()).await?,
};
results_json.add_style(config.style.clone());
// check whether the results grabbed from the upstream engines are empty or
// not if they are empty then set the empty_result_set option to true in
// the result json.
if results_json.is_empty_result_set() {
results_json.set_empty_result_set();
}