diff --git a/src/bin/websurfx.rs b/src/bin/websurfx.rs
index 18ccf6f..88602cb 100644
--- a/src/bin/websurfx.rs
+++ b/src/bin/websurfx.rs
@@ -1,3 +1,8 @@
+//! Main module of the application
+//!
+//! This module contains the main function which handles the logging of the application to
+//! stdout, handles the provided command line arguments and launches the `websurfx` server.
+
 use std::ops::RangeInclusive;
 
 use websurfx::server::routes;
@@ -8,6 +13,7 @@ use clap::{command, Parser};
 use env_logger::Env;
 use handlebars::Handlebars;
 
+/// A commandline arguments struct.
 #[derive(Parser, Debug, Default)]
 #[clap(author = "neon_arch", version, about = "Websurfx server application")]
 #[command(propagate_version = true)]
@@ -19,8 +25,18 @@ struct CliArgs {
 
 const PORT_RANGE: RangeInclusive<u16> = 1024..=65535;
 
-// A function to check whether port is valid u32 number or is in range
-// between [1024-65536] otherwise display an appropriate error message.
+/// A function to check whether the port is a valid u16 number or is in the range
+/// between [1024-65535], otherwise display an appropriate error message.
+///
+/// # Arguments
+///
+/// * `s` - Takes the commandline argument port as a string.
+///
+/// # Error
+///
+/// Checks whether the argument provided to the `--port` commandline option is a valid
+/// u16 value and returns it as a u16, otherwise returns an error with an
+/// appropriate error message.
 fn is_port_in_range(s: &str) -> Result<u16, String> {
     let port: usize = s
         .parse()
@@ -36,7 +52,12 @@ fn is_port_in_range(s: &str) -> Result<u16, String> {
     }
 }
 
-// The function that launches the main server and handle routing functionality
+/// The function that launches the main server and registers all the routes of the website.
+///
+/// # Error
+///
+/// Returns an error if the port is being used by something else on the system and is not
+/// available to be used by other applications.
 #[actix_web::main]
 async fn main() -> std::io::Result<()> {
     let args = CliArgs::parse();
@@ -68,7 +89,7 @@ async fn main() -> std::io::Result<()> {
             .service(routes::settings) // settings page
             .default_service(web::route().to(routes::not_found)) // error page
     })
-    // Start server on 127.0.0.1:8080
+    // Start the server on 127.0.0.1 with the user provided port number, for example 127.0.0.1:8080.
     .bind(("127.0.0.1", args.port))?
     .run()
     .await
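The port-validation behaviour documented above can be pinned down with a small test. This is only a sketch: it assumes `is_port_in_range` keeps the `Result<u16, String>` signature shown in the hunk and that the test module lives in the same file as the private function.

```rust
#[cfg(test)]
mod tests {
    use super::is_port_in_range;

    #[test]
    fn port_validation_behaves_as_documented() {
        // A port inside [1024-65535] parses into a u16 value.
        assert_eq!(is_port_in_range("8080"), Ok(8080));
        // Ports outside the allowed range are rejected with an error message.
        assert!(is_port_in_range("80").is_err());
        // Non-numeric input is rejected as well.
        assert!(is_port_in_range("not-a-port").is_err());
    }
}
```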
diff --git a/src/engines/duckduckgo.rs b/src/engines/duckduckgo.rs
index 9ce62d4..6f227d6 100644
--- a/src/engines/duckduckgo.rs
+++ b/src/engines/duckduckgo.rs
@@ -23,7 +23,7 @@ use crate::search_results_handler::aggregation_models::RawSearchResult;
 /// # Errors
 ///
 /// Returns a reqwest error if the user is not connected to the internet or if their is failure to
-/// reach the above **upstream search engine** page and also returns error if the scraping
+/// reach the above `upstream search engine` page and also returns error if the scraping
 /// selector fails to initialize"
 pub async fn results(
     query: &str,
diff --git a/src/engines/searx.rs b/src/engines/searx.rs
index a03f127..bfba1c6 100644
--- a/src/engines/searx.rs
+++ b/src/engines/searx.rs
@@ -23,7 +23,7 @@ use crate::search_results_handler::aggregation_models::RawSearchResult;
 /// # Errors
 ///
 /// Returns a reqwest error if the user is not connected to the internet or if their is failure to
-/// reach the above **upstream search engine** page and also returns error if the scraping
+/// reach the above `upstream search engine` page and also returns error if the scraping
 /// selector fails to initialize"
 pub async fn results(
     query: &str,
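From the caller's side, the error contract described in these doc comments looks roughly like the sketch below. The full parameter list of `results` is cut off in the hunks above, so the `page` and `user_agent` arguments and the shape of the return value are assumptions made purely for illustration.

```rust
use websurfx::engines::duckduckgo;

// Hypothetical call site: only `query: &str` is visible in the diff, so the
// remaining arguments and the Ok value's type are assumed here.
async fn fetch_from_duckduckgo(query: &str, user_agent: &str) {
    match duckduckgo::results(query, Some(1), user_agent).await {
        // On success the engine hands back its scraped raw results.
        Ok(raw_results) => println!("scraped {} raw results", raw_results.len()),
        // Per the doc comment, a reqwest error covers both "no internet" and an
        // unreachable upstream page; a selector-initialization failure also ends up here.
        Err(err) => eprintln!("duckduckgo scrape failed: {err}"),
    }
}
```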
diff --git a/src/search_results_handler/aggregation_models.rs b/src/search_results_handler/aggregation_models.rs
index 4dbad48..2bf3055 100644
--- a/src/search_results_handler/aggregation_models.rs
+++ b/src/search_results_handler/aggregation_models.rs
@@ -1,5 +1,19 @@
+//! This module provides public models for handling, storing and serializing search results
+//! data scraped from the upstream search engines.
+
 use serde::Serialize;
 
+/// A named struct to store and serialize the individual search result from all the scraped
+/// and aggregated search results from the upstream search engines.
+///
+/// # Fields
+///
+/// * `title` - The title of the search result.
+/// * `visiting_url` - The url which is accessed when clicked on it (href url in html in simple
+/// words).
+/// * `url` - The url to be displayed below the search result title in html.
+/// * `description` - The description of the search result.
+/// * `engine` - The names of the upstream engines from which these results were provided.
 #[derive(Debug, Serialize)]
 #[serde(rename_all = "camelCase")]
 pub struct SearchResult {
@@ -11,6 +25,16 @@ pub struct SearchResult {
 }
 
 impl SearchResult {
+    /// Constructs a new `SearchResult` with the given arguments needed for the struct.
+    ///
+    /// # Arguments
+    ///
+    /// * `title` - The title of the search result.
+    /// * `visiting_url` - The url which is accessed when clicked on it
+    /// (href url in html in simple words).
+    /// * `url` - The url to be displayed below the search result title in html.
+    /// * `description` - The description of the search result.
+    /// * `engine` - The names of the upstream engines from which these results were provided.
     pub fn new(
         title: String,
         visiting_url: String,
@@ -28,6 +52,17 @@ impl SearchResult {
     }
 }
 
+/// A named struct to store the raw scraped search results from the upstream search engines
+/// before aggregating them. It derives the Clone trait which is needed to write idiomatic
+/// Rust using `Iterators`.
+///
+/// # Fields
+///
+/// * `title` - The title of the search result.
+/// * `visiting_url` - The url which is accessed when clicked on it
+/// (href url in html in simple words).
+/// * `description` - The description of the search result.
+/// * `engine` - The names of the upstream engines from which these results were provided.
 #[derive(Clone)]
 pub struct RawSearchResult {
     pub title: String,
@@ -37,6 +72,15 @@ pub struct RawSearchResult {
 }
 
 impl RawSearchResult {
+    /// Constructs a new `RawSearchResult` with the given arguments needed for the struct.
+    ///
+    /// # Arguments
+    ///
+    /// * `title` - The title of the search result.
+    /// * `visiting_url` - The url which is accessed when clicked on it
+    /// (href url in html in simple words).
+    /// * `description` - The description of the search result.
+    /// * `engine` - The names of the upstream engines from which these results were provided.
     pub fn new(
         title: String,
         visiting_url: String,
@@ -50,6 +94,12 @@ impl RawSearchResult {
             engine,
         }
     }
+
+    /// A function which adds the engine name provided as a string into a vector of strings.
+    ///
+    /// # Arguments
+    ///
+    /// * `engine` - Takes an engine name provided as a String.
     pub fn add_engines(&mut self, engine: String) {
         self.engine.push(engine)
     }
@@ -59,6 +109,14 @@
     }
 }
 
+/// A named struct to store and serialize all the search results scraped and aggregated
+/// from the upstream search engines.
+///
+/// # Fields
+///
+/// * `results` - Stores the individual serializable `SearchResult` struct into a vector of
+/// `SearchResult` structs.
+/// * `page_query` - Stores the current page's search query `q` provided in the search url.
 #[derive(Debug, Serialize)]
 #[serde(rename_all = "camelCase")]
 pub struct SearchResults {
@@ -67,6 +125,14 @@ pub struct SearchResults {
 }
 
 impl SearchResults {
+    /// Constructs a new `SearchResults` with the given arguments needed for the struct.
+    ///
+    /// # Arguments
+    ///
+    /// * `results` - Takes an argument of individual serializable `SearchResult` struct
+    /// and stores it into a vector of `SearchResult` structs.
+    /// * `page_query` - Takes an argument of the current page's search query `q` provided in
+    /// the search url.
     pub fn new(results: Vec<SearchResult>, page_query: String) -> Self {
         SearchResults {
             results,
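To see how these models and their constructors relate, here is a minimal sketch based only on the argument lists documented above. The module path and the `Vec<String>` type of `engine` are assumptions, and the literal values are invented for illustration.

```rust
use websurfx::search_results_handler::aggregation_models::{
    RawSearchResult, SearchResult, SearchResults,
};

fn build_results_sketch() -> SearchResults {
    // A raw result as scraped from one upstream engine.
    let mut raw = RawSearchResult::new(
        "Rust Programming Language".to_string(),
        "https://www.rust-lang.org/".to_string(),
        "A language empowering everyone to build reliable software.".to_string(),
        vec!["duckduckgo".to_string()],
    );
    // If the same result also came back from searx, record that engine name too.
    raw.add_engines("searx".to_string());

    // The serializable form additionally carries the display url, and the
    // collection as a whole carries the page query that produced it.
    let result = SearchResult::new(
        "Rust Programming Language".to_string(),
        "https://www.rust-lang.org/".to_string(),
        "www.rust-lang.org".to_string(),
        "A language empowering everyone to build reliable software.".to_string(),
        vec!["duckduckgo".to_string(), "searx".to_string()],
    );
    SearchResults::new(vec![result], "rust".to_string())
}
```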
diff --git a/src/search_results_handler/aggregator.rs b/src/search_results_handler/aggregator.rs
index 2986a7c..096c5c7 100644
--- a/src/search_results_handler/aggregator.rs
+++ b/src/search_results_handler/aggregator.rs
@@ -1,3 +1,6 @@
+//! This module provides the functionality to scrape and gather all the results from the upstream
+//! search engines and then removes duplicate results.
+
 use std::collections::HashMap;
 
 use super::{
@@ -7,17 +10,28 @@ use super::{
 
 use crate::engines::{duckduckgo, searx};
 
-// A function that aggregates all the scraped results from the above upstream engines and
-// then removes duplicate results and if two results are found to be from two or more engines
-// then puts their names together to show the results are fetched from these upstream engines
-// and then removes all data from the HashMap and puts into a struct of all results aggregated
-// into a vector and also adds the query used into the struct this is neccessory because otherwise
-// the search bar in search remains empty if searched from the query url
-//
-// For Example:
-//
-// If you search from the url like *https://127.0.0.1/search?q=huston* then the search bar should
-// contain the word huston and not remain empty.
+/// A function that aggregates all the scraped results from the above upstream engines and
+/// then removes duplicate results. If two results are found to be from two or more engines,
+/// it puts their names together to show that the results are fetched from these upstream
+/// engines. It then moves all the data out of the HashMap into a struct which holds all the
+/// aggregated results in a vector and also adds the query used into the struct. This is
+/// necessary because otherwise the search bar remains empty if searched from the query url.
+///
+/// # Example:
+///
+/// If you search from the url like `https://127.0.0.1/search?q=huston` then the search bar should
+/// contain the word huston and not remain empty.
+///
+/// # Arguments
+///
+/// * `query` - Accepts a string to query with the above upstream search engines.
+/// * `page` - Accepts an Option which could either be a None or a valid page number.
+///
+/// # Error
+///
+/// Returns a reqwest or scraping selector error if any error occurs in the results
+/// function of either `searx` or `duckduckgo` or both, otherwise returns a `SearchResults` struct
+/// containing appropriate values.
 pub async fn aggregate(
     query: &str,
     page: Option<u32>,
diff --git a/src/search_results_handler/user_agent.rs b/src/search_results_handler/user_agent.rs
index 1b147aa..09dd684 100644
--- a/src/search_results_handler/user_agent.rs
+++ b/src/search_results_handler/user_agent.rs
@@ -1,6 +1,12 @@
+//! This module provides the functionality to generate a random user agent string.
+
 use fake_useragent::{Browsers, UserAgentsBuilder};
 
-// A function to generate random user agent to improve privacy of the user.
+/// A function to generate a random user agent to improve the privacy of the user.
+///
+/// # Returns
+///
+/// A randomly generated user agent string.
 pub fn random_user_agent() -> String {
     UserAgentsBuilder::new()
         .cache(false)
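A rough sketch of how these two pieces fit together from a caller's point of view. Only `query` and `page` are visible in the `aggregate` signature above, so any further parameters are omitted, and the module paths are assumed for illustration.

```rust
use websurfx::search_results_handler::{aggregator::aggregate, user_agent::random_user_agent};

// Sketch only: the visible part of `aggregate` takes a query string and an optional
// page number; anything else it takes is not shown in the diff and is left out here.
async fn aggregate_sketch() {
    // Each scraping run can present a fresh, randomly generated user agent.
    let user_agent: String = random_user_agent();
    println!("using user agent: {user_agent}");

    // Query the upstream engines for the first page of results.
    match aggregate("rust programming", Some(1)).await {
        Ok(_results) => println!("aggregation succeeded"),
        Err(err) => eprintln!("aggregation failed: {err}"),
    }
}
```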
diff --git a/src/server/routes.rs b/src/server/routes.rs
index 05467c3..5a0914b 100644
--- a/src/server/routes.rs
+++ b/src/server/routes.rs
@@ -1,3 +1,7 @@
+//! This module provides the functionality to handle different routes of the `websurfx`
+//! meta search engine website and provide an appropriate response to each route/page
+//! when requested.
+
 use std::fs::read_to_string;
 
 use crate::search_results_handler::aggregator::aggregate;
@@ -5,12 +9,21 @@ use actix_web::{get, web, HttpRequest, HttpResponse};
 use handlebars::Handlebars;
 use serde::Deserialize;
 
+/// A named struct which deserializes all the user provided search parameters and stores them.
+///
+/// # Fields
+///
+/// * `q` - It stores the search parameter option `q` (or query in simple words)
+/// of the search url.
+/// * `page` - It stores the search parameter `page` (or pageno in simple words)
+/// of the search url.
 #[derive(Debug, Deserialize)]
 struct SearchParams {
     q: Option<String>,
     page: Option<u32>,
 }
 
+/// Handles the route of the index page or main page of the `websurfx` meta search engine website.
 #[get("/")]
 pub async fn index(
     hbs: web::Data<Handlebars<'_>>,
@@ -19,6 +32,8 @@ pub async fn index(
     Ok(HttpResponse::Ok().body(page_content))
 }
 
+/// Handles the route of any other accessed route/page which is not provided by the
+/// website, essentially the 404 error page.
 pub async fn not_found(
     hbs: web::Data<Handlebars<'_>>,
 ) -> Result<HttpResponse, Box<dyn std::error::Error>> {
@@ -29,6 +44,20 @@ pub async fn not_found(
         .body(page_content))
 }
 
+/// Handles the route of the search page of the `websurfx` meta search engine website. It takes
+/// two search url parameters `q` and `page`, where the `page` parameter is optional.
+///
+/// # Example
+///
+/// ```bash
+/// curl "http://127.0.0.1:8080/search?q=sweden&page=1"
+/// ```
+///
+/// Or
+///
+/// ```bash
+/// curl "http://127.0.0.1:8080/search?q=sweden"
+/// ```
 #[get("/search")]
 pub async fn search(
     hbs: web::Data<Handlebars<'_>>,
@@ -54,6 +83,7 @@ pub async fn search(
     }
 }
 
+/// Handles the route of the robots.txt page of the `websurfx` meta search engine website.
 #[get("/robots.txt")]
 pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
     let page_content: String = read_to_string("./public/robots.txt")?;
@@ -62,6 +92,7 @@ pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
         .body(page_content))
 }
 
+/// Handles the route of the about page of the `websurfx` meta search engine website.
 #[get("/about")]
 pub async fn about(
     hbs: web::Data<Handlebars<'_>>,
@@ -70,6 +101,7 @@ pub async fn about(
     Ok(HttpResponse::Ok().body(page_content))
 }
 
+/// Handles the route of the settings page of the `websurfx` meta search engine website.
 #[get("/settings")]
 pub async fn settings(
     hbs: web::Data<Handlebars<'_>>,
@@ -77,3 +109,6 @@ pub async fn settings(
     let page_content: String = hbs.render("settings", &"")?;
     Ok(HttpResponse::Ok().body(page_content))
 }
+
+// TODO: Write tests for testing the parameters of the search function such that if it is provided
+// with something other than a u32, like alphabets and special characters, then it should panic
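The TODO above can be turned into a concrete test along the following lines. This is only a sketch: it assumes the `search` handler extracts `SearchParams` through `web::Query`, so an invalid `page` value is rejected by the extractor as a client error rather than reaching the handler body, and the empty `Handlebars` registry stands in for whatever template setup `main` performs.

```rust
#[cfg(test)]
mod tests {
    use actix_web::{test, web, App};
    use handlebars::Handlebars;

    use crate::server::routes;

    #[actix_web::test]
    async fn non_numeric_page_parameter_is_rejected() {
        // Illustrative setup only: the real application registers its templates here.
        let handlebars: Handlebars<'_> = Handlebars::new();
        let app = test::init_service(
            App::new()
                .app_data(web::Data::new(handlebars))
                .service(routes::search),
        )
        .await;

        // `page` is documented as a u32, so alphabets and special characters
        // should never reach the handler body.
        let req = test::TestRequest::get()
            .uri("/search?q=sweden&page=abc!")
            .to_request();
        let response = test::call_service(&app, req).await;
        assert!(response.status().is_client_error());
    }
}
```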