0
0
mirror of https://github.com/neon-mmd/websurfx.git synced 2024-11-21 21:48:21 -05:00

Improving source code documentation.

This commit is contained in:
neon_arch 2023-04-27 17:53:28 +03:00
parent ed13a16ec5
commit fc69acea8f
7 changed files with 160 additions and 18 deletions

View File

@ -1,3 +1,8 @@
//! Main module of the application
//!
//! This module contains the main function which handles the logging of the application to the
//! stdout and handles the command line arguments provided and launches the `websurfx` server.
use std::ops::RangeInclusive;
use websurfx::server::routes;
@ -8,6 +13,7 @@ use clap::{command, Parser};
use env_logger::Env;
use handlebars::Handlebars;
/// A commandline arguments struct.
#[derive(Parser, Debug, Default)]
#[clap(author = "neon_arch", version, about = "Websurfx server application")]
#[command(propagate_version = true)]
@ -19,8 +25,18 @@ struct CliArgs {
/// The inclusive range of port numbers, from 1024 up to and including 65535.
// NOTE(review): presumably the range that `is_port_in_range` validates against — confirm, since
// the body of that function is not fully visible here.
const PORT_RANGE: RangeInclusive<usize> = 1024..=65535;
// A function to check whether port is valid u32 number or is in range
// between [1024-65536] otherwise display an appropriate error message.
/// A function to check whether the port is a valid number in the range
/// [1024-65535], otherwise returns an appropriate error message.
///
/// # Arguments
///
/// * `s` - Takes a commandline argument port as a string.
///
/// # Errors
///
/// Checks whether the argument provided to the `--port` commandline option is a valid
/// u16 value and returns it as a u16 value, otherwise returns an error with an
/// appropriate error message.
fn is_port_in_range(s: &str) -> Result<u16, String> {
let port: usize = s
.parse()
@ -36,7 +52,12 @@ fn is_port_in_range(s: &str) -> Result<u16, String> {
}
}
// The function that launches the main server and handle routing functionality
/// The function that launches the main server and registers all the routes of the website.
///
/// # Error
///
/// Returns an error if the port is being used by something else on the system and is not
/// available for being used for other applications.
#[actix_web::main]
async fn main() -> std::io::Result<()> {
let args = CliArgs::parse();
@ -68,7 +89,7 @@ async fn main() -> std::io::Result<()> {
.service(routes::settings) // settings page
.default_service(web::route().to(routes::not_found)) // error page
})
// Start server on 127.0.0.1:8080
// Start server on 127.0.0.1 with the user provided port number, for example 127.0.0.1:8080.
.bind(("127.0.0.1", args.port))?
.run()
.await

View File

@ -23,7 +23,7 @@ use crate::search_results_handler::aggregation_models::RawSearchResult;
/// # Errors
///
/// Returns a reqwest error if the user is not connected to the internet or if there is a failure to
/// reach the above **upstream search engine** page and also returns error if the scraping
/// reach the above `upstream search engine` page and also returns error if the scraping
/// selector fails to initialize.
pub async fn results(
query: &str,

View File

@ -23,7 +23,7 @@ use crate::search_results_handler::aggregation_models::RawSearchResult;
/// # Errors
///
/// Returns a reqwest error if the user is not connected to the internet or if there is a failure to
/// reach the above **upstream search engine** page and also returns error if the scraping
/// reach the above `upstream search engine` page and also returns error if the scraping
/// selector fails to initialize.
pub async fn results(
query: &str,

View File

@ -1,5 +1,19 @@
//! This module provides public models for handling, storing and serializing of search results
//! data scraped from the upstream search engines.
use serde::Serialize;
/// A named struct to store and serialize the individual search result from all the scraped
/// and aggregated search results from the upstream search engines.
///
/// # Fields
///
/// * `title` - The title of the search result.
/// * `visiting_url` - The url which is accessed when clicked on it (href url in html in simple
/// words).
/// * `url` - The url to be displayed below the search result title in html.
/// * `description` - The description of the search result.
/// * `engine` - The names of the upstream engines from which these results were provided.
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct SearchResult {
@ -11,6 +25,16 @@ pub struct SearchResult {
}
impl SearchResult {
/// Constructs a new `SearchResult` with the given arguments needed for the struct.
///
/// # Arguments
///
/// * `title` - The title of the search result.
/// * `visiting_url` - The url which is accessed when clicked on it
/// (href url in html in simple words).
/// * `url` - The url to be displayed below the search result title in html.
/// * `description` - The description of the search result.
/// * `engine` - The names of the upstream engines from which these results were provided.
pub fn new(
title: String,
visiting_url: String,
@ -28,6 +52,17 @@ impl SearchResult {
}
}
/// A named struct to store the raw scraped search results from the
/// upstream search engines before aggregating them. It derives the Clone trait which is needed
/// to write idiomatic rust using `Iterators`.
///
/// # Fields
///
/// * `title` - The title of the search result.
/// * `visiting_url` - The url which is accessed when clicked on it
/// (href url in html in simple words).
/// * `description` - The description of the search result.
/// * `engine` - The names of the upstream engines from which these results were provided.
#[derive(Clone)]
pub struct RawSearchResult {
pub title: String,
@ -37,6 +72,15 @@ pub struct RawSearchResult {
}
impl RawSearchResult {
/// Constructs a new `RawSearchResult` with the given arguments needed for the struct.
///
/// # Arguments
///
/// * `title` - The title of the search result.
/// * `visiting_url` - The url which is accessed when clicked on it
/// (href url in html in simple words).
/// * `description` - The description of the search result.
/// * `engine` - The names of the upstream engines from which these results were provided.
pub fn new(
title: String,
visiting_url: String,
@ -50,6 +94,12 @@ impl RawSearchResult {
engine,
}
}
/// Appends the given engine name to the engine names stored in this result.
///
/// # Arguments
///
/// * `engine` - The engine name to append, provided as a String.
pub fn add_engines(&mut self, engine: String) {
    self.engine.push(engine);
}
@ -59,6 +109,14 @@ impl RawSearchResult {
}
}
/// A named struct to store and serialize all the search results scraped and aggregated
/// from the upstream search engines.
///
/// # Fields
///
/// * `results` - Stores the individual serializable `SearchResult` struct into a vector of
/// `SearchResult` structs.
/// * `page_query` - Stores the current page's search query `q` provided in the search url.
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct SearchResults {
@ -67,6 +125,14 @@ pub struct SearchResults {
}
impl SearchResults {
/// Constructs a new `SearchResults` with the given arguments needed for the struct.
///
/// # Arguments
///
/// * `results` - Takes an argument of individual serializable `SearchResult` struct
/// and stores it into a vector of `SearchResult` structs.
/// * `page_query` - Takes an argument of current page's search query `q` provided in
/// the search url.
pub fn new(results: Vec<SearchResult>, page_query: String) -> Self {
SearchResults {
results,

View File

@ -1,3 +1,6 @@
//! This module provides the functionality to scrape and gather all the results from the upstream
//! search engines and then removes duplicate results.
use std::collections::HashMap;
use super::{
@ -7,17 +10,28 @@ use super::{
use crate::engines::{duckduckgo, searx};
// A function that aggregates all the scraped results from the above upstream engines and
// then removes duplicate results and if two results are found to be from two or more engines
// then puts their names together to show the results are fetched from these upstream engines
// and then removes all data from the HashMap and puts into a struct of all results aggregated
// into a vector and also adds the query used into the struct this is neccessory because otherwise
// the search bar in search remains empty if searched from the query url
//
// For Example:
//
// If you search from the url like *https://127.0.0.1/search?q=huston* then the search bar should
// contain the word huston and not remain empty.
/// A function that aggregates all the scraped results from the above upstream engines and
/// then removes duplicate results and if two results are found to be from two or more engines
/// then puts their names together to show the results are fetched from these upstream engines
/// and then removes all data from the HashMap and puts into a struct of all results aggregated
/// into a vector and also adds the query used into the struct. This is necessary because
/// otherwise the search bar in search remains empty if searched from the query url.
///
/// # Example:
///
/// If you search from the url like `https://127.0.0.1/search?q=huston` then the search bar should
/// contain the word huston and not remain empty.
///
/// # Arguments
///
/// * `query` - Accepts a string to query with the above upstream search engines.
/// * `page` - Accepts an Option<u32> which could either be a None or a valid page number.
///
/// # Errors
///
/// Returns a reqwest or scraping selector error if any error occurs in the results
/// function of either `searx` or `duckduckgo` or both, otherwise returns a `SearchResults`
/// struct containing appropriate values.
pub async fn aggregate(
query: &str,
page: Option<u32>,

View File

@ -1,6 +1,12 @@
//! This module provides the functionality to generate a random user agent string.
use fake_useragent::{Browsers, UserAgentsBuilder};
// A function to generate random user agent to improve privacy of the user.
/// A function to generate random user agent to improve privacy of the user.
///
/// # Returns
///
/// A randomly generated user agent string.
pub fn random_user_agent() -> String {
UserAgentsBuilder::new()
.cache(false)

View File

@ -1,3 +1,7 @@
//! This module provides the functionality to handle different routes of the `websurfx`
//! meta search engine website and provide appropriate response to each route/page
//! when requested.
use std::fs::read_to_string;
use crate::search_results_handler::aggregator::aggregate;
@ -5,12 +9,21 @@ use actix_web::{get, web, HttpRequest, HttpResponse};
use handlebars::Handlebars;
use serde::Deserialize;
/// A named struct which deserializes the user provided search parameters and stores them.
///
/// Both parameters are optional, so either one may be absent.
///
/// # Fields
///
/// * `q` - It stores the search parameter option `q` (or query in simple words)
/// of the search url.
/// * `page` - It stores the search parameter `page` (or page number in simple words)
/// of the search url.
#[derive(Debug, Deserialize)]
struct SearchParams {
q: Option<String>,
page: Option<u32>,
}
/// Handles the route of index page or main page of the `websurfx` meta search engine website.
#[get("/")]
pub async fn index(
hbs: web::Data<Handlebars<'_>>,
@ -19,6 +32,8 @@ pub async fn index(
Ok(HttpResponse::Ok().body(page_content))
}
/// Handles the route of any other accessed route/page which is not provided by the
/// website, essentially the 404 error page.
pub async fn not_found(
hbs: web::Data<Handlebars<'_>>,
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
@ -29,6 +44,20 @@ pub async fn not_found(
.body(page_content))
}
/// Handles the route of search page of the `websurfx` meta search engine website and it takes
/// two search url parameters `q` and `page` where `page` parameter is optional.
///
/// # Example
///
/// ```bash
/// curl "http://127.0.0.1:8080/search?q=sweden&page=1"
/// ```
///
/// Or
///
/// ```bash
/// curl "http://127.0.0.1:8080/search?q=sweden"
/// ```
#[get("/search")]
pub async fn search(
hbs: web::Data<Handlebars<'_>>,
@ -54,6 +83,7 @@ pub async fn search(
}
}
/// Handles the route of robots.txt page of the `websurfx` meta search engine website.
#[get("/robots.txt")]
pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
let page_content: String = read_to_string("./public/robots.txt")?;
@ -62,6 +92,7 @@ pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std:
.body(page_content))
}
/// Handles the route of about page of the `websurfx` meta search engine website.
#[get("/about")]
pub async fn about(
hbs: web::Data<Handlebars<'_>>,
@ -70,6 +101,7 @@ pub async fn about(
Ok(HttpResponse::Ok().body(page_content))
}
/// Handles the route of settings page of the `websurfx` meta search engine website.
#[get("/settings")]
pub async fn settings(
hbs: web::Data<Handlebars<'_>>,
@ -77,3 +109,6 @@ pub async fn settings(
let page_content: String = hbs.render("settings", &"")?;
Ok(HttpResponse::Ok().body(page_content))
}
// TODO: Write tests for testing parameters for search function that if provided with something
// other than u32 like alphabets and special characters then it should panic