From 493c56bd02c4748cf8fd88df40ae995c6107f5d9 Mon Sep 17 00:00:00 2001
From: neon_arch
Date: Sun, 3 Sep 2023 20:50:50 +0300
Subject: [PATCH 1/3] ⚙️ refactor: reorganize code & restructure codebase for
 better maintainability (#207)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/config/mod.rs                             |  1 -
 src/config/parser.rs                          |  6 +-
 src/engines/duckduckgo.rs                     |  4 +-
 src/engines/mod.rs                            |  1 -
 src/engines/searx.rs                          |  4 +-
 src/lib.rs                                    | 15 +--
 src/{results => models}/aggregation_models.rs |  2 +-
 src/{engines => models}/engine_models.rs      |  9 +-
 src/models/mod.rs                             |  8 ++
 src/{config => models}/parser_models.rs       |  0
 src/models/server_models.rs                   | 26 +++++
 src/results/aggregator.rs                     | 22 ++---
 src/results/mod.rs                            |  1 -
 src/server/mod.rs                             |  1 +
 src/server/router.rs                          | 64 +++++++++++++
 src/server/routes/mod.rs                      |  3 +
 src/server/{routes.rs => routes/search.rs}    | 96 ++-----------------
 17 files changed, 141 insertions(+), 122 deletions(-)
 rename src/{results => models}/aggregation_models.rs (98%)
 rename src/{engines => models}/engine_models.rs (95%)
 create mode 100644 src/models/mod.rs
 rename src/{config => models}/parser_models.rs (100%)
 create mode 100644 src/models/server_models.rs
 create mode 100644 src/server/router.rs
 create mode 100644 src/server/routes/mod.rs
 rename src/server/{routes.rs => routes/search.rs} (62%)

diff --git a/src/config/mod.rs b/src/config/mod.rs
index 331a3d7..babc54f 100644
--- a/src/config/mod.rs
+++ b/src/config/mod.rs
@@ -2,4 +2,3 @@
 //! and convert the config options into rust readable form.
 
 pub mod parser;
-pub mod parser_models;
diff --git a/src/config/parser.rs b/src/config/parser.rs
index ca53f1b..72df890 100644
--- a/src/config/parser.rs
+++ b/src/config/parser.rs
@@ -3,7 +3,7 @@
 
 use crate::handler::paths::{file_path, FileType};
 
-use super::parser_models::Style;
+use crate::models::parser_models::Style;
 use log::LevelFilter;
 use rlua::Lua;
 use std::{collections::HashMap, fs, thread::available_parallelism};
@@ -27,7 +27,7 @@ pub struct Config {
     /// It stores the option to enable or disable debug mode.
     pub debug: bool,
     /// It stores all the engine names that were enabled by the user.
-    pub upstream_search_engines: Vec<crate::engines::engine_models::EngineHandler>,
+    pub upstream_search_engines: Vec<crate::models::engine_models::EngineHandler>,
     /// It stores the time (secs) which controls the server request timeout.
     pub request_timeout: u8,
     /// It stores the number of threads which the app will use to run.
@@ -99,7 +99,7 @@ impl Config {
                 .get::<_, HashMap<String, bool>>("upstream_search_engines")?
                 .into_iter()
                 .filter_map(|(key, value)| value.then_some(key))
-                .filter_map(|engine| crate::engines::engine_models::EngineHandler::new(&engine))
+                .filter_map(|engine| crate::models::engine_models::EngineHandler::new(&engine))
                 .collect(),
             request_timeout: globals.get::<_, u8>("request_timeout")?,
             threads,
diff --git a/src/engines/duckduckgo.rs b/src/engines/duckduckgo.rs
index 5b7a452..66f0c85 100644
--- a/src/engines/duckduckgo.rs
+++ b/src/engines/duckduckgo.rs
@@ -7,9 +7,9 @@ use std::collections::HashMap;
 use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
 use scraper::{Html, Selector};
 
-use crate::results::aggregation_models::SearchResult;
+use crate::models::aggregation_models::SearchResult;
 
-use super::engine_models::{EngineError, SearchEngine};
+use crate::models::engine_models::{EngineError, SearchEngine};
 
 use error_stack::{IntoReport, Report, Result, ResultExt};
diff --git a/src/engines/mod.rs b/src/engines/mod.rs
index 8267c93..0016728 100644
--- a/src/engines/mod.rs
+++ b/src/engines/mod.rs
@@ -4,5 +4,4 @@
 //! code. Moreover, it also provides a custom error for the upstream search engine handling code.
 
 pub mod duckduckgo;
-pub mod engine_models;
 pub mod searx;
diff --git a/src/engines/searx.rs b/src/engines/searx.rs
index 3f261ad..611c173 100644
--- a/src/engines/searx.rs
+++ b/src/engines/searx.rs
@@ -6,9 +6,9 @@ use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
 use scraper::{Html, Selector};
 use std::collections::HashMap;
 
-use crate::results::aggregation_models::SearchResult;
+use crate::models::aggregation_models::SearchResult;
 
-use super::engine_models::{EngineError, SearchEngine};
+use crate::models::engine_models::{EngineError, SearchEngine};
 
 use error_stack::{IntoReport, Report, Result, ResultExt};
 
 /// A new Searx engine type defined in-order to implement the `SearchEngine` trait which allows to
diff --git a/src/lib.rs b/src/lib.rs
index 52fb56d..97208be 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -9,12 +9,13 @@ pub mod cache;
 pub mod config;
 pub mod engines;
 pub mod handler;
+pub mod models;
 pub mod results;
 pub mod server;
 
 use std::net::TcpListener;
 
-use crate::server::routes;
+use crate::server::router;
 
 use actix_cors::Cors;
 use actix_files as fs;
@@ -81,12 +82,12 @@ pub fn run(listener: TcpListener, config: Config) -> std::io::Result<Server> {
             fs::Files::new("/images", format!("{}/images", public_folder_path))
                 .show_files_listing(),
         )
-        .service(routes::robots_data) // robots.txt
-        .service(routes::index) // index page
-        .service(routes::search) // search page
-        .service(routes::about) // about page
-        .service(routes::settings) // settings page
-        .default_service(web::route().to(routes::not_found)) // error page
+        .service(router::robots_data) // robots.txt
+        .service(router::index) // index page
+        .service(router::search) // search page
+        .service(router::about) // about page
+        .service(router::settings) // settings page
+        .default_service(web::route().to(router::not_found)) // error page
     })
     .workers(cloned_config_threads_opt as usize)
     // Start server on 127.0.0.1 with the user provided port number. For example: 127.0.0.1:8080.
diff --git a/src/results/aggregation_models.rs b/src/models/aggregation_models.rs
similarity index 98%
rename from src/results/aggregation_models.rs
rename to src/models/aggregation_models.rs
index 76d896d..51a4cc8 100644
--- a/src/results/aggregation_models.rs
+++ b/src/models/aggregation_models.rs
@@ -3,7 +3,7 @@
 
 use serde::{Deserialize, Serialize};
 
-use crate::{config::parser_models::Style, engines::engine_models::EngineError};
+use super::{engine_models::EngineError, parser_models::Style};
 
 /// A named struct to store the raw scraped search results from the
 /// upstream search engines before aggregating it. It derives the Clone trait which is needed
diff --git a/src/engines/engine_models.rs b/src/models/engine_models.rs
similarity index 95%
rename from src/engines/engine_models.rs
rename to src/models/engine_models.rs
index 2f28ee5..f6f99d2 100644
--- a/src/engines/engine_models.rs
+++ b/src/models/engine_models.rs
@@ -1,7 +1,7 @@
 //! This module provides the error enum to handle different errors associated with requesting data from
 //! the upstream search engines with the search query provided by the user.
 
-use crate::results::aggregation_models::SearchResult;
+use super::aggregation_models::SearchResult;
 use error_stack::{IntoReport, Result, ResultExt};
 use std::{collections::HashMap, fmt, time::Duration};
 
@@ -138,8 +138,11 @@ impl EngineHandler {
     pub fn new(engine_name: &str) -> Option<Self> {
         let engine: (&'static str, Box<dyn SearchEngine>) =
             match engine_name.to_lowercase().as_str() {
-                "duckduckgo" => ("duckduckgo", Box::new(super::duckduckgo::DuckDuckGo)),
-                "searx" => ("searx", Box::new(super::searx::Searx)),
+                "duckduckgo" => (
+                    "duckduckgo",
+                    Box::new(crate::engines::duckduckgo::DuckDuckGo),
+                ),
+                "searx" => ("searx", Box::new(crate::engines::searx::Searx)),
                 _ => return None,
             };
diff --git a/src/models/mod.rs b/src/models/mod.rs
new file mode 100644
index 0000000..6a7d235
--- /dev/null
+++ b/src/models/mod.rs
@@ -0,0 +1,8 @@
+//! This module provides modules which in turn provide various models for aggregating search
+//! results, parsing the config file, providing a trait to standardize search engine handling code,
+//! custom engine error for the search engine, etc.
+
+pub mod aggregation_models;
+pub mod engine_models;
+pub mod parser_models;
+pub mod server_models;
diff --git a/src/config/parser_models.rs b/src/models/parser_models.rs
similarity index 100%
rename from src/config/parser_models.rs
rename to src/models/parser_models.rs
diff --git a/src/models/server_models.rs b/src/models/server_models.rs
new file mode 100644
index 0000000..3da6717
--- /dev/null
+++ b/src/models/server_models.rs
@@ -0,0 +1,26 @@
+//! This module provides the models to parse cookies and search parameters from the search
+//! engine website.
+use serde::Deserialize;
+
+/// A named struct which deserializes all the user provided search parameters and stores them.
+#[derive(Deserialize)]
+pub struct SearchParams {
+    /// It stores the search parameter option `q` (or query in simple words)
+    /// of the search url.
+    pub q: Option<String>,
+    /// It stores the search parameter `page` (or pageno in simple words)
+    /// of the search url.
+    pub page: Option<u32>,
+}
+
+/// A named struct which is used to deserialize the cookies fetched from the client side.
+#[allow(dead_code)]
+#[derive(Deserialize)]
+pub struct Cookie {
+    /// It stores the theme name used in the website.
+    pub theme: String,
+    /// It stores the colorscheme name used for the website theme.
+    pub colorscheme: String,
+    /// It stores the user selected upstream search engines selected from the UI.
+    pub engines: Vec<String>,
+}
diff --git a/src/results/aggregator.rs b/src/results/aggregator.rs
index 3f06ecb..38cff6a 100644
--- a/src/results/aggregator.rs
+++ b/src/results/aggregator.rs
@@ -1,27 +1,23 @@
 //! This module provides the functionality to scrape and gather all the results from the upstream
 //! search engines and then remove duplicate results.
 
+use super::user_agent::random_user_agent;
+use crate::handler::paths::{file_path, FileType};
+use crate::models::{
+    aggregation_models::{EngineErrorInfo, SearchResult, SearchResults},
+    engine_models::{EngineError, EngineHandler},
+};
+use error_stack::Report;
+use rand::Rng;
+use regex::Regex;
 use std::{
     collections::HashMap,
     io::{BufReader, Read},
     time::Duration,
 };
-
-use super::{
-    aggregation_models::{EngineErrorInfo, SearchResult, SearchResults},
-    user_agent::random_user_agent,
-};
-use error_stack::Report;
-use rand::Rng;
-use regex::Regex;
 use std::{fs::File, io::BufRead};
 use tokio::task::JoinHandle;
 
-use crate::{
-    engines::engine_models::{EngineError, EngineHandler},
-    handler::paths::{file_path, FileType},
-};
-
 /// Aliases for long type annotations
 type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<EngineError>>>>;
diff --git a/src/results/mod.rs b/src/results/mod.rs
index b08eec0..9ec3229 100644
--- a/src/results/mod.rs
+++ b/src/results/mod.rs
@@ -2,6 +2,5 @@
 //! results from the upstream search engines and filters it if safe search is set to 3 or 4. Also,
 //! provides various models to aggregate search results into a standardized form.
 
-pub mod aggregation_models;
 pub mod aggregator;
 pub mod user_agent;
diff --git a/src/server/mod.rs b/src/server/mod.rs
index f5d2ce9..7f4274f 100644
--- a/src/server/mod.rs
+++ b/src/server/mod.rs
@@ -3,4 +3,5 @@
 //! the search route. Also, caches the next, current and previous search results in the search
 //! routes with the help of the redis server.
 
+pub mod router;
 pub mod routes;
diff --git a/src/server/router.rs b/src/server/router.rs
new file mode 100644
index 0000000..69a3ede
--- /dev/null
+++ b/src/server/router.rs
@@ -0,0 +1,64 @@
+//! This module provides the functionality to handle different routes of the `websurfx`
+//! meta search engine website and provide appropriate response to each route/page
+//! when requested.
+
+use crate::{
+    config::parser::Config,
+    handler::paths::{file_path, FileType},
+};
+use actix_web::{get, web, HttpRequest, HttpResponse};
+use handlebars::Handlebars;
+use std::fs::read_to_string;
+
+/// Handles the route of index page or main page of the `websurfx` meta search engine website.
+#[get("/")]
+pub async fn index(
+    hbs: web::Data<Handlebars<'_>>,
+    config: web::Data<Config>,
+) -> Result<HttpResponse, Box<dyn std::error::Error>> {
+    let page_content: String = hbs.render("index", &config.style).unwrap();
+    Ok(HttpResponse::Ok().body(page_content))
+}
+
+/// Handles the route of any other accessed route/page which is not provided by the
+/// website essentially the 404 error page.
+pub async fn not_found(
+    hbs: web::Data<Handlebars<'_>>,
+    config: web::Data<Config>,
+) -> Result<HttpResponse, Box<dyn std::error::Error>> {
+    let page_content: String = hbs.render("404", &config.style)?;
+
+    Ok(HttpResponse::Ok()
+        .content_type("text/html; charset=utf-8")
+        .body(page_content))
+}
+
+/// Handles the route of robots.txt page of the `websurfx` meta search engine website.
+#[get("/robots.txt")]
+pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
+    let page_content: String =
+        read_to_string(format!("{}/robots.txt", file_path(FileType::Theme)?))?;
+    Ok(HttpResponse::Ok()
+        .content_type("text/plain; charset=ascii")
+        .body(page_content))
+}
+
+/// Handles the route of about page of the `websurfx` meta search engine website.
+#[get("/about")]
+pub async fn about(
+    hbs: web::Data<Handlebars<'_>>,
+    config: web::Data<Config>,
+) -> Result<HttpResponse, Box<dyn std::error::Error>> {
+    let page_content: String = hbs.render("about", &config.style)?;
+    Ok(HttpResponse::Ok().body(page_content))
+}
+
+/// Handles the route of settings page of the `websurfx` meta search engine website.
+#[get("/settings")]
+pub async fn settings(
+    hbs: web::Data<Handlebars<'_>>,
+    config: web::Data<Config>,
+) -> Result<HttpResponse, Box<dyn std::error::Error>> {
+    let page_content: String = hbs.render("settings", &config.style)?;
+    Ok(HttpResponse::Ok().body(page_content))
+}
diff --git a/src/server/routes/mod.rs b/src/server/routes/mod.rs
new file mode 100644
index 0000000..6bc5750
--- /dev/null
+++ b/src/server/routes/mod.rs
@@ -0,0 +1,3 @@
+//! This module provides modules to handle various routes in the search engine website.
+
+pub mod search;
diff --git a/src/server/routes.rs b/src/server/routes/search.rs
similarity index 62%
rename from src/server/routes.rs
rename to src/server/routes/search.rs
index 818fac5..a4839fb 100644
--- a/src/server/routes.rs
+++ b/src/server/routes/search.rs
@@ -1,67 +1,19 @@
-//! This module provides the functionality to handle different routes of the `websurfx`
-//! meta search engine website and provide appropriate response to each route/page
-//! when requested.
-
-use std::fs::read_to_string;
+//! This module handles the search route of the search engine website.
 
 use crate::{
     cache::cacher::RedisCache,
     config::parser::Config,
-    engines::engine_models::EngineHandler,
-    handler::paths::{file_path, FileType},
-    results::{aggregation_models::SearchResults, aggregator::aggregate},
+    models::{
+        aggregation_models::SearchResults,
+        engine_models::EngineHandler,
+        server_models::{Cookie, SearchParams},
+    },
+    results::aggregator::aggregate,
 };
 use actix_web::{get, web, HttpRequest, HttpResponse};
 use handlebars::Handlebars;
-use serde::Deserialize;
 use tokio::join;
 
-/// A named struct which deserializes all the user provided search parameters and stores them.
-#[derive(Deserialize)]
-struct SearchParams {
-    /// It stores the search parameter option `q` (or query in simple words)
-    /// of the search url.
-    q: Option<String>,
-    /// It stores the search parameter `page` (or pageno in simple words)
-    /// of the search url.
-    page: Option<u32>,
-}
-
-/// Handles the route of index page or main page of the `websurfx` meta search engine website.
-#[get("/")]
-pub async fn index(
-    hbs: web::Data<Handlebars<'_>>,
-    config: web::Data<Config>,
-) -> Result<HttpResponse, Box<dyn std::error::Error>> {
-    let page_content: String = hbs.render("index", &config.style).unwrap();
-    Ok(HttpResponse::Ok().body(page_content))
-}
-
-/// Handles the route of any other accessed route/page which is not provided by the
-/// website essentially the 404 error page.
-pub async fn not_found(
-    hbs: web::Data<Handlebars<'_>>,
-    config: web::Data<Config>,
-) -> Result<HttpResponse, Box<dyn std::error::Error>> {
-    let page_content: String = hbs.render("404", &config.style)?;
-
-    Ok(HttpResponse::Ok()
-        .content_type("text/html; charset=utf-8")
-        .body(page_content))
-}
-
-/// A named struct which is used to deserialize the cookies fetched from the client side.
-#[allow(dead_code)]
-#[derive(Deserialize)]
-struct Cookie {
-    /// It stores the theme name used in the website.
-    theme: String,
-    /// It stores the colorscheme name used for the website theme.
-    colorscheme: String,
-    /// It stores the user selected upstream search engines selected from the UI.
-    engines: Vec<String>,
-}
-
 /// Handles the route of search page of the `websurfx` meta search engine website and it takes
 /// two search url parameters `q` and `page` where `page` parameter is optional.
 ///
@@ -178,9 +130,7 @@ async fn results(
     // default selected upstream search engines from the config file otherwise
     // parse the non-empty cookie and grab the user selected engines from the
     // UI and use that.
-    let mut results: crate::results::aggregation_models::SearchResults = match req
-        .cookie("appCookie")
-    {
+    let mut results: SearchResults = match req.cookie("appCookie") {
         Some(cookie_value) => {
             let cookie_value: Cookie = serde_json::from_str(cookie_value.name_value().1)?;
 
@@ -218,33 +168,3 @@ async fn results(
         }
     }
 }
-
-/// Handles the route of robots.txt page of the `websurfx` meta search engine website.
-#[get("/robots.txt")]
-pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
-    let page_content: String =
-        read_to_string(format!("{}/robots.txt", file_path(FileType::Theme)?))?;
-    Ok(HttpResponse::Ok()
-        .content_type("text/plain; charset=ascii")
-        .body(page_content))
-}
-
-/// Handles the route of about page of the `websurfx` meta search engine website.
-#[get("/about")]
-pub async fn about(
-    hbs: web::Data<Handlebars<'_>>,
-    config: web::Data<Config>,
-) -> Result<HttpResponse, Box<dyn std::error::Error>> {
-    let page_content: String = hbs.render("about", &config.style)?;
-    Ok(HttpResponse::Ok().body(page_content))
-}
-
-/// Handles the route of settings page of the `websurfx` meta search engine website.
-#[get("/settings")]
-pub async fn settings(
-    hbs: web::Data<Handlebars<'_>>,
-    config: web::Data<Config>,
-) -> Result<HttpResponse, Box<dyn std::error::Error>> {
-    let page_content: String = hbs.render("settings", &config.style)?;
-    Ok(HttpResponse::Ok().body(page_content))
-}

From db009454c82deec7658b05f2b18c076d6d777235 Mon Sep 17 00:00:00 2001
From: neon_arch
Date: Sun, 3 Sep 2023 21:03:58 +0300
Subject: [PATCH 2/3] ⚙️ refactor: change import path for search route (#207)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/lib.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/lib.rs b/src/lib.rs
index 97208be..f03751b 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -84,7 +84,7 @@ pub fn run(listener: TcpListener, config: Config) -> std::io::Result<Server> {
         )
         .service(router::robots_data) // robots.txt
         .service(router::index) // index page
-        .service(router::search) // search page
+        .service(server::routes::search::search) // search page
         .service(router::about) // about page
         .service(router::settings) // settings page
         .default_service(web::route().to(router::not_found)) // error page

From 485a5a1e8a6c491cf7ae129fe05f41713ff0211b Mon Sep 17 00:00:00 2001
From: neon_arch
Date: Sun, 3 Sep 2023 21:05:31 +0300
Subject: [PATCH 3/3] 🚀 chore: bump the app version (#207)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Cargo.lock | 30 +++++++++++++++---------------
 Cargo.toml |  2 +-
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 67165f7..aa4127a 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -103,7 +103,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e01ed3140b2f8d422c68afa1ed2e85d996ea619c988ac834d255db32138655cb"
 dependencies = [
  "quote 1.0.33",
- "syn 2.0.30",
+ "syn 2.0.31",
 ]
 
 [[package]]
@@ -216,7 +216,7 @@ dependencies = [
  "actix-router",
  "proc-macro2 1.0.66",
  "quote 1.0.33",
- "syn 2.0.30",
+ "syn 2.0.31",
 ]
 
 [[package]]
@@ -302,7 +302,7 @@ checksum = "bc00ceb34980c03614e35a3a4e218276a0a824e911d07651cd0d858a51e8c0f0"
 dependencies = [
  "proc-macro2 1.0.66",
  "quote 1.0.33",
- "syn 2.0.30",
+ "syn 2.0.31",
 ]
 
 [[package]]
@@ -798,7 +798,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331"
 dependencies = [
  "quote 1.0.33",
- "syn 2.0.30",
+ "syn 2.0.31",
 ]
 
 [[package]]
@@ -1825,7 +1825,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
 dependencies = [
  "proc-macro2 1.0.66",
  "quote 1.0.33",
- "syn 2.0.30",
+ "syn 2.0.31",
 ]
 
 [[package]]
@@ -1944,7 +1944,7 @@ dependencies = [
  "pest_meta",
  "proc-macro2 1.0.66",
  "quote 1.0.33",
- "syn 2.0.30",
+ "syn 2.0.31",
 ]
 
 [[package]]
@@ -2046,7 +2046,7 @@ dependencies = [
  "phf_shared 0.11.2",
  "proc-macro2 1.0.66",
  "quote 1.0.33",
- "syn 2.0.30",
+ "syn 2.0.31",
 ]
 
 [[package]]
@@ -2687,7 +2687,7 @@ checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2"
 dependencies = [
  "proc-macro2 1.0.66",
  "quote 1.0.33",
- "syn 2.0.30",
+ "syn 2.0.31",
 ]
 
 [[package]]
@@ -2926,9 +2926,9 @@ dependencies = [
 
 [[package]]
 name = "syn"
-version = "2.0.30"
+version = "2.0.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0ddc1f908d32ec46858c2d3b3daa00cc35bf4b6841ce4355c7bb3eedf2283a68"
+checksum = "718fa2415bcb8d8bd775917a1bf12a7931b6dfa890753378538118181e0cb398"
 dependencies = [
  "proc-macro2 1.0.66",
  "quote 1.0.33",
@@ -2997,7 +2997,7 @@ checksum = "49922ecae66cc8a249b77e68d1d0623c1b2c514f0060c27cdc68bd62a1219d35"
 dependencies = [
  "proc-macro2 1.0.66",
  "quote 1.0.33",
- "syn 2.0.30",
+ "syn 2.0.31",
 ]
 
 [[package]]
@@ -3152,7 +3152,7 @@ checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e"
 dependencies = [
  "proc-macro2 1.0.66",
  "quote 1.0.33",
- "syn 2.0.30",
+ "syn 2.0.31",
 ]
 
 [[package]]
@@ -3474,7 +3474,7 @@ dependencies = [
  "once_cell",
  "proc-macro2 1.0.66",
  "quote 1.0.33",
- "syn 2.0.30",
+ "syn 2.0.31",
  "wasm-bindgen-shared",
 ]
 
@@ -3508,7 +3508,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b"
 dependencies = [
  "proc-macro2 1.0.66",
  "quote 1.0.33",
- "syn 2.0.30",
+ "syn 2.0.31",
  "wasm-bindgen-backend",
  "wasm-bindgen-shared",
 ]
@@ -3531,7 +3531,7 @@ dependencies = [
 
 [[package]]
 name = "websurfx"
-version = "0.20.1"
+version = "0.20.2"
 dependencies = [
  "actix-cors",
  "actix-files",
diff --git a/Cargo.toml b/Cargo.toml
index cc7309a..fa46291 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "websurfx"
-version = "0.20.1"
+version = "0.20.2"
 edition = "2021"
 description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
 repository = "https://github.com/neon-mmd/websurfx"
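
Note for anyone rebasing downstream work on this series: every shared model type now lives under a single `crate::models` tree, so imports that previously pointed at `crate::results::aggregation_models`, `crate::engines::engine_models`, or `crate::config::parser_models` have to be updated to the new paths. Below is a minimal sketch of the new import surface, assuming websurfx is consumed as a library through `src/lib.rs`; the `main` function is illustrative only and not part of these patches, and the extra imports are listed purely to show the new paths.

    // New import paths after this series (old paths shown in the `-` lines above).
    use websurfx::models::aggregation_models::{SearchResult, SearchResults};
    use websurfx::models::engine_models::{EngineError, EngineHandler};
    use websurfx::models::parser_models::Style;
    use websurfx::models::server_models::{Cookie, SearchParams};

    fn main() {
        // `EngineHandler::new` resolves an upstream engine by name, the same way
        // `Config::parse` now does via `crate::models::engine_models::EngineHandler::new`.
        // It returns `None` for engine names it does not recognize.
        let handler: Option<EngineHandler> = EngineHandler::new("duckduckgo");
        assert!(handler.is_some());
        assert!(EngineHandler::new("not-an-engine").is_none());
    }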