diff --git a/Cargo.lock b/Cargo.lock
index 29a28c5..ce99719 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3797,7 +3797,7 @@ dependencies = [
 
 [[package]]
 name = "websurfx"
-version = "0.20.1"
+version = "0.20.2"
 dependencies = [
  "actix-cors",
  "actix-files",
diff --git a/Cargo.toml b/Cargo.toml
index 7d2ef7d..477556f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "websurfx"
-version = "0.20.1"
+version = "0.20.2"
 edition = "2021"
 description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
 repository = "https://github.com/neon-mmd/websurfx"
diff --git a/src/config/mod.rs b/src/config/mod.rs
index 331a3d7..babc54f 100644
--- a/src/config/mod.rs
+++ b/src/config/mod.rs
@@ -2,4 +2,3 @@
 //! and convert the config options into rust readable form.
 
 pub mod parser;
-pub mod parser_models;
diff --git a/src/config/parser.rs b/src/config/parser.rs
index 4490bc6..782b026 100644
--- a/src/config/parser.rs
+++ b/src/config/parser.rs
@@ -3,7 +3,7 @@
 
 use crate::handler::paths::{file_path, FileType};
 
-use super::parser_models::{AggregatorConfig, RateLimiter, Style};
+use crate::models::parser_models::{AggregatorConfig, RateLimiter, Style};
 use log::LevelFilter;
 use mlua::Lua;
 use std::{collections::HashMap, fs, thread::available_parallelism};
@@ -27,7 +27,7 @@ pub struct Config {
     /// It stores the option to whether enable or disable debug mode.
     pub debug: bool,
     /// It stores all the engine names that were enabled by the user.
-    pub upstream_search_engines: Vec<crate::engines::engine_models::EngineHandler>,
+    pub upstream_search_engines: Vec<crate::models::engine_models::EngineHandler>,
     /// It stores the time (secs) which controls the server request timeout.
     pub request_timeout: u8,
     /// It stores the number of threads which controls the app will use to run.
@@ -109,7 +109,7 @@ impl Config {
                 .get::<_, HashMap<String, bool>>("upstream_search_engines")?
                 .into_iter()
                 .filter_map(|(key, value)| value.then_some(key))
-                .filter_map(|engine| crate::engines::engine_models::EngineHandler::new(&engine))
+                .filter_map(|engine| crate::models::engine_models::EngineHandler::new(&engine))
                 .collect(),
             request_timeout: globals.get::<_, u8>("request_timeout")?,
             threads,
diff --git a/src/engines/duckduckgo.rs b/src/engines/duckduckgo.rs
index edca35a..0f06ea4 100644
--- a/src/engines/duckduckgo.rs
+++ b/src/engines/duckduckgo.rs
@@ -7,9 +7,9 @@ use std::collections::HashMap;
 
 use reqwest::header::HeaderMap;
 use scraper::{Html, Selector};
-use crate::results::aggregation_models::SearchResult;
+use crate::models::aggregation_models::SearchResult;
 
-use super::engine_models::{EngineError, SearchEngine};
+use crate::models::engine_models::{EngineError, SearchEngine};
 
 use error_stack::{Report, Result, ResultExt};
diff --git a/src/engines/mod.rs b/src/engines/mod.rs
index 8267c93..0016728 100644
--- a/src/engines/mod.rs
+++ b/src/engines/mod.rs
@@ -4,5 +4,4 @@
 //! code. Moreover, it also provides a custom error for the upstream search engine handling code.
 
 pub mod duckduckgo;
-pub mod engine_models;
 pub mod searx;
diff --git a/src/engines/searx.rs b/src/engines/searx.rs
index 170364c..6ab0469 100644
--- a/src/engines/searx.rs
+++ b/src/engines/searx.rs
@@ -6,9 +6,8 @@ use reqwest::header::HeaderMap;
 use scraper::{Html, Selector};
 use std::collections::HashMap;
 
-use crate::results::aggregation_models::SearchResult;
-
-use super::engine_models::{EngineError, SearchEngine};
+use crate::models::aggregation_models::SearchResult;
+use crate::models::engine_models::{EngineError, SearchEngine};
 use error_stack::{Report, Result, ResultExt};
 
 /// A new Searx engine type defined in-order to implement the `SearchEngine` trait which allows to
diff --git a/src/lib.rs b/src/lib.rs
index b33ace3..8c74e6a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -9,12 +9,13 @@ pub mod cache;
 pub mod config;
 pub mod engines;
 pub mod handler;
+pub mod models;
 pub mod results;
 pub mod server;
 
 use std::net::TcpListener;
 
-use crate::server::routes;
+use crate::server::router;
 
 use actix_cors::Cors;
 use actix_files as fs;
@@ -89,12 +90,12 @@ pub fn run(listener: TcpListener, config: Config) -> std::io::Result<Server> {
                 fs::Files::new("/images", format!("{}/images", public_folder_path))
                     .show_files_listing(),
             )
-            .service(routes::robots_data) // robots.txt
-            .service(routes::index) // index page
-            .service(routes::search) // search page
-            .service(routes::about) // about page
-            .service(routes::settings) // settings page
-            .default_service(web::route().to(routes::not_found)) // error page
+            .service(router::robots_data) // robots.txt
+            .service(router::index) // index page
+            .service(server::routes::search::search) // search page
+            .service(router::about) // about page
+            .service(router::settings) // settings page
+            .default_service(web::route().to(router::not_found)) // error page
         })
         .workers(cloned_config_threads_opt as usize)
         // Start server on 127.0.0.1 with the user provided port number. for example 127.0.0.1:8080.
diff --git a/src/results/aggregation_models.rs b/src/models/aggregation_models.rs
similarity index 98%
rename from src/results/aggregation_models.rs
rename to src/models/aggregation_models.rs
index 30316e2..ea4a914 100644
--- a/src/results/aggregation_models.rs
+++ b/src/models/aggregation_models.rs
@@ -4,7 +4,7 @@
 use serde::{Deserialize, Serialize};
 use smallvec::SmallVec;
 
-use crate::{config::parser_models::Style, engines::engine_models::EngineError};
+use super::{engine_models::EngineError, parser_models::Style};
 
 /// A named struct to store the raw scraped search results scraped search results from the
 /// upstream search engines before aggregating it.It derives the Clone trait which is needed
diff --git a/src/engines/engine_models.rs b/src/models/engine_models.rs
similarity index 95%
rename from src/engines/engine_models.rs
rename to src/models/engine_models.rs
index 2bd50c6..d4a4e72 100644
--- a/src/engines/engine_models.rs
+++ b/src/models/engine_models.rs
@@ -1,7 +1,7 @@
 //! This module provides the error enum to handle different errors associated while requesting data from
 //! the upstream search engines with the search query provided by the user.
 
-use crate::results::aggregation_models::SearchResult;
+use super::aggregation_models::SearchResult;
 use error_stack::{Result, ResultExt};
 use std::{collections::HashMap, fmt, time::Duration};
 
@@ -137,8 +137,11 @@ impl EngineHandler {
     pub fn new(engine_name: &str) -> Option<Self> {
         let engine: (&'static str, Box<dyn SearchEngine>) =
             match engine_name.to_lowercase().as_str() {
-                "duckduckgo" => ("duckduckgo", Box::new(super::duckduckgo::DuckDuckGo)),
-                "searx" => ("searx", Box::new(super::searx::Searx)),
+                "duckduckgo" => (
+                    "duckduckgo",
+                    Box::new(crate::engines::duckduckgo::DuckDuckGo),
+                ),
+                "searx" => ("searx", Box::new(crate::engines::searx::Searx)),
                 _ => return None,
             };
 
diff --git a/src/models/mod.rs b/src/models/mod.rs
new file mode 100644
index 0000000..6a7d235
--- /dev/null
+++ b/src/models/mod.rs
@@ -0,0 +1,8 @@
+//! This module provides modules which in turn provides various models for aggregrating search
+//! results, parsing config file, providing trait to standardize search engine handling code,
+//! custom engine error for the search engine, etc.
+
+pub mod aggregation_models;
+pub mod engine_models;
+pub mod parser_models;
+pub mod server_models;
diff --git a/src/config/parser_models.rs b/src/models/parser_models.rs
similarity index 100%
rename from src/config/parser_models.rs
rename to src/models/parser_models.rs
diff --git a/src/models/server_models.rs b/src/models/server_models.rs
new file mode 100644
index 0000000..3da6717
--- /dev/null
+++ b/src/models/server_models.rs
@@ -0,0 +1,26 @@
+//! This module provides the models to parse cookies and search parameters from the search
+//! engine website.
+use serde::Deserialize;
+
+/// A named struct which deserializes all the user provided search parameters and stores them.
+#[derive(Deserialize)]
+pub struct SearchParams {
+    /// It stores the search parameter option `q` (or query in simple words)
+    /// of the search url.
+    pub q: Option<String>,
+    /// It stores the search parameter `page` (or pageno in simple words)
+    /// of the search url.
+    pub page: Option<u32>,
+}
+
+/// A named struct which is used to deserialize the cookies fetched from the client side.
+#[allow(dead_code)]
+#[derive(Deserialize)]
+pub struct Cookie {
+    /// It stores the theme name used in the website.
+    pub theme: String,
+    /// It stores the colorscheme name used for the website theme.
+    pub colorscheme: String,
+    /// It stores the user selected upstream search engines selected from the UI.
+    pub engines: Vec<String>,
+}
diff --git a/src/results/aggregator.rs b/src/results/aggregator.rs
index 734a65f..8c9be2c 100644
--- a/src/results/aggregator.rs
+++ b/src/results/aggregator.rs
@@ -1,27 +1,23 @@
 //! This module provides the functionality to scrape and gathers all the results from the upstream
 //! search engines and then removes duplicate results.
 
+use super::user_agent::random_user_agent;
+use crate::handler::paths::{file_path, FileType};
+use crate::models::{
+    aggregation_models::{EngineErrorInfo, SearchResult, SearchResults},
+    engine_models::{EngineError, EngineHandler},
+};
+use error_stack::Report;
+use rand::Rng;
+use regex::Regex;
 use std::{
     collections::HashMap,
     io::{BufReader, Read},
     time::Duration,
 };
-
-use super::{
-    aggregation_models::{EngineErrorInfo, SearchResult, SearchResults},
-    user_agent::random_user_agent,
-};
-use error_stack::Report;
-use rand::Rng;
-use regex::Regex;
 use std::{fs::File, io::BufRead};
 use tokio::task::JoinHandle;
 
-use crate::{
-    engines::engine_models::{EngineError, EngineHandler},
-    handler::paths::{file_path, FileType},
-};
-
 /// Aliases for long type annotations
 type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<EngineError>>>>;
diff --git a/src/results/mod.rs b/src/results/mod.rs
index b08eec0..9ec3229 100644
--- a/src/results/mod.rs
+++ b/src/results/mod.rs
@@ -2,6 +2,5 @@
 //! results from the upstream search engines and filters it if safe search is set to 3 or 4. Also,
 //! provides various models to aggregate search results into a standardized form.
 
-pub mod aggregation_models;
 pub mod aggregator;
 pub mod user_agent;
diff --git a/src/server/mod.rs b/src/server/mod.rs
index f5d2ce9..7f4274f 100644
--- a/src/server/mod.rs
+++ b/src/server/mod.rs
@@ -3,4 +3,5 @@
 //! the search route. Also, caches the next, current and previous search results in the search
 //! routes with the help of the redis server.
 
+pub mod router;
 pub mod routes;
diff --git a/src/server/router.rs b/src/server/router.rs
new file mode 100644
index 0000000..69a3ede
--- /dev/null
+++ b/src/server/router.rs
@@ -0,0 +1,64 @@
+//! This module provides the functionality to handle different routes of the `websurfx`
+//! meta search engine website and provide appropriate response to each route/page
+//! when requested.
+
+use crate::{
+    config::parser::Config,
+    handler::paths::{file_path, FileType},
+};
+use actix_web::{get, web, HttpRequest, HttpResponse};
+use handlebars::Handlebars;
+use std::fs::read_to_string;
+
+/// Handles the route of index page or main page of the `websurfx` meta search engine website.
+#[get("/")]
+pub async fn index(
+    hbs: web::Data<Handlebars<'_>>,
+    config: web::Data<Config>,
+) -> Result<HttpResponse, Box<dyn std::error::Error>> {
+    let page_content: String = hbs.render("index", &config.style).unwrap();
+    Ok(HttpResponse::Ok().body(page_content))
+}
+
+/// Handles the route of any other accessed route/page which is not provided by the
+/// website essentially the 404 error page.
+pub async fn not_found(
+    hbs: web::Data<Handlebars<'_>>,
+    config: web::Data<Config>,
+) -> Result<HttpResponse, Box<dyn std::error::Error>> {
+    let page_content: String = hbs.render("404", &config.style)?;
+
+    Ok(HttpResponse::Ok()
+        .content_type("text/html; charset=utf-8")
+        .body(page_content))
+}
+
+/// Handles the route of robots.txt page of the `websurfx` meta search engine website.
+#[get("/robots.txt")]
+pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
+    let page_content: String =
+        read_to_string(format!("{}/robots.txt", file_path(FileType::Theme)?))?;
+    Ok(HttpResponse::Ok()
+        .content_type("text/plain; charset=ascii")
+        .body(page_content))
+}
+
+/// Handles the route of about page of the `websurfx` meta search engine website.
+#[get("/about")]
+pub async fn about(
+    hbs: web::Data<Handlebars<'_>>,
+    config: web::Data<Config>,
+) -> Result<HttpResponse, Box<dyn std::error::Error>> {
+    let page_content: String = hbs.render("about", &config.style)?;
+    Ok(HttpResponse::Ok().body(page_content))
+}
+
+/// Handles the route of settings page of the `websurfx` meta search engine website.
+#[get("/settings")] +pub async fn settings( + hbs: web::Data>, + config: web::Data, +) -> Result> { + let page_content: String = hbs.render("settings", &config.style)?; + Ok(HttpResponse::Ok().body(page_content)) +} diff --git a/src/server/routes/mod.rs b/src/server/routes/mod.rs new file mode 100644 index 0000000..6bc5750 --- /dev/null +++ b/src/server/routes/mod.rs @@ -0,0 +1,3 @@ +//! This module provides modules to handle various routes in the search engine website. + +pub mod search; diff --git a/src/server/routes.rs b/src/server/routes/search.rs similarity index 97% rename from src/server/routes.rs rename to src/server/routes/search.rs index 57aa413..254c038 100644 --- a/src/server/routes.rs +++ b/src/server/routes/search.rs @@ -1,23 +1,20 @@ -//! This module provides the functionality to handle different routes of the `websurfx` -//! meta search engine website and provide appropriate response to each route/page -//! when requested. - -use std::{ - fs::{read_to_string, File}, - io::{BufRead, BufReader, Read}, -}; +//! This module handles the search route of the search engine website. use crate::{ cache::cacher::RedisCache, config::parser::Config, - engines::engine_models::EngineHandler, handler::paths::{file_path, FileType}, - results::{aggregation_models::SearchResults, aggregator::aggregate}, + models::{aggregation_models::SearchResults, engine_models::EngineHandler}, + results::aggregator::aggregate, }; use actix_web::{get, web, HttpRequest, HttpResponse}; use handlebars::Handlebars; use regex::Regex; use serde::Deserialize; +use std::{ + fs::{read_to_string, File}, + io::{BufRead, BufReader, Read}, +}; use tokio::join; // ---- Constants ---- @@ -26,7 +23,7 @@ static REDIS_CACHE: async_once_cell::OnceCell = async_once_cell::Onc /// A named struct which deserializes all the user provided search parameters and stores them. #[derive(Deserialize)] -struct SearchParams { +pub struct SearchParams { /// It stores the search parameter option `q` (or query in simple words) /// of the search url. q: Option,