0
0
mirror of https://github.com/neon-mmd/websurfx.git synced 2024-12-22 20:38:22 -05:00

⚙️ refactor: reorganize code & restructure codebase for better maintainability (#207)

This commit is contained in:
neon_arch 2023-09-03 20:50:50 +03:00
parent 453dbdc47d
commit 493c56bd02
17 changed files with 141 additions and 122 deletions

View File

@ -2,4 +2,3 @@
//! and convert the config options into rust readable form.
pub mod parser;
pub mod parser_models;

View File

@ -3,7 +3,7 @@
use crate::handler::paths::{file_path, FileType};
use super::parser_models::Style;
use crate::models::parser_models::Style;
use log::LevelFilter;
use rlua::Lua;
use std::{collections::HashMap, fs, thread::available_parallelism};
@ -27,7 +27,7 @@ pub struct Config {
/// It stores the option to whether enable or disable debug mode.
pub debug: bool,
/// It stores all the engine names that were enabled by the user.
pub upstream_search_engines: Vec<crate::engines::engine_models::EngineHandler>,
pub upstream_search_engines: Vec<crate::models::engine_models::EngineHandler>,
/// It stores the time (secs) which controls the server request timeout.
pub request_timeout: u8,
/// It stores the number of threads which controls the app will use to run.
@ -99,7 +99,7 @@ impl Config {
.get::<_, HashMap<String, bool>>("upstream_search_engines")?
.into_iter()
.filter_map(|(key, value)| value.then_some(key))
.filter_map(|engine| crate::engines::engine_models::EngineHandler::new(&engine))
.filter_map(|engine| crate::models::engine_models::EngineHandler::new(&engine))
.collect(),
request_timeout: globals.get::<_, u8>("request_timeout")?,
threads,

View File

@ -7,9 +7,9 @@ use std::collections::HashMap;
use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
use scraper::{Html, Selector};
use crate::results::aggregation_models::SearchResult;
use crate::models::aggregation_models::SearchResult;
use super::engine_models::{EngineError, SearchEngine};
use crate::models::engine_models::{EngineError, SearchEngine};
use error_stack::{IntoReport, Report, Result, ResultExt};

View File

@ -4,5 +4,4 @@
//! code. Moreover, it also provides a custom error for the upstream search engine handling code.
pub mod duckduckgo;
pub mod engine_models;
pub mod searx;

View File

@ -6,9 +6,9 @@ use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
use scraper::{Html, Selector};
use std::collections::HashMap;
use crate::results::aggregation_models::SearchResult;
use crate::models::aggregation_models::SearchResult;
use super::engine_models::{EngineError, SearchEngine};
use crate::models::engine_models::{EngineError, SearchEngine};
use error_stack::{IntoReport, Report, Result, ResultExt};
/// A new Searx engine type defined in-order to implement the `SearchEngine` trait which allows to

View File

@ -9,12 +9,13 @@ pub mod cache;
pub mod config;
pub mod engines;
pub mod handler;
pub mod models;
pub mod results;
pub mod server;
use std::net::TcpListener;
use crate::server::routes;
use crate::server::router;
use actix_cors::Cors;
use actix_files as fs;
@ -81,12 +82,12 @@ pub fn run(listener: TcpListener, config: Config) -> std::io::Result<Server> {
fs::Files::new("/images", format!("{}/images", public_folder_path))
.show_files_listing(),
)
.service(routes::robots_data) // robots.txt
.service(routes::index) // index page
.service(routes::search) // search page
.service(routes::about) // about page
.service(routes::settings) // settings page
.default_service(web::route().to(routes::not_found)) // error page
.service(router::robots_data) // robots.txt
.service(router::index) // index page
.service(router::search) // search page
.service(router::about) // about page
.service(router::settings) // settings page
.default_service(web::route().to(router::not_found)) // error page
})
.workers(cloned_config_threads_opt as usize)
// Start server on 127.0.0.1 with the user provided port number. for example 127.0.0.1:8080.

View File

@ -3,7 +3,7 @@
use serde::{Deserialize, Serialize};
use crate::{config::parser_models::Style, engines::engine_models::EngineError};
use super::{engine_models::EngineError, parser_models::Style};
/// A named struct to store the raw scraped search results scraped search results from the
/// upstream search engines before aggregating it.It derives the Clone trait which is needed

View File

@ -1,7 +1,7 @@
//! This module provides the error enum to handle different errors associated while requesting data from
//! the upstream search engines with the search query provided by the user.
use crate::results::aggregation_models::SearchResult;
use super::aggregation_models::SearchResult;
use error_stack::{IntoReport, Result, ResultExt};
use std::{collections::HashMap, fmt, time::Duration};
@ -138,8 +138,11 @@ impl EngineHandler {
pub fn new(engine_name: &str) -> Option<Self> {
let engine: (&'static str, Box<dyn SearchEngine>) =
match engine_name.to_lowercase().as_str() {
"duckduckgo" => ("duckduckgo", Box::new(super::duckduckgo::DuckDuckGo)),
"searx" => ("searx", Box::new(super::searx::Searx)),
"duckduckgo" => (
"duckduckgo",
Box::new(crate::engines::duckduckgo::DuckDuckGo),
),
"searx" => ("searx", Box::new(crate::engines::searx::Searx)),
_ => return None,
};

8
src/models/mod.rs Normal file
View File

@ -0,0 +1,8 @@
//! This module provides modules which in turn provides various models for aggregrating search
//! results, parsing config file, providing trait to standardize search engine handling code,
//! custom engine error for the search engine, etc.
pub mod aggregation_models;
pub mod engine_models;
pub mod parser_models;
pub mod server_models;

View File

@ -0,0 +1,26 @@
//! This module provides the models to parse cookies and search parameters from the search
//! engine website.
use serde::Deserialize;
/// A named struct which deserializes all the user provided search parameters and stores them.
#[derive(Deserialize)]
pub struct SearchParams {
/// It stores the search parameter option `q` (or query in simple words)
/// of the search url.
pub q: Option<String>,
/// It stores the search parameter `page` (or pageno in simple words)
/// of the search url.
pub page: Option<u32>,
}
/// A named struct which is used to deserialize the cookies fetched from the client side.
#[allow(dead_code)]
#[derive(Deserialize)]
pub struct Cookie {
/// It stores the theme name used in the website.
pub theme: String,
/// It stores the colorscheme name used for the website theme.
pub colorscheme: String,
/// It stores the user selected upstream search engines selected from the UI.
pub engines: Vec<String>,
}

View File

@ -1,27 +1,23 @@
//! This module provides the functionality to scrape and gathers all the results from the upstream
//! search engines and then removes duplicate results.
use super::user_agent::random_user_agent;
use crate::handler::paths::{file_path, FileType};
use crate::models::{
aggregation_models::{EngineErrorInfo, SearchResult, SearchResults},
engine_models::{EngineError, EngineHandler},
};
use error_stack::Report;
use rand::Rng;
use regex::Regex;
use std::{
collections::HashMap,
io::{BufReader, Read},
time::Duration,
};
use super::{
aggregation_models::{EngineErrorInfo, SearchResult, SearchResults},
user_agent::random_user_agent,
};
use error_stack::Report;
use rand::Rng;
use regex::Regex;
use std::{fs::File, io::BufRead};
use tokio::task::JoinHandle;
use crate::{
engines::engine_models::{EngineError, EngineHandler},
handler::paths::{file_path, FileType},
};
/// Aliases for long type annotations
type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<EngineError>>>>;

View File

@ -2,6 +2,5 @@
//! results from the upstream search engines and filters it if safe search is set to 3 or 4. Also,
//! provides various models to aggregate search results into a standardized form.
pub mod aggregation_models;
pub mod aggregator;
pub mod user_agent;

View File

@ -3,4 +3,5 @@
//! the search route. Also, caches the next, current and previous search results in the search
//! routes with the help of the redis server.
pub mod router;
pub mod routes;

64
src/server/router.rs Normal file
View File

@ -0,0 +1,64 @@
//! This module provides the functionality to handle different routes of the `websurfx`
//! meta search engine website and provide appropriate response to each route/page
//! when requested.
use crate::{
config::parser::Config,
handler::paths::{file_path, FileType},
};
use actix_web::{get, web, HttpRequest, HttpResponse};
use handlebars::Handlebars;
use std::fs::read_to_string;
/// Handles the route of index page or main page of the `websurfx` meta search engine website.
#[get("/")]
pub async fn index(
hbs: web::Data<Handlebars<'_>>,
config: web::Data<Config>,
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
let page_content: String = hbs.render("index", &config.style).unwrap();
Ok(HttpResponse::Ok().body(page_content))
}
/// Handles the route of any other accessed route/page which is not provided by the
/// website essentially the 404 error page.
pub async fn not_found(
hbs: web::Data<Handlebars<'_>>,
config: web::Data<Config>,
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
let page_content: String = hbs.render("404", &config.style)?;
Ok(HttpResponse::Ok()
.content_type("text/html; charset=utf-8")
.body(page_content))
}
/// Handles the route of robots.txt page of the `websurfx` meta search engine website.
#[get("/robots.txt")]
pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
let page_content: String =
read_to_string(format!("{}/robots.txt", file_path(FileType::Theme)?))?;
Ok(HttpResponse::Ok()
.content_type("text/plain; charset=ascii")
.body(page_content))
}
/// Handles the route of about page of the `websurfx` meta search engine website.
#[get("/about")]
pub async fn about(
hbs: web::Data<Handlebars<'_>>,
config: web::Data<Config>,
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
let page_content: String = hbs.render("about", &config.style)?;
Ok(HttpResponse::Ok().body(page_content))
}
/// Handles the route of settings page of the `websurfx` meta search engine website.
#[get("/settings")]
pub async fn settings(
hbs: web::Data<Handlebars<'_>>,
config: web::Data<Config>,
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
let page_content: String = hbs.render("settings", &config.style)?;
Ok(HttpResponse::Ok().body(page_content))
}

3
src/server/routes/mod.rs Normal file
View File

@ -0,0 +1,3 @@
//! This module provides modules to handle various routes in the search engine website.
pub mod search;

View File

@ -1,67 +1,19 @@
//! This module provides the functionality to handle different routes of the `websurfx`
//! meta search engine website and provide appropriate response to each route/page
//! when requested.
use std::fs::read_to_string;
//! This module handles the search route of the search engine website.
use crate::{
cache::cacher::RedisCache,
config::parser::Config,
engines::engine_models::EngineHandler,
handler::paths::{file_path, FileType},
results::{aggregation_models::SearchResults, aggregator::aggregate},
models::{
aggregation_models::SearchResults,
engine_models::EngineHandler,
server_models::{Cookie, SearchParams},
},
results::aggregator::aggregate,
};
use actix_web::{get, web, HttpRequest, HttpResponse};
use handlebars::Handlebars;
use serde::Deserialize;
use tokio::join;
/// A named struct which deserializes all the user provided search parameters and stores them.
#[derive(Deserialize)]
struct SearchParams {
/// It stores the search parameter option `q` (or query in simple words)
/// of the search url.
q: Option<String>,
/// It stores the search parameter `page` (or pageno in simple words)
/// of the search url.
page: Option<u32>,
}
/// Handles the route of index page or main page of the `websurfx` meta search engine website.
#[get("/")]
pub async fn index(
hbs: web::Data<Handlebars<'_>>,
config: web::Data<Config>,
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
let page_content: String = hbs.render("index", &config.style).unwrap();
Ok(HttpResponse::Ok().body(page_content))
}
/// Handles the route of any other accessed route/page which is not provided by the
/// website essentially the 404 error page.
pub async fn not_found(
hbs: web::Data<Handlebars<'_>>,
config: web::Data<Config>,
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
let page_content: String = hbs.render("404", &config.style)?;
Ok(HttpResponse::Ok()
.content_type("text/html; charset=utf-8")
.body(page_content))
}
/// A named struct which is used to deserialize the cookies fetched from the client side.
#[allow(dead_code)]
#[derive(Deserialize)]
struct Cookie {
/// It stores the theme name used in the website.
theme: String,
/// It stores the colorscheme name used for the website theme.
colorscheme: String,
/// It stores the user selected upstream search engines selected from the UI.
engines: Vec<String>,
}
/// Handles the route of search page of the `websurfx` meta search engine website and it takes
/// two search url parameters `q` and `page` where `page` parameter is optional.
///
@ -178,9 +130,7 @@ async fn results(
// default selected upstream search engines from the config file otherwise
// parse the non-empty cookie and grab the user selected engines from the
// UI and use that.
let mut results: crate::results::aggregation_models::SearchResults = match req
.cookie("appCookie")
{
let mut results: SearchResults = match req.cookie("appCookie") {
Some(cookie_value) => {
let cookie_value: Cookie = serde_json::from_str(cookie_value.name_value().1)?;
@ -218,33 +168,3 @@ async fn results(
}
}
}
/// Handles the route of robots.txt page of the `websurfx` meta search engine website.
#[get("/robots.txt")]
pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
let page_content: String =
read_to_string(format!("{}/robots.txt", file_path(FileType::Theme)?))?;
Ok(HttpResponse::Ok()
.content_type("text/plain; charset=ascii")
.body(page_content))
}
/// Handles the route of about page of the `websurfx` meta search engine website.
#[get("/about")]
pub async fn about(
hbs: web::Data<Handlebars<'_>>,
config: web::Data<Config>,
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
let page_content: String = hbs.render("about", &config.style)?;
Ok(HttpResponse::Ok().body(page_content))
}
/// Handles the route of settings page of the `websurfx` meta search engine website.
#[get("/settings")]
pub async fn settings(
hbs: web::Data<Handlebars<'_>>,
config: web::Data<Config>,
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
let page_content: String = hbs.render("settings", &config.style)?;
Ok(HttpResponse::Ok().body(page_content))
}