Mirror of https://github.com/neon-mmd/websurfx.git (synced 2024-11-22 05:58:21 -05:00)

Merge pull request #208 from neon-mmd/reorganize-code-and-restructure-the-codebase

⚙️ Improve the project structure for better maintainability of code

Commit e19038b82c
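
The pull request's net effect: the data models previously scattered across crate::config, crate::engines, and crate::results move into a new crate::models module, server-side request and cookie models are added, and the old monolithic src/server/routes.rs splits into a router module for the static pages plus a routes::search module for the search endpoint. The recurring import change, taken directly from the hunks below:

    // Before: shared models lived beside the code that used them.
    use crate::results::aggregation_models::SearchResult;
    use crate::engines::engine_models::{EngineError, SearchEngine};

    // After: every shared model lives under the new `models` module.
    use crate::models::aggregation_models::SearchResult;
    use crate::models::engine_models::{EngineError, SearchEngine};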
Cargo.lock (generated, 2 lines changed)

@@ -3797,7 +3797,7 @@ dependencies = [
 
 [[package]]
 name = "websurfx"
-version = "0.20.1"
+version = "0.20.2"
 dependencies = [
  "actix-cors",
  "actix-files",
Cargo.toml

@@ -1,6 +1,6 @@
 [package]
 name = "websurfx"
-version = "0.20.1"
+version = "0.20.2"
 edition = "2021"
 description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
 repository = "https://github.com/neon-mmd/websurfx"
src/config/mod.rs

@@ -2,4 +2,3 @@
 //! and convert the config options into rust readable form.
 
 pub mod parser;
-pub mod parser_models;
src/config/parser.rs

@@ -3,7 +3,7 @@
 
 use crate::handler::paths::{file_path, FileType};
 
-use super::parser_models::{AggregatorConfig, RateLimiter, Style};
+use crate::models::parser_models::{AggregatorConfig, RateLimiter, Style};
 use log::LevelFilter;
 use mlua::Lua;
 use std::{collections::HashMap, fs, thread::available_parallelism};

@@ -27,7 +27,7 @@ pub struct Config {
     /// It stores the option to whether enable or disable debug mode.
     pub debug: bool,
     /// It stores all the engine names that were enabled by the user.
-    pub upstream_search_engines: Vec<crate::engines::engine_models::EngineHandler>,
+    pub upstream_search_engines: Vec<crate::models::engine_models::EngineHandler>,
     /// It stores the time (secs) which controls the server request timeout.
     pub request_timeout: u8,
     /// It stores the number of threads which controls the app will use to run.

@@ -109,7 +109,7 @@ impl Config {
                 .get::<_, HashMap<String, bool>>("upstream_search_engines")?
                 .into_iter()
                 .filter_map(|(key, value)| value.then_some(key))
-                .filter_map(|engine| crate::engines::engine_models::EngineHandler::new(&engine))
+                .filter_map(|engine| crate::models::engine_models::EngineHandler::new(&engine))
                 .collect(),
             request_timeout: globals.get::<_, u8>("request_timeout")?,
             threads,
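
A note on the chain above: `value.then_some(key)` keeps only the engine names that are set to `true` in the Lua config table, and the second `filter_map` silently drops any name that `EngineHandler::new` does not recognize. A self-contained sketch of the same idiom (the engine names are illustrative):

    use std::collections::HashMap;

    fn main() {
        let config: HashMap<String, bool> =
            HashMap::from([("duckduckgo".to_owned(), true), ("searx".to_owned(), false)]);

        // Mirror of the parser's chain: keep only the keys whose value is `true`.
        let enabled: Vec<String> = config
            .into_iter()
            .filter_map(|(key, value)| value.then_some(key))
            .collect();

        assert_eq!(enabled, vec!["duckduckgo".to_owned()]);
    }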
src/engines/duckduckgo.rs

@@ -7,9 +7,9 @@ use std::collections::HashMap;
 use reqwest::header::HeaderMap;
 use scraper::{Html, Selector};
 
-use crate::results::aggregation_models::SearchResult;
+use crate::models::aggregation_models::SearchResult;
 
-use super::engine_models::{EngineError, SearchEngine};
+use crate::models::engine_models::{EngineError, SearchEngine};
 
 use error_stack::{Report, Result, ResultExt};
 
src/engines/mod.rs

@@ -4,5 +4,4 @@
 //! code. Moreover, it also provides a custom error for the upstream search engine handling code.
 
 pub mod duckduckgo;
-pub mod engine_models;
 pub mod searx;
src/engines/searx.rs

@@ -6,9 +6,8 @@ use reqwest::header::HeaderMap;
 use scraper::{Html, Selector};
 use std::collections::HashMap;
 
-use crate::results::aggregation_models::SearchResult;
-
-use super::engine_models::{EngineError, SearchEngine};
+use crate::models::aggregation_models::SearchResult;
+use crate::models::engine_models::{EngineError, SearchEngine};
 
 use error_stack::{Report, Result, ResultExt};
 
 /// A new Searx engine type defined in-order to implement the `SearchEngine` trait which allows to
src/lib.rs (15 lines changed)

@@ -9,12 +9,13 @@ pub mod cache;
 pub mod config;
 pub mod engines;
 pub mod handler;
+pub mod models;
 pub mod results;
 pub mod server;
 
 use std::net::TcpListener;
 
-use crate::server::routes;
+use crate::server::router;
 
 use actix_cors::Cors;
 use actix_files as fs;

@@ -89,12 +90,12 @@ pub fn run(listener: TcpListener, config: Config) -> std::io::Result<Server> {
                 fs::Files::new("/images", format!("{}/images", public_folder_path))
                     .show_files_listing(),
             )
-            .service(routes::robots_data) // robots.txt
-            .service(routes::index) // index page
-            .service(routes::search) // search page
-            .service(routes::about) // about page
-            .service(routes::settings) // settings page
-            .default_service(web::route().to(routes::not_found)) // error page
+            .service(router::robots_data) // robots.txt
+            .service(router::index) // index page
+            .service(server::routes::search::search) // search page
+            .service(router::about) // about page
+            .service(router::settings) // settings page
+            .default_service(web::route().to(router::not_found)) // error page
         })
         .workers(cloned_config_threads_opt as usize)
         // Start server on 127.0.0.1 with the user provided port number. for example 127.0.0.1:8080.
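
`run` takes a pre-bound `TcpListener` and hands back the actix `Server` future, so callers pick the address and await the server themselves. A hedged launcher sketch (how the `Config` is constructed is outside this diff, so that step is stubbed):

    use std::net::TcpListener;
    use websurfx::{config::parser::Config, run};

    #[actix_web::main]
    async fn main() -> std::io::Result<()> {
        // Constructing the Config is not shown in this diff; stub it here.
        let config: Config = todo!("build the Config via the crate's parser");
        let listener = TcpListener::bind("127.0.0.1:8080")?;
        run(listener, config)?.await
    }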
src/models/aggregation_models.rs (moved from src/results/aggregation_models.rs)

@@ -4,7 +4,7 @@
 use serde::{Deserialize, Serialize};
 use smallvec::SmallVec;
 
-use crate::{config::parser_models::Style, engines::engine_models::EngineError};
+use super::{engine_models::EngineError, parser_models::Style};
 
 /// A named struct to store the raw scraped search results scraped search results from the
 /// upstream search engines before aggregating it.It derives the Clone trait which is needed
src/models/engine_models.rs (moved from src/engines/engine_models.rs)

@@ -1,7 +1,7 @@
 //! This module provides the error enum to handle different errors associated while requesting data from
 //! the upstream search engines with the search query provided by the user.
 
-use crate::results::aggregation_models::SearchResult;
+use super::aggregation_models::SearchResult;
 use error_stack::{Result, ResultExt};
 use std::{collections::HashMap, fmt, time::Duration};
 

@@ -137,8 +137,11 @@ impl EngineHandler {
     pub fn new(engine_name: &str) -> Option<Self> {
         let engine: (&'static str, Box<dyn SearchEngine>) =
             match engine_name.to_lowercase().as_str() {
-                "duckduckgo" => ("duckduckgo", Box::new(super::duckduckgo::DuckDuckGo)),
-                "searx" => ("searx", Box::new(super::searx::Searx)),
+                "duckduckgo" => (
+                    "duckduckgo",
+                    Box::new(crate::engines::duckduckgo::DuckDuckGo),
+                ),
+                "searx" => ("searx", Box::new(crate::engines::searx::Searx)),
                 _ => return None,
             };
 
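
`EngineHandler::new` is now the single place where engine names map to boxed `SearchEngine` implementations, and unknown names return `None` (which is why the config parser can use it inside `filter_map`). A small usage sketch under that contract:

    use websurfx::models::engine_models::EngineHandler;

    fn main() {
        // The match lowercases its input, so lookups are case-insensitive.
        assert!(EngineHandler::new("DuckDuckGo").is_some());
        assert!(EngineHandler::new("searx").is_some());
        // Anything outside the two supported engines is rejected.
        assert!(EngineHandler::new("not-an-engine").is_none());
    }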
src/models/mod.rs (new file, 8 lines)

@@ -0,0 +1,8 @@
+//! This module provides modules which in turn provides various models for aggregating search
+//! results, parsing config file, providing trait to standardize search engine handling code,
+//! custom engine error for the search engine, etc.
+
+pub mod aggregation_models;
+pub mod engine_models;
+pub mod parser_models;
+pub mod server_models;
src/models/server_models.rs (new file, 26 lines)

@@ -0,0 +1,26 @@
+//! This module provides the models to parse cookies and search parameters from the search
+//! engine website.
+use serde::Deserialize;
+
+/// A named struct which deserializes all the user provided search parameters and stores them.
+#[derive(Deserialize)]
+pub struct SearchParams {
+    /// It stores the search parameter option `q` (or query in simple words)
+    /// of the search url.
+    pub q: Option<String>,
+    /// It stores the search parameter `page` (or pageno in simple words)
+    /// of the search url.
+    pub page: Option<u32>,
+}
+
+/// A named struct which is used to deserialize the cookies fetched from the client side.
+#[allow(dead_code)]
+#[derive(Deserialize)]
+pub struct Cookie {
+    /// It stores the theme name used in the website.
+    pub theme: String,
+    /// It stores the colorscheme name used for the website theme.
+    pub colorscheme: String,
+    /// It stores the user selected upstream search engines selected from the UI.
+    pub engines: Vec<String>,
+}
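
Because `SearchParams` derives `Deserialize` with both fields optional, it can be fed straight from a query string, e.g. through actix-web's `web::Query` extractor. A minimal sketch (the struct is redefined locally so the snippet stands alone; the `/search` wiring shown is an assumption, the real route lives in src/server/routes/search.rs):

    use actix_web::{get, web, HttpResponse};
    use serde::Deserialize;

    /// Local stand-in for crate::models::server_models::SearchParams.
    #[derive(Deserialize)]
    pub struct SearchParams {
        pub q: Option<String>,
        pub page: Option<u32>,
    }

    /// `/search?q=rust&page=2` deserializes into SearchParams; both fields may be absent.
    #[get("/search")]
    async fn search(params: web::Query<SearchParams>) -> HttpResponse {
        let query = params.q.clone().unwrap_or_default();
        let page = params.page.unwrap_or(1);
        HttpResponse::Ok().body(format!("query: {query}, page: {page}"))
    }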
src/results/aggregator.rs

@@ -1,27 +1,23 @@
 //! This module provides the functionality to scrape and gathers all the results from the upstream
 //! search engines and then removes duplicate results.
 
+use super::user_agent::random_user_agent;
+use crate::handler::paths::{file_path, FileType};
+use crate::models::{
+    aggregation_models::{EngineErrorInfo, SearchResult, SearchResults},
+    engine_models::{EngineError, EngineHandler},
+};
+use error_stack::Report;
+use rand::Rng;
+use regex::Regex;
 use std::{
     collections::HashMap,
     io::{BufReader, Read},
     time::Duration,
 };
-
-use super::{
-    aggregation_models::{EngineErrorInfo, SearchResult, SearchResults},
-    user_agent::random_user_agent,
-};
-use error_stack::Report;
-use rand::Rng;
-use regex::Regex;
 use std::{fs::File, io::BufRead};
 use tokio::task::JoinHandle;
 
-use crate::{
-    engines::engine_models::{EngineError, EngineHandler},
-    handler::paths::{file_path, FileType},
-};
-
 /// Aliases for long type annotations
 type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<EngineError>>>>;
 
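
`FutureVec` names the collection of per-engine tasks the aggregator spawns; each task resolves to either a map of results or an engine error. A simplified, runnable sketch of that spawn-then-join shape (string stand-ins replace the real `SearchResult` and `Report<EngineError>` types):

    use std::collections::HashMap;
    use tokio::task::JoinHandle;

    /// Simplified stand-in for the aggregator's FutureVec alias.
    type FutureVec = Vec<JoinHandle<Result<HashMap<String, String>, String>>>;

    #[tokio::main]
    async fn main() {
        let tasks: FutureVec = (0..2)
            .map(|i| {
                tokio::spawn(async move {
                    // Each task stands in for one upstream engine fetch.
                    Ok::<_, String>(HashMap::from([(format!("title-{i}"), format!("url-{i}"))]))
                })
            })
            .collect();

        // Join every engine task and inspect its outcome, as aggregate() does.
        for task in tasks {
            match task.await.expect("task panicked") {
                Ok(results) => println!("{} results", results.len()),
                Err(e) => eprintln!("engine error: {e}"),
            }
        }
    }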
src/results/mod.rs

@@ -2,6 +2,5 @@
 //! results from the upstream search engines and filters it if safe search is set to 3 or 4. Also,
 //! provides various models to aggregate search results into a standardized form.
 
-pub mod aggregation_models;
 pub mod aggregator;
 pub mod user_agent;
src/server/mod.rs

@@ -3,4 +3,5 @@
 //! the search route. Also, caches the next, current and previous search results in the search
 //! routes with the help of the redis server.
 
+pub mod router;
 pub mod routes;
src/server/router.rs (new file, 64 lines)

@@ -0,0 +1,64 @@
+//! This module provides the functionality to handle different routes of the `websurfx`
+//! meta search engine website and provide appropriate response to each route/page
+//! when requested.
+
+use crate::{
+    config::parser::Config,
+    handler::paths::{file_path, FileType},
+};
+use actix_web::{get, web, HttpRequest, HttpResponse};
+use handlebars::Handlebars;
+use std::fs::read_to_string;
+
+/// Handles the route of index page or main page of the `websurfx` meta search engine website.
+#[get("/")]
+pub async fn index(
+    hbs: web::Data<Handlebars<'_>>,
+    config: web::Data<Config>,
+) -> Result<HttpResponse, Box<dyn std::error::Error>> {
+    let page_content: String = hbs.render("index", &config.style).unwrap();
+    Ok(HttpResponse::Ok().body(page_content))
+}
+
+/// Handles the route of any other accessed route/page which is not provided by the
+/// website essentially the 404 error page.
+pub async fn not_found(
+    hbs: web::Data<Handlebars<'_>>,
+    config: web::Data<Config>,
+) -> Result<HttpResponse, Box<dyn std::error::Error>> {
+    let page_content: String = hbs.render("404", &config.style)?;
+
+    Ok(HttpResponse::Ok()
+        .content_type("text/html; charset=utf-8")
+        .body(page_content))
+}
+
+/// Handles the route of robots.txt page of the `websurfx` meta search engine website.
+#[get("/robots.txt")]
+pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
+    let page_content: String =
+        read_to_string(format!("{}/robots.txt", file_path(FileType::Theme)?))?;
+    Ok(HttpResponse::Ok()
+        .content_type("text/plain; charset=ascii")
+        .body(page_content))
+}
+
+/// Handles the route of about page of the `websurfx` meta search engine website.
+#[get("/about")]
+pub async fn about(
+    hbs: web::Data<Handlebars<'_>>,
+    config: web::Data<Config>,
+) -> Result<HttpResponse, Box<dyn std::error::Error>> {
+    let page_content: String = hbs.render("about", &config.style)?;
+    Ok(HttpResponse::Ok().body(page_content))
+}
+
+/// Handles the route of settings page of the `websurfx` meta search engine website.
+#[get("/settings")]
+pub async fn settings(
+    hbs: web::Data<Handlebars<'_>>,
+    config: web::Data<Config>,
+) -> Result<HttpResponse, Box<dyn std::error::Error>> {
+    let page_content: String = hbs.render("settings", &config.style)?;
+    Ok(HttpResponse::Ok().body(page_content))
+}
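
These handlers all pull a shared `Handlebars` registry and the parsed `Config` out of actix-web's app data, so whatever builds the `App` (here, `run` in src/lib.rs) must register both. A minimal sketch of that expectation, with a placeholder template standing in for the project's theme files:

    use actix_web::{web, App, HttpServer};
    use handlebars::Handlebars;

    #[actix_web::main]
    async fn main() -> std::io::Result<()> {
        let mut handlebars = Handlebars::new();
        // The real app loads its theme templates from disk; a literal template stands in here.
        handlebars
            .register_template_string("index", "<h1>websurfx</h1>")
            .expect("template should parse");
        let handlebars = web::Data::new(handlebars);

        HttpServer::new(move || {
            App::new().app_data(handlebars.clone())
            // .service(router::index) and friends are wired up as in src/lib.rs
        })
        .bind("127.0.0.1:8080")?
        .run()
        .await
    }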
src/server/routes/mod.rs (new file, 3 lines)

@@ -0,0 +1,3 @@
+//! This module provides modules to handle various routes in the search engine website.
+
+pub mod search;
src/server/routes/search.rs (moved from src/server/routes.rs)

@@ -1,23 +1,20 @@
-//! This module provides the functionality to handle different routes of the `websurfx`
-//! meta search engine website and provide appropriate response to each route/page
-//! when requested.
-
-use std::{
-    fs::{read_to_string, File},
-    io::{BufRead, BufReader, Read},
-};
+//! This module handles the search route of the search engine website.
 
 use crate::{
     cache::cacher::RedisCache,
     config::parser::Config,
-    engines::engine_models::EngineHandler,
     handler::paths::{file_path, FileType},
-    results::{aggregation_models::SearchResults, aggregator::aggregate},
+    models::{aggregation_models::SearchResults, engine_models::EngineHandler},
+    results::aggregator::aggregate,
 };
 use actix_web::{get, web, HttpRequest, HttpResponse};
 use handlebars::Handlebars;
 use regex::Regex;
 use serde::Deserialize;
+use std::{
+    fs::{read_to_string, File},
+    io::{BufRead, BufReader, Read},
+};
 use tokio::join;
 
 // ---- Constants ----

@@ -26,7 +23,7 @@ static REDIS_CACHE: async_once_cell::OnceCell<RedisCache> = async_once_cell::Onc
 
 /// A named struct which deserializes all the user provided search parameters and stores them.
 #[derive(Deserialize)]
-struct SearchParams {
+pub struct SearchParams {
     /// It stores the search parameter option `q` (or query in simple words)
     /// of the search url.
     q: Option<String>,