mirror of
https://github.com/neon-mmd/websurfx.git
synced 2024-11-22 05:58:21 -05:00
Merge pull request #206 from neon-mmd/change-document-style-with-linter-warnings
⚙️ Change document style with `linter` warnings
This commit is contained in:
commit
7a644549d2
2
Cargo.lock
generated
2
Cargo.lock
generated
@ -3797,7 +3797,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "websurfx"
|
||||
version = "0.20.0"
|
||||
version = "0.20.1"
|
||||
dependencies = [
|
||||
"actix-cors",
|
||||
"actix-files",
|
||||
|
@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "websurfx"
|
||||
version = "0.20.0"
|
||||
version = "0.20.1"
|
||||
edition = "2021"
|
||||
description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
|
||||
repository = "https://github.com/neon-mmd/websurfx"
|
||||
|
11
src/cache/cacher.rs
vendored
11
src/cache/cacher.rs
vendored
@ -10,17 +10,14 @@ use super::error::PoolError;
|
||||
|
||||
/// A named struct which stores the redis Connection url address to which the client will
|
||||
/// connect to.
|
||||
///
|
||||
/// # Fields
|
||||
///
|
||||
/// * `connection_pool` - It stores a pool of connections ready to be used.
|
||||
/// * `pool_size` - It stores the size of the connection pool (in other words the number of
|
||||
/// connections that should be stored in the pool).
|
||||
/// * `current_connection` - It stores the index of which connection is being used at the moment.
|
||||
#[derive(Clone)]
|
||||
pub struct RedisCache {
|
||||
/// It stores a pool of connections ready to be used.
|
||||
connection_pool: Vec<ConnectionManager>,
|
||||
/// It stores the size of the connection pool (in other words the number of
|
||||
/// connections that should be stored in the pool).
|
||||
pool_size: u8,
|
||||
/// It stores the index of which connection is being used at the moment.
|
||||
current_connection: u8,
|
||||
}
|
||||
|
||||
|
9
src/cache/error.rs
vendored
9
src/cache/error.rs
vendored
@ -5,15 +5,12 @@ use std::fmt;
|
||||
use redis::RedisError;
|
||||
|
||||
/// A custom error type used for handling redis async pool associated errors.
|
||||
///
|
||||
/// This enum provides variants three different categories of errors:
|
||||
/// * `RedisError` - This variant handles all errors related to `RedisError`,
|
||||
/// * `PoolExhaustionWithConnectionDropError` - This variant handles the error
|
||||
/// which occurs when all the connections in the connection pool return a connection
|
||||
/// dropped redis error.
|
||||
#[derive(Debug)]
|
||||
pub enum PoolError {
|
||||
/// This variant handles all errors related to `RedisError`,
|
||||
RedisError(RedisError),
|
||||
/// This variant handles the error which occurs when all the connections
|
||||
/// in the connection pool return a connection dropped redis error.
|
||||
PoolExhaustionWithConnectionDropError,
|
||||
}
|
||||
|
||||
|
3
src/cache/mod.rs
vendored
3
src/cache/mod.rs
vendored
@ -1,2 +1,5 @@
|
||||
//! This module provides the modules which provide the functionality to cache the aggregated
|
||||
//! results fetched and aggregated from the upstream search engines in a json format.
|
||||
|
||||
pub mod cacher;
|
||||
pub mod error;
|
||||
|
@ -1,2 +1,5 @@
|
||||
//! This module provides the modules which handles the functionality to parse the lua config
|
||||
//! and convert the config options into rust readable form.
|
||||
|
||||
pub mod parser;
|
||||
pub mod parser_models;
|
||||
|
@ -9,33 +9,33 @@ use mlua::Lua;
|
||||
use std::{collections::HashMap, fs, thread::available_parallelism};
|
||||
|
||||
/// A named struct which stores the parsed config file options.
|
||||
///
|
||||
/// # Fields
|
||||
//
|
||||
/// * `port` - It stores the parsed port number option on which the server should launch.
|
||||
/// * `binding_ip` - It stores the parsed ip address option on which the server should launch
|
||||
/// * `style` - It stores the theming options for the website.
|
||||
/// * `redis_url` - It stores the redis connection url address on which the redis
|
||||
/// client should connect.
|
||||
/// * `aggregator` - It stores the option to whether enable or disable production use.
|
||||
/// * `logging` - It stores the option to whether enable or disable logs.
|
||||
/// * `debug` - It stores the option to whether enable or disable debug mode.
|
||||
/// * `upstream_search_engines` - It stores all the engine names that were enabled by the user.
|
||||
/// * `request_timeout` - It stores the time (secs) which controls the server request timeout.
|
||||
/// * `threads` - It stores the number of threads which controls the app will use to run.
|
||||
#[derive(Clone)]
|
||||
pub struct Config {
|
||||
/// It stores the parsed port number option on which the server should launch.
|
||||
pub port: u16,
|
||||
/// It stores the parsed ip address option on which the server should launch
|
||||
pub binding_ip: String,
|
||||
/// It stores the theming options for the website.
|
||||
pub style: Style,
|
||||
/// It stores the redis connection url address on which the redis
|
||||
/// client should connect.
|
||||
pub redis_url: String,
|
||||
/// It stores the option to whether enable or disable production use.
|
||||
pub aggregator: AggregatorConfig,
|
||||
/// It stores the option to whether enable or disable logs.
|
||||
pub logging: bool,
|
||||
/// It stores the option to whether enable or disable debug mode.
|
||||
pub debug: bool,
|
||||
/// It stores all the engine names that were enabled by the user.
|
||||
pub upstream_search_engines: Vec<crate::engines::engine_models::EngineHandler>,
|
||||
/// It stores the time (secs) which controls the server request timeout.
|
||||
pub request_timeout: u8,
|
||||
/// It stores the number of threads which the app will use to run.
|
||||
pub threads: u8,
|
||||
/// It stores configuration options for the ratelimiting middleware.
|
||||
pub rate_limiter: RateLimiter,
|
||||
/// It stores the level of safe search to be used for restricting content in the
|
||||
/// search results.
|
||||
pub safe_search: u8,
|
||||
}
|
||||
|
||||
@ -123,6 +123,11 @@ impl Config {
|
||||
}
|
||||
|
||||
/// a helper function that sets the proper logging level
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `debug` - It takes the option to whether enable or disable debug mode.
|
||||
/// * `logging` - It takes the option to whether enable or disable logs.
|
||||
fn set_logging_level(debug: bool, logging: bool) {
|
||||
if let Ok(pkg_env_var) = std::env::var("PKG_ENV") {
|
||||
if pkg_env_var.to_lowercase() == "dev" {
|
||||
|
@ -12,15 +12,12 @@ use serde::{Deserialize, Serialize};
|
||||
/// order to allow the deserializing the json back to struct in aggregate function in
|
||||
/// aggregator.rs and create a new struct out of it and then serialize it back to json and pass
|
||||
/// it to the template files.
|
||||
///
|
||||
/// # Fields
|
||||
//
|
||||
/// * `theme` - It stores the parsed theme option used to set a theme for the website.
|
||||
/// * `colorscheme` - It stores the parsed colorscheme option used to set a colorscheme for the
|
||||
/// theme being used.
|
||||
#[derive(Serialize, Deserialize, Clone, Default)]
|
||||
pub struct Style {
|
||||
/// It stores the parsed theme option used to set a theme for the website.
|
||||
pub theme: String,
|
||||
/// It stores the parsed colorscheme option used to set a colorscheme for the
|
||||
/// theme being used.
|
||||
pub colorscheme: String,
|
||||
}
|
||||
|
||||
@ -38,24 +35,18 @@ impl Style {
|
||||
}
|
||||
|
||||
/// Configuration options for the aggregator.
|
||||
///
|
||||
/// # Fields
|
||||
///
|
||||
/// * `random_delay` - It stores the option to whether enable or disable random delays between
|
||||
/// requests.
|
||||
#[derive(Clone)]
|
||||
pub struct AggregatorConfig {
|
||||
/// It stores the option to whether enable or disable random delays between
|
||||
/// requests.
|
||||
pub random_delay: bool,
|
||||
}
|
||||
|
||||
/// Configuration options for the rate limiter middleware.
|
||||
///
|
||||
/// # Fields
|
||||
///
|
||||
/// * `number_of_requests` - The number of request that are allowed within a provided time limit.
|
||||
/// * `time_limit` - The time limit in which the quantity of requests that should be accepted.
|
||||
#[derive(Clone)]
|
||||
pub struct RateLimiter {
|
||||
/// The number of request that are allowed within a provided time limit.
|
||||
pub number_of_requests: u8,
|
||||
/// The time limit in which the quantity of requests that should be accepted.
|
||||
pub time_limit: u8,
|
||||
}
|
||||
|
@ -19,24 +19,6 @@ pub struct DuckDuckGo;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl SearchEngine for DuckDuckGo {
|
||||
/// This function scrapes results from the upstream engine duckduckgo and puts all the scraped
|
||||
/// results like title, visiting_url (href in html),engine (from which engine it was fetched from)
|
||||
/// and description in a RawSearchResult and then adds that to HashMap whose keys are url and
|
||||
/// values are RawSearchResult struct and then returns it within a Result enum.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `query` - Takes the user provided query to query to the upstream search engine with.
|
||||
/// * `page` - Takes an u32 as an argument.
|
||||
/// * `user_agent` - Takes a random user agent string as an argument.
|
||||
/// * `request_timeout` - Takes a time (secs) as a value which controls the server request timeout.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns an `EngineErrorKind` if the user is not connected to the internet or if their is failure to
|
||||
/// reach the above `upstream search engine` page or if the `upstream search engine` is unable to
|
||||
/// provide results for the requested search query and also returns error if the scraping selector
|
||||
/// or HeaderMap fails to initialize.
|
||||
async fn results(
|
||||
&self,
|
||||
query: &str,
|
||||
|
@ -6,19 +6,18 @@ use error_stack::{Result, ResultExt};
|
||||
use std::{collections::HashMap, fmt, time::Duration};
|
||||
|
||||
/// A custom error type used for handle engine associated errors.
|
||||
///
|
||||
/// This enum provides variants three different categories of errors:
|
||||
/// * `RequestError` - This variant handles all request related errors like forbidden, not found,
|
||||
/// etc.
|
||||
/// * `EmptyResultSet` - This variant handles the not results found error provide by the upstream
|
||||
/// search engines.
|
||||
/// * `UnexpectedError` - This variant handles all the errors which are unexpected or occur rarely
|
||||
/// and are errors mostly related to failure in initialization of HeaderMap, Selector errors and
|
||||
/// all other errors occurring within the code handling the `upstream search engines`.
|
||||
#[derive(Debug)]
|
||||
pub enum EngineError {
|
||||
/// This variant handles the no results found error provided by the upstream
|
||||
/// search engines.
|
||||
EmptyResultSet,
|
||||
/// This variant handles all request related errors like forbidden, not found,
|
||||
/// etc.
|
||||
RequestError,
|
||||
/// This variant handles all the errors which are unexpected or occur rarely
|
||||
/// and are errors mostly related to failure in initialization of HeaderMap,
|
||||
/// Selector errors and all other errors occurring within the code handling
|
||||
/// the `upstream search engines`.
|
||||
UnexpectedError,
|
||||
}
|
||||
|
||||
@ -46,6 +45,23 @@ impl error_stack::Context for EngineError {}
|
||||
/// A trait to define common behavior for all search engines.
|
||||
#[async_trait::async_trait]
|
||||
pub trait SearchEngine: Sync + Send {
|
||||
/// This helper function fetches/requests the search results from the upstream search engine in
|
||||
/// an html form.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `url` - It takes the url of the upstream search engine with the user requested search
|
||||
/// query appended in the search parameters.
|
||||
/// * `header_map` - It takes the http request headers to be sent to the upstream engine in
|
||||
/// order to prevent being detected as a bot. It takes the header as a HeaderMap type.
|
||||
/// * `request_timeout` - It takes the request timeout value as seconds which is used to limit
|
||||
/// the amount of time for each request to remain connected until the results can be provided
|
||||
/// by the upstream engine.
|
||||
///
|
||||
/// # Error
|
||||
///
|
||||
/// It returns the html data as a string if the upstream engine provides the data as expected
|
||||
/// otherwise it returns a custom `EngineError`.
|
||||
async fn fetch_html_from_upstream(
|
||||
&self,
|
||||
url: &str,
|
||||
@ -65,6 +81,24 @@ pub trait SearchEngine: Sync + Send {
|
||||
.change_context(EngineError::RequestError)?)
|
||||
}
|
||||
|
||||
/// This function scrapes results from the upstream engine and puts all the scraped results like
|
||||
/// title, visiting_url (href in html),engine (from which engine it was fetched from) and description
|
||||
/// in a RawSearchResult and then adds that to HashMap whose keys are url and values are RawSearchResult
|
||||
/// struct and then returns it within a Result enum.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `query` - Takes the user provided query to query to the upstream search engine with.
|
||||
/// * `page` - Takes an u32 as an argument.
|
||||
/// * `user_agent` - Takes a random user agent string as an argument.
|
||||
/// * `request_timeout` - Takes a time (secs) as a value which controls the server request timeout.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns an `EngineError` if the user is not connected to the internet or if there is a failure to
|
||||
/// reach the above `upstream search engine` page or if the `upstream search engine` is unable to
|
||||
/// provide results for the requested search query and also returns error if the scraping selector
|
||||
/// or HeaderMap fails to initialize.
|
||||
async fn results(
|
||||
&self,
|
||||
query: &str,
|
||||
@ -75,8 +109,12 @@ pub trait SearchEngine: Sync + Send {
|
||||
) -> Result<HashMap<String, SearchResult>, EngineError>;
|
||||
}
|
||||
|
||||
/// A named struct which stores the engine struct with the name of the associated engine.
|
||||
pub struct EngineHandler {
|
||||
/// It stores the engine struct wrapped in a box smart pointer as the engine struct implements
|
||||
/// the `SearchEngine` trait.
|
||||
engine: Box<dyn SearchEngine>,
|
||||
/// It stores the name of the engine to which the struct is associated to.
|
||||
name: &'static str,
|
||||
}
|
||||
|
||||
@ -87,7 +125,15 @@ impl Clone for EngineHandler {
|
||||
}
|
||||
|
||||
impl EngineHandler {
|
||||
/// parses an engine name into an engine handler, returns none if the engine is unknown
|
||||
/// Parses an engine name into an engine handler.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `engine_name` - It takes the name of the engine to which the struct was associated to.
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// It returns an option either containing the value or a none if the engine is unknown
|
||||
pub fn new(engine_name: &str) -> Option<Self> {
|
||||
let engine: (&'static str, Box<dyn SearchEngine>) =
|
||||
match engine_name.to_lowercase().as_str() {
|
||||
@ -102,6 +148,8 @@ impl EngineHandler {
|
||||
})
|
||||
}
|
||||
|
||||
/// This function converts the EngineHandler type into a tuple containing the engine name and
|
||||
/// the associated engine struct.
|
||||
pub fn into_name_engine(self) -> (&'static str, Box<dyn SearchEngine>) {
|
||||
(self.name, self.engine)
|
||||
}
|
||||
|
@ -1,3 +1,8 @@
|
||||
//! This module provides different modules which handle the functionality to fetch results from the
|
||||
//! upstream search engines based on user requested queries. Also provides different models to
|
||||
//! provide a standard functions to be implemented for all the upstream search engine handling
|
||||
//! code. Moreover, it also provides a custom error for the upstream search engine handling code.
|
||||
|
||||
pub mod duckduckgo;
|
||||
pub mod engine_models;
|
||||
pub mod searx;
|
||||
|
@ -17,25 +17,6 @@ pub struct Searx;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl SearchEngine for Searx {
|
||||
/// This function scrapes results from the upstream engine duckduckgo and puts all the scraped
|
||||
/// results like title, visiting_url (href in html),engine (from which engine it was fetched from)
|
||||
/// and description in a RawSearchResult and then adds that to HashMap whose keys are url and
|
||||
/// values are RawSearchResult struct and then returns it within a Result enum.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `query` - Takes the user provided query to query to the upstream search engine with.
|
||||
/// * `page` - Takes an u32 as an argument.
|
||||
/// * `user_agent` - Takes a random user agent string as an argument.
|
||||
/// * `request_timeout` - Takes a time (secs) as a value which controls the server request timeout.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns an `EngineErrorKind` if the user is not connected to the internet or if their is failure to
|
||||
/// reach the above `upstream search engine` page or if the `upstream search engine` is unable to
|
||||
/// provide results for the requested search query and also returns error if the scraping selector
|
||||
/// or HeaderMap fails to initialize.
|
||||
|
||||
async fn results(
|
||||
&self,
|
||||
query: &str,
|
||||
|
@ -1 +1,5 @@
|
||||
//! This module provides modules which provide the functionality to handle paths for different
|
||||
//! files present on different paths and provide one appropriate path on which it is present and
|
||||
//! can be used.
|
||||
|
||||
pub mod paths;
|
||||
|
@ -7,42 +7,46 @@ use std::path::Path;
|
||||
use std::sync::OnceLock;
|
||||
|
||||
// ------- Constants --------
|
||||
static PUBLIC_DIRECTORY_NAME: &str = "public";
|
||||
static COMMON_DIRECTORY_NAME: &str = "websurfx";
|
||||
static CONFIG_FILE_NAME: &str = "config.lua";
|
||||
static ALLOWLIST_FILE_NAME: &str = "allowlist.txt";
|
||||
static BLOCKLIST_FILE_NAME: &str = "blocklist.txt";
|
||||
/// The constant holding the name of the theme folder.
|
||||
const PUBLIC_DIRECTORY_NAME: &str = "public";
|
||||
/// The constant holding the name of the common folder.
|
||||
const COMMON_DIRECTORY_NAME: &str = "websurfx";
|
||||
/// The constant holding the name of the config file.
|
||||
const CONFIG_FILE_NAME: &str = "config.lua";
|
||||
/// The constant holding the name of the AllowList text file.
|
||||
const ALLOWLIST_FILE_NAME: &str = "allowlist.txt";
|
||||
/// The constant holding the name of the BlockList text file.
|
||||
const BLOCKLIST_FILE_NAME: &str = "blocklist.txt";
|
||||
|
||||
/// An enum type which provides different variants to handle paths for various files/folders.
|
||||
#[derive(Hash, PartialEq, Eq, Debug)]
|
||||
pub enum FileType {
|
||||
/// This variant handles all the paths associated with the config file.
|
||||
Config,
|
||||
/// This variant handles all the paths associated with the Allowlist text file.
|
||||
AllowList,
|
||||
/// This variant handles all the paths associated with the BlockList text file.
|
||||
BlockList,
|
||||
/// This variant handles all the paths associated with the public folder (Theme folder).
|
||||
Theme,
|
||||
}
|
||||
|
||||
/// A static variable which stores the different filesystem paths for various file/folder types.
|
||||
static FILE_PATHS_FOR_DIFF_FILE_TYPES: OnceLock<HashMap<FileType, Vec<String>>> = OnceLock::new();
|
||||
|
||||
/// A helper function which returns an appropriate config file path checking if the config
|
||||
/// file exists on that path.
|
||||
/// A function which returns an appropriate path for the provided file type by checking if the path
|
||||
/// for the given file type exists on that path.
|
||||
///
|
||||
/// # Error
|
||||
///
|
||||
/// Returns a `config file not found!!` error if the config file is not present under following
|
||||
/// paths which are:
|
||||
/// 1. `~/.config/websurfx/` if it not present here then it fallbacks to the next one (2)
|
||||
/// 2. `/etc/xdg/websurfx/config.lua` if it is not present here then it fallbacks to the next
|
||||
/// one (3).
|
||||
/// 3. `websurfx/` (under project folder ( or codebase in other words)) if it is not present
|
||||
/// here then it returns an error as mentioned above.
|
||||
|
||||
/// A function which returns an appropriate theme directory path checking if the theme
|
||||
/// directory exists on that path.
|
||||
/// Returns a `<File Type> file/folder not found!!` error if the given file_type folder/file is not
|
||||
/// present on the path on which it is being tested.
|
||||
///
|
||||
/// # Error
|
||||
/// # Example
|
||||
///
|
||||
/// If this function is given the file_type of Theme variant then the theme folder is checked by the
|
||||
/// following steps:
|
||||
///
|
||||
/// Returns a `Theme (public) folder not found!!` error if the theme folder is not present under following
|
||||
/// paths which are:
|
||||
/// 1. `/opt/websurfx` if it not present here then it fallbacks to the next one (2)
|
||||
/// 2. Under project folder ( or codebase in other words) if it is not present
|
||||
/// here then it returns an error as mentioned above.
|
||||
@ -110,6 +114,6 @@ pub fn file_path(file_type: FileType) -> Result<&'static str, Error> {
|
||||
// if no of the configs above exist, return error
|
||||
Err(Error::new(
|
||||
std::io::ErrorKind::NotFound,
|
||||
format!("{:?} file not found!!", file_type),
|
||||
format!("{:?} file/folder not found!!", file_type),
|
||||
))
|
||||
}
|
||||
|
@ -1,6 +1,10 @@
|
||||
//! This main library module provides the functionality to provide and handle the Tcp server
|
||||
//! and register all the routes for the `websurfx` meta search engine website.
|
||||
|
||||
#![forbid(unsafe_code, clippy::panic)]
|
||||
#![deny(missing_docs, clippy::missing_docs_in_private_items, clippy::perf)]
|
||||
#![warn(clippy::cognitive_complexity, rust_2018_idioms)]
|
||||
|
||||
pub mod cache;
|
||||
pub mod config;
|
||||
pub mod engines;
|
||||
@ -41,7 +45,7 @@ use handler::paths::{file_path, FileType};
|
||||
/// let server = run(listener,config).expect("Failed to start server");
|
||||
/// ```
|
||||
pub fn run(listener: TcpListener, config: Config) -> std::io::Result<Server> {
|
||||
let mut handlebars: Handlebars = Handlebars::new();
|
||||
let mut handlebars: Handlebars<'_> = Handlebars::new();
|
||||
|
||||
let public_folder_path: &str = file_path(FileType::Theme)?;
|
||||
|
||||
@ -49,7 +53,7 @@ pub fn run(listener: TcpListener, config: Config) -> std::io::Result<Server> {
|
||||
.register_templates_directory(".html", format!("{}/templates", public_folder_path))
|
||||
.unwrap();
|
||||
|
||||
let handlebars_ref: web::Data<Handlebars> = web::Data::new(handlebars);
|
||||
let handlebars_ref: web::Data<Handlebars<'_>> = web::Data::new(handlebars);
|
||||
|
||||
let cloned_config_threads_opt: u8 = config.threads;
|
||||
|
||||
|
@ -9,20 +9,17 @@ use crate::{config::parser_models::Style, engines::engine_models::EngineError};
|
||||
/// A named struct to store the raw scraped search results from the
|
||||
/// upstream search engines before aggregating it. It derives the Clone trait which is needed
|
||||
/// to write idiomatic rust using `Iterators`.
|
||||
///
|
||||
/// # Fields
|
||||
///
|
||||
/// * `title` - The title of the search result.
|
||||
/// * `url` - The url which is accessed when clicked on it
|
||||
/// (href url in html in simple words).
|
||||
/// * `description` - The description of the search result.
|
||||
/// * `engine` - The names of the upstream engines from which this results were provided.
|
||||
#[derive(Clone, Serialize, Deserialize, Debug)]
|
||||
#[derive(Clone, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct SearchResult {
|
||||
/// The title of the search result.
|
||||
pub title: String,
|
||||
/// The url which is accessed when clicked on it
|
||||
pub url: String,
|
||||
/// The description of the search result.
|
||||
pub description: String,
|
||||
/// The names of the upstream engines from which this results were provided.
|
||||
pub engine: SmallVec<[String; 0]>,
|
||||
}
|
||||
|
||||
@ -64,14 +61,27 @@ impl SearchResult {
|
||||
}
|
||||
}
|
||||
|
||||
/// A named struct that stores the error info related to the upstream search engines.
|
||||
#[derive(Serialize, Deserialize, Clone)]
|
||||
pub struct EngineErrorInfo {
|
||||
/// It stores the error type which occurred while fetching the result from a particular search
|
||||
/// engine.
|
||||
pub error: String,
|
||||
/// It stores the name of the engine that failed to provide the requested search results.
|
||||
pub engine: String,
|
||||
/// It stores the name of the color to indicate how severe the particular error is (In
|
||||
/// other words it indicates the severity of the error/issue).
|
||||
pub severity_color: String,
|
||||
}
|
||||
|
||||
impl EngineErrorInfo {
|
||||
/// Constructs a new `EngineErrorInfo` with the given arguments needed for the struct.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `error` - It takes the error type which occurred while fetching the result from a particular
|
||||
/// search engine.
|
||||
/// * `engine` - It takes the name of the engine that failed to provide the requested search results.
|
||||
pub fn new(error: &EngineError, engine: &str) -> Self {
|
||||
Self {
|
||||
error: match error {
|
||||
@ -91,25 +101,26 @@ impl EngineErrorInfo {
|
||||
|
||||
/// A named struct to store, serialize, deserialize the all the search results scraped and
|
||||
/// aggregated from the upstream search engines.
|
||||
///
|
||||
/// # Fields
|
||||
///
|
||||
/// * `results` - Stores the individual serializable `SearchResult` struct into a vector of
|
||||
/// `SearchResult` structs.
|
||||
/// * `page_query` - Stores the current pages search query `q` provided in the search url.
|
||||
/// * `style` - Stores the theming options for the website.
|
||||
/// * `engine_errors_info` - Stores the information on which engines failed with their engine name
|
||||
/// and the type of error that caused it.
|
||||
/// * `empty_result_set` - Stores a boolean which indicates that no engines gave a result for the
|
||||
/// given search query.
|
||||
#[derive(Serialize, Deserialize, Default)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct SearchResults {
|
||||
/// Stores the individual serializable `SearchResult` struct into a vector of `SearchResult` structs.
|
||||
pub results: Vec<SearchResult>,
|
||||
/// Stores the current pages search query `q` provided in the search url.
|
||||
pub page_query: String,
|
||||
/// Stores the theming options for the website.
|
||||
pub style: Style,
|
||||
/// Stores the information on which engines failed with their engine name
|
||||
/// and the type of error that caused it.
|
||||
pub engine_errors_info: Vec<EngineErrorInfo>,
|
||||
/// Stores the flag option which holds the check value that the following
|
||||
/// search query was disallowed when the safe search level set to 4 and it
|
||||
/// was present in the `Blocklist` file.
|
||||
pub disallowed: bool,
|
||||
/// Stores the flag option which holds the check value that the following
|
||||
/// search query was filtered when the safe search level set to 3 and it
|
||||
/// was present in the `Blocklist` file.
|
||||
pub filtered: bool,
|
||||
}
|
||||
|
||||
@ -122,9 +133,8 @@ impl SearchResults {
|
||||
/// and stores it into a vector of `SearchResult` structs.
|
||||
/// * `page_query` - Takes an argument of current page`s search query `q` provided in
|
||||
/// the search url.
|
||||
/// * `empty_result_set` - Takes a boolean which indicates that no engines gave a result for the
|
||||
/// given search query.
|
||||
/// * ``
|
||||
/// * `engine_errors_info` - Takes an array of structs which contains information regarding
|
||||
/// which engines failed with their names, reason and their severity color name.
|
||||
pub fn new(
|
||||
results: Vec<SearchResult>,
|
||||
page_query: &str,
|
||||
|
@ -1,3 +1,7 @@
|
||||
//! This module provides modules that handle the functionality to aggregate the fetched search
|
||||
//! results from the upstream search engines and filters it if safe search is set to 3 or 4. Also,
|
||||
//! provides various models to aggregate search results into a standardized form.
|
||||
|
||||
pub mod aggregation_models;
|
||||
pub mod aggregator;
|
||||
pub mod user_agent;
|
||||
|
@ -4,6 +4,8 @@ use std::sync::OnceLock;
|
||||
|
||||
use fake_useragent::{Browsers, UserAgents, UserAgentsBuilder};
|
||||
|
||||
/// A static variable which stores the initially built `UserAgents` struct so that it can be reused
|
||||
/// again and again without the need of reinitializing the `UserAgents` struct.
|
||||
static USER_AGENTS: OnceLock<UserAgents> = OnceLock::new();
|
||||
|
||||
/// A function to generate random user agent to improve privacy of the user.
|
||||
|
@ -1 +1,6 @@
|
||||
//! This module provides modules that handle the functionality of handling different routes/paths
|
||||
//! for the `websurfx` search engine website. Also it handles the parsing of search parameters in
|
||||
//! the search route. Also, caches the next, current and previous search results in the search
|
||||
//! routes with the help of the redis server.
|
||||
|
||||
pub mod routes;
|
||||
|
@ -25,17 +25,16 @@ use tokio::join;
|
||||
static REDIS_CACHE: async_once_cell::OnceCell<RedisCache> = async_once_cell::OnceCell::new();
|
||||
|
||||
/// A named struct which deserializes all the user provided search parameters and stores them.
|
||||
///
|
||||
/// # Fields
|
||||
///
|
||||
/// * `q` - It stores the search parameter option `q` (or query in simple words)
|
||||
/// of the search url.
|
||||
/// * `page` - It stores the search parameter `page` (or pageno in simple words)
|
||||
/// of the search url.
|
||||
#[derive(Deserialize)]
|
||||
struct SearchParams {
|
||||
/// It stores the search parameter option `q` (or query in simple words)
|
||||
/// of the search url.
|
||||
q: Option<String>,
|
||||
/// It stores the search parameter `page` (or pageno in simple words)
|
||||
/// of the search url.
|
||||
page: Option<u32>,
|
||||
/// It stores the search parameter `safesearch` (or safe search level in simple words) of the
|
||||
/// search url.
|
||||
safesearch: Option<u8>,
|
||||
}
|
||||
|
||||
@ -63,17 +62,14 @@ pub async fn not_found(
|
||||
}
|
||||
|
||||
/// A named struct which is used to deserialize the cookies fetched from the client side.
|
||||
///
|
||||
/// # Fields
|
||||
///
|
||||
/// * `theme` - It stores the theme name used in the website.
|
||||
/// * `colorscheme` - It stores the colorscheme name used for the website theme.
|
||||
/// * `engines` - It stores the user selected upstream search engines selected from the UI.
|
||||
#[allow(dead_code)]
|
||||
#[derive(Deserialize)]
|
||||
struct Cookie<'a> {
|
||||
/// It stores the theme name used in the website.
|
||||
theme: &'a str,
|
||||
/// It stores the colorscheme name used for the website theme.
|
||||
colorscheme: &'a str,
|
||||
/// It stores the user selected upstream search engines selected from the UI.
|
||||
engines: Vec<&'a str>,
|
||||
}
|
||||
|
||||
@ -174,8 +170,21 @@ pub async fn search(
|
||||
}
|
||||
}
|
||||
|
||||
/// Fetches the results for a query and page.
|
||||
/// First checks the redis cache, if that fails it gets proper results
|
||||
/// Fetches the results for a query and page. It first checks the redis cache, if that
|
||||
/// fails it gets proper results by requesting from the upstream search engines.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `url` - It takes the url of the current page that requested the search results for a
|
||||
/// particular search query.
|
||||
/// * `config` - It takes a parsed config struct.
|
||||
/// * `query` - It takes the page number as u32 value.
|
||||
/// * `req` - It takes the `HttpRequest` struct as a value.
|
||||
///
|
||||
/// # Error
|
||||
///
|
||||
/// It returns the `SearchResults` struct if the search results could be successfully fetched from
|
||||
/// the cache or from the upstream search engines otherwise it returns an appropriate error.
|
||||
async fn results(
|
||||
url: String,
|
||||
config: &Config,
|
||||
@ -184,6 +193,7 @@ async fn results(
|
||||
req: HttpRequest,
|
||||
safe_search: u8,
|
||||
) -> Result<SearchResults, Box<dyn std::error::Error>> {
|
||||
// Initialize redis cache connection struct
|
||||
let mut redis_cache: RedisCache = REDIS_CACHE
|
||||
.get_or_init(async {
|
||||
// Initialize redis cache connection pool only once and store it in the heap.
|
||||
@ -191,7 +201,6 @@ async fn results(
|
||||
})
|
||||
.await
|
||||
.clone();
|
||||
|
||||
// fetch the cached results json.
|
||||
let cached_results_json: Result<String, error_stack::Report<crate::cache::error::PoolError>> =
|
||||
redis_cache.clone().cached_json(&url).await;
|
||||
@ -223,7 +232,8 @@ async fn results(
|
||||
// UI and use that.
|
||||
let mut results: SearchResults = match req.cookie("appCookie") {
|
||||
Some(cookie_value) => {
|
||||
let cookie_value: Cookie = serde_json::from_str(cookie_value.name_value().1)?;
|
||||
let cookie_value: Cookie<'_> =
|
||||
serde_json::from_str(cookie_value.name_value().1)?;
|
||||
|
||||
let engines: Vec<EngineHandler> = cookie_value
|
||||
.engines
|
||||
@ -267,6 +277,8 @@ async fn results(
|
||||
}
|
||||
}
|
||||
|
||||
/// A helper function which checks whether the search query contains any keywords which should be
|
||||
/// disallowed/allowed based on the regex based rules present in the blocklist and allowlist files.
|
||||
fn is_match_from_filter_list(
|
||||
file_path: &str,
|
||||
query: &str,
|
||||
|
Loading…
Reference in New Issue
Block a user