From d451fddf490c82e216dd27b97848c84f7315caa2 Mon Sep 17 00:00:00 2001 From: neon_arch Date: Sat, 2 Sep 2023 20:18:09 +0300 Subject: [PATCH 01/23] =?UTF-8?q?=E2=9C=A8=20feat:=20add=20new=20config=20?= =?UTF-8?q?option=20to=20configure=20rate=20limiting=20middleware=20(#203)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- websurfx/config.lua | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/websurfx/config.lua b/websurfx/config.lua index 4f2633c..3335ae8 100644 --- a/websurfx/config.lua +++ b/websurfx/config.lua @@ -10,6 +10,10 @@ production_use = false -- whether to use production mode or not (in other words -- if production_use is set to true -- There will be a random delay before sending the request to the search engines, this is to prevent DDoSing the upstream search engines from a large number of simultaneous requests. request_timeout = 30 -- timeout for the search requests sent to the upstream search engines to be fetched (value in seconds). +rate_limiter = { + number_of_requests = 20, -- The number of request that are allowed within a provided time limit. + time_limit = 3, -- The time limit in which the quantity of requests that should be accepted. +} -- ### Website ### -- The different colorschemes provided are: @@ -34,4 +38,7 @@ theme = "simple" -- the theme name which should be used for the website redis_url = "redis://127.0.0.1:8082" -- redis connection url address on which the client should connect on. -- ### Search Engines ### -upstream_search_engines = { DuckDuckGo = true, Searx = false } -- select the upstream search engines from which the results should be fetched. +upstream_search_engines = { + DuckDuckGo = true, + Searx = false, +} -- select the upstream search engines from which the results should be fetched. 
From 4eb75a8cb16dc52a0d87da45c94cba3aa8e0e684 Mon Sep 17 00:00:00 2001 From: neon_arch Date: Sat, 2 Sep 2023 20:19:43 +0300 Subject: [PATCH 02/23] =?UTF-8?q?=E2=9C=A8=20feat:=20add=20code=20to=20par?= =?UTF-8?q?se=20the=20new=20config=20option=20(#203)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/config/parser.rs | 20 ++++++++------------ src/config/parser_models.rs | 23 +++++++++++++++++++++++ 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/src/config/parser.rs b/src/config/parser.rs index 4639013..dbebfd0 100644 --- a/src/config/parser.rs +++ b/src/config/parser.rs @@ -3,7 +3,7 @@ use crate::handler::paths::{file_path, FileType}; -use super::parser_models::Style; +use super::parser_models::{AggregatorConfig, RateLimiter, Style}; use log::LevelFilter; use rlua::Lua; use std::{collections::HashMap, fs, thread::available_parallelism}; @@ -35,17 +35,7 @@ pub struct Config { pub upstream_search_engines: Vec, pub request_timeout: u8, pub threads: u8, -} - -/// Configuration options for the aggregator. -/// -/// # Fields -/// -/// * `random_delay` - It stores the option to whether enable or disable random delays between -/// requests. 
-#[derive(Clone)] -pub struct AggregatorConfig { - pub random_delay: bool, + pub rate_limter: RateLimiter, } impl Config { @@ -88,6 +78,8 @@ impl Config { parsed_threads }; + let rate_limter = globals.get::<_,HashMap>("rate_limiter")?; + Ok(Config { port: globals.get::<_, u16>("port")?, binding_ip: globals.get::<_, String>("binding_ip")?, @@ -109,6 +101,10 @@ impl Config { .collect(), request_timeout: globals.get::<_, u8>("request_timeout")?, threads, + rate_limter: RateLimiter { + number_of_requests: rate_limter["number_of_requests"], + time_limit: rate_limter["time_limit"], + } }) }) } diff --git a/src/config/parser_models.rs b/src/config/parser_models.rs index 0bc52d8..14e4dd6 100644 --- a/src/config/parser_models.rs +++ b/src/config/parser_models.rs @@ -36,3 +36,26 @@ impl Style { Style { theme, colorscheme } } } + +/// Configuration options for the aggregator. +/// +/// # Fields +/// +/// * `random_delay` - It stores the option to whether enable or disable random delays between +/// requests. +#[derive(Clone)] +pub struct AggregatorConfig { + pub random_delay: bool, +} + +/// Configuration options for the rate limter middleware. 
+/// +/// # Fields +/// +/// * `number_of_requests` - +/// * `time_limit` - +#[derive(Clone)] +pub struct RateLimiter { + pub number_of_requests: u8, + pub time_limit: u8, +} From 8cba040d80857a2b381857cfb1c25be30cdf29b4 Mon Sep 17 00:00:00 2001 From: neon_arch Date: Sat, 2 Sep 2023 20:21:48 +0300 Subject: [PATCH 03/23] =?UTF-8?q?=E2=9C=A8=20feat:=20add=20code=20to=20par?= =?UTF-8?q?se=20the=20new=20config=20option=20(#203)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/config/parser_models.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/config/parser_models.rs b/src/config/parser_models.rs index 14e4dd6..4a986fd 100644 --- a/src/config/parser_models.rs +++ b/src/config/parser_models.rs @@ -52,8 +52,8 @@ pub struct AggregatorConfig { /// /// # Fields /// -/// * `number_of_requests` - -/// * `time_limit` - +/// * `number_of_requests` - The number of request that are allowed within a provided time limit. +/// * `time_limit` - The time limit in which the quantity of requests that should be accepted. 
#[derive(Clone)] pub struct RateLimiter { pub number_of_requests: u8, From 51937a0d494594709cd80e65d7ac9334662b841b Mon Sep 17 00:00:00 2001 From: neon_arch Date: Sat, 2 Sep 2023 20:22:24 +0300 Subject: [PATCH 04/23] =?UTF-8?q?=E2=9C=A8=20feat:=20pass=20the=20new=20co?= =?UTF-8?q?nfig=20option=20into=20the=20middleware=20config=20(#203)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/lib.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index cd83d8a..be526d9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -14,6 +14,7 @@ use crate::server::routes; use actix_cors::Cors; use actix_files as fs; +use actix_governor::{Governor, GovernorConfigBuilder}; use actix_web::{dev::Server, http::header, middleware::Logger, web, App, HttpServer}; use config::parser::Config; use handlebars::Handlebars; @@ -64,10 +65,17 @@ pub fn run(listener: TcpListener, config: Config) -> std::io::Result { ]); App::new() + .wrap(Logger::default()) // added logging middleware for logging. .app_data(handlebars_ref.clone()) .app_data(web::Data::new(config.clone())) .wrap(cors) - .wrap(Logger::default()) // added logging middleware for logging. + .wrap(Governor::new( + &GovernorConfigBuilder::default() + .per_second(config.rate_limter.time_limit as u64) + .burst_size(config.rate_limter.number_of_requests as u32) + .finish() + .unwrap(), + )) // Serve images and static files (css and js files). 
.service( fs::Files::new("/static", format!("{}/static", public_folder_path)) From 88e569bca7466337a46511b6246d6caa85becbf5 Mon Sep 17 00:00:00 2001 From: neon_arch Date: Sat, 2 Sep 2023 20:22:59 +0300 Subject: [PATCH 05/23] =?UTF-8?q?=E2=9C=A8=20feat:=20add=20`actix-governor?= =?UTF-8?q?`=20crate=20(#203)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock | 188 ++++++++++++++++++++++++++++++++++++++++++++++++----- Cargo.toml | 1 + 2 files changed, 171 insertions(+), 18 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6bbaab6..e15dc24 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -57,6 +57,18 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "actix-governor" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46ff2d40f2bc627b8054c5e20fa6b0b0cf9428699b54bd41634e9ae3098ad555" +dependencies = [ + "actix-http", + "actix-web", + "futures 0.3.28", + "governor", +] + [[package]] name = "actix-http" version = "3.4.0" @@ -801,6 +813,19 @@ dependencies = [ "syn 2.0.29", ] +[[package]] +name = "dashmap" +version = "5.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" +dependencies = [ + "cfg-if 1.0.0", + "hashbrown 0.14.0", + "lock_api 0.4.10", + "once_cell", + "parking_lot_core 0.9.8", +] + [[package]] name = "deranged" version = "0.3.8" @@ -1049,6 +1074,21 @@ version = "0.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3a471a38ef8ed83cd6e40aa59c1ffe17db6855c18e3604d9c4ed8c08ebc28678" +[[package]] +name = "futures" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23342abe12aba583913b2e62f22225ff9c950774065e4bfb61a19cd9770fec40" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] 
+ [[package]] name = "futures-channel" version = "0.3.28" @@ -1056,6 +1096,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2" dependencies = [ "futures-core", + "futures-sink", ] [[package]] @@ -1070,10 +1111,38 @@ version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab90cde24b3319636588d0c35fe03b1333857621051837ed769faefb4c2162e4" dependencies = [ - "futures", + "futures 0.1.31", "num_cpus", ] +[[package]] +name = "futures-executor" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccecee823288125bd88b4d7f565c9e58e41858e47ab72e8ea2d64e93624386e0" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964" + +[[package]] +name = "futures-macro" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" +dependencies = [ + "proc-macro2 1.0.66", + "quote 1.0.33", + "syn 2.0.29", +] + [[package]] name = "futures-sink" version = "0.3.28" @@ -1086,16 +1155,28 @@ version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65" +[[package]] +name = "futures-timer" +version = "3.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e64b03909df88034c26dc1547e8970b91f98bdb65165d6a4e9110d94263dbb2c" + [[package]] name = "futures-util" version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533" dependencies = [ + "futures-channel", "futures-core", + 
"futures-io", + "futures-macro", + "futures-sink", "futures-task", + "memchr", "pin-project-lite", "pin-utils", + "slab", ] [[package]] @@ -1143,6 +1224,24 @@ version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6fb8d784f27acf97159b40fc4db5ecd8aa23b9ad5ef69cdd136d3bc80665f0c0" +[[package]] +name = "governor" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c390a940a5d157878dd057c78680a33ce3415bcd05b4799509ea44210914b4d5" +dependencies = [ + "cfg-if 1.0.0", + "dashmap", + "futures 0.3.28", + "futures-timer", + "no-std-compat", + "nonzero_ext", + "parking_lot 0.12.1", + "quanta", + "rand 0.8.5", + "smallvec 1.11.0", +] + [[package]] name = "h2" version = "0.1.26" @@ -1152,7 +1251,7 @@ dependencies = [ "byteorder", "bytes 0.4.12", "fnv", - "futures", + "futures 0.1.31", "http 0.1.21", "indexmap", "log", @@ -1207,6 +1306,12 @@ version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +[[package]] +name = "hashbrown" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" + [[package]] name = "hermit-abi" version = "0.3.2" @@ -1270,7 +1375,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6741c859c1b2463a423a1dbce98d418e6c3c3fc720fb0d45528657320920292d" dependencies = [ "bytes 0.4.12", - "futures", + "futures 0.1.31", "http 0.1.21", "tokio-buf", ] @@ -1317,7 +1422,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c843caf6296fc1f93444735205af9ed4e109a539005abb2564ae1d6fad34c52" dependencies = [ "bytes 0.4.12", - "futures", + "futures 0.1.31", "futures-cpupool", "h2 0.1.26", "http 0.1.21", @@ -1371,7 +1476,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"3a800d6aa50af4b5850b2b0f659625ce9504df908e9733b635720483be26174f" dependencies = [ "bytes 0.4.12", - "futures", + "futures 0.1.31", "hyper 0.12.36", "native-tls", "tokio-io", @@ -1429,7 +1534,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" dependencies = [ "autocfg 1.1.0", - "hashbrown", + "hashbrown 0.12.3", ] [[package]] @@ -1580,6 +1685,15 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" +[[package]] +name = "mach" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b823e83b2affd8f40a9ee8c29dbc56404c1e34cd2710921f2801e2cf29527afa" +dependencies = [ + "libc", +] + [[package]] name = "markup5ever" version = "0.8.1" @@ -1762,6 +1876,18 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab250442c86f1850815b5d268639dff018c0627022bc1940eb2d642ca1ce12f0" +[[package]] +name = "no-std-compat" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b93853da6d84c2e3c7d730d6473e8817692dd89be387eb01b94d7f108ecb5b8c" + +[[package]] +name = "nonzero_ext" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38bf9645c8b145698bb0b18a4637dcacbc421ea49bef2317e4fd8065a387cf21" + [[package]] name = "num-traits" version = "0.2.16" @@ -2162,6 +2288,22 @@ dependencies = [ "url 2.4.1", ] +[[package]] +name = "quanta" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20afe714292d5e879d8b12740aa223c6a88f118af41870e8b6196e39a02238a8" +dependencies = [ + "crossbeam-utils 0.8.16", + "libc", + "mach", + "once_cell", + "raw-cpuid", + "wasi 0.10.0+wasi-snapshot-preview1", + "web-sys", + "winapi 0.3.9", +] + [[package]] name = "quote" version = "0.6.13" @@ 
-2316,6 +2458,15 @@ dependencies = [ "rand_core 0.3.1", ] +[[package]] +name = "raw-cpuid" +version = "10.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c297679cb867470fa8c9f67dbba74a78d78e3e98d7cf2b08d6d71540f797332" +dependencies = [ + "bitflags 1.3.2", +] + [[package]] name = "rayon" version = "1.7.0" @@ -2418,7 +2569,7 @@ dependencies = [ "cookie_store", "encoding_rs", "flate2", - "futures", + "futures 0.1.31", "http 0.1.21", "hyper 0.12.36", "hyper-tls 0.3.2", @@ -3071,7 +3222,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a09c0b5bb588872ab2f09afa13ee6e9dac11e10a0ec9e8e3ba39a5a5d530af6" dependencies = [ "bytes 0.4.12", - "futures", + "futures 0.1.31", "mio 0.6.23", "num_cpus", "tokio-current-thread", @@ -3110,7 +3261,7 @@ checksum = "8fb220f46c53859a4b7ec083e41dec9778ff0b1851c0942b211edb89e0ccdc46" dependencies = [ "bytes 0.4.12", "either", - "futures", + "futures 0.1.31", ] [[package]] @@ -3119,7 +3270,7 @@ version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b1de0e32a83f131e002238d7ccde18211c0a5397f60cbfffcb112868c2e0e20e" dependencies = [ - "futures", + "futures 0.1.31", "tokio-executor", ] @@ -3130,7 +3281,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fb2d1b8f4548dbf5e1f7818512e9c406860678f29c300cdf0ebac72d1a3a1671" dependencies = [ "crossbeam-utils 0.7.2", - "futures", + "futures 0.1.31", ] [[package]] @@ -3140,7 +3291,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57fc868aae093479e3131e3d165c93b1c7474109d13c90ec0dda2a1bbfff0674" dependencies = [ "bytes 0.4.12", - "futures", + "futures 0.1.31", "log", ] @@ -3172,7 +3323,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09bc590ec4ba8ba87652da2068d150dcada2cfa2e07faae270a5e0409aa51351" dependencies = [ "crossbeam-utils 0.7.2", - "futures", + "futures 0.1.31", "lazy_static", "log", "mio 
0.6.23", @@ -3191,7 +3342,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "edfe50152bc8164fcc456dab7891fa9bf8beaf01c5ee7e1dd43a397c3cf87dee" dependencies = [ "fnv", - "futures", + "futures 0.1.31", ] [[package]] @@ -3201,7 +3352,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "98df18ed66e3b72e742f185882a9e201892407957e45fbff8da17ae7a7c51f72" dependencies = [ "bytes 0.4.12", - "futures", + "futures 0.1.31", "iovec", "mio 0.6.23", "tokio-io", @@ -3217,7 +3368,7 @@ dependencies = [ "crossbeam-deque 0.7.4", "crossbeam-queue", "crossbeam-utils 0.7.2", - "futures", + "futures 0.1.31", "lazy_static", "log", "num_cpus", @@ -3232,7 +3383,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "93044f2d313c95ff1cb7809ce9a7a05735b012288a888b62d4434fd58c94f296" dependencies = [ "crossbeam-utils 0.7.2", - "futures", + "futures 0.1.31", "slab", "tokio-executor", ] @@ -3427,7 +3578,7 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6395efa4784b027708f7451087e647ec73cc74f5d9bc2e418404248d679a230" dependencies = [ - "futures", + "futures 0.1.31", "log", "try-lock", ] @@ -3535,6 +3686,7 @@ version = "0.18.6" dependencies = [ "actix-cors", "actix-files", + "actix-governor", "actix-web", "async-trait", "criterion", diff --git a/Cargo.toml b/Cargo.toml index ed0e24b..1f59275 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,7 @@ scraper = {version="0.17.1"} actix-web = {version="4.4.0", features = ["cookies"]} actix-files = {version="0.6.2"} actix-cors = {version="0.6.4"} +actix-governor = {version="0.4.1"} serde_json = {version="1.0.105"} fake-useragent = {version="0.1.3"} env_logger = {version="0.10.0"} From 9fec52f784fb0d3b04c2631ce09f2332c93ce2f1 Mon Sep 17 00:00:00 2001 From: neon_arch Date: Sat, 2 Sep 2023 20:25:17 +0300 Subject: [PATCH 06/23] =?UTF-8?q?=F0=9F=9A=80=20chore:=20bump=20the=20app?= 
=?UTF-8?q?=20version=20&=20bump=20the=20crates=20version=20(#203)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock | 18 +++++++++--------- Cargo.toml | 8 ++++---- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e15dc24..2ba60bf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1287,9 +1287,9 @@ checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" [[package]] name = "handlebars" -version = "4.3.7" +version = "4.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83c3372087601b532857d332f5957cbae686da52bb7810bf038c3e3c3cc2fa0d" +checksum = "c39b3bc2a8f715298032cf5087e58573809374b08160aa7d750582bdb82d2683" dependencies = [ "log", "pest", @@ -2500,9 +2500,9 @@ dependencies = [ [[package]] name = "redis" -version = "0.23.2" +version = "0.23.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffd6543a7bc6428396845f6854ccf3d1ae8823816592e2cbe74f20f50f209d02" +checksum = "4f49cdc0bb3f412bf8e7d1bd90fe1d9eb10bc5c399ba90973c14662a27b3f8ba" dependencies = [ "combine", "itoa 1.0.9", @@ -2530,9 +2530,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.9.4" +version = "1.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12de2eff854e5fa4b1295edd650e227e9d8fb0c9e90b12e7f36d6a6811791a29" +checksum = "697061221ea1b4a94a624f67d0ae2bfe4e22b8a17b6a192afb11046542cc8c47" dependencies = [ "aho-corasick", "memchr", @@ -2542,9 +2542,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.3.7" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49530408a136e16e5b486e883fbb6ba058e8e4e8ae6621a77b048b314336e629" +checksum = "c2f401f4955220693b56f8ec66ee9c78abffd8d1c4f23dc41a23839eb88f0795" dependencies = [ "aho-corasick", "memchr", @@ -3682,7 +3682,7 @@ dependencies = [ [[package]] name = "websurfx" -version = 
"0.18.6" +version = "0.20.0" dependencies = [ "actix-cors", "actix-files", diff --git a/Cargo.toml b/Cargo.toml index 1f59275..e5434f6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "websurfx" -version = "0.18.6" +version = "0.20.0" edition = "2021" description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind." repository = "https://github.com/neon-mmd/websurfx" @@ -10,7 +10,7 @@ license = "AGPL-3.0" reqwest = {version="0.11.20",features=["json"]} tokio = {version="1.32.0",features=["full"]} serde = {version="1.0.188",features=["derive"]} -handlebars = { version = "4.3.7", features = ["dir_source"] } +handlebars = { version = "4.4.0", features = ["dir_source"] } scraper = {version="0.17.1"} actix-web = {version="4.4.0", features = ["cookies"]} actix-files = {version="0.6.2"} @@ -21,13 +21,13 @@ fake-useragent = {version="0.1.3"} env_logger = {version="0.10.0"} log = {version="0.4.20"} rlua = {version="0.19.7"} -redis = {version="0.23.2"} +redis = {version="0.23.3"} md5 = {version="0.7.0"} rand={version="0.8.5"} once_cell = {version="1.18.0"} error-stack = {version="0.4.0"} async-trait = {version="0.1.73"} -regex = {version="1.9.4", features=["perf"]} +regex = {version="1.9.5", features=["perf"]} [dev-dependencies] rusty-hook = "^0.11.2" From 0d2d44988970225323cb35225d20e1b9443fae46 Mon Sep 17 00:00:00 2001 From: neon_arch Date: Sun, 3 Sep 2023 19:21:23 +0300 Subject: [PATCH 07/23] =?UTF-8?q?=E2=9A=99=EF=B8=8F=20refactor:=20add=20li?= =?UTF-8?q?nts=20to=20the=20codebase=20to=20ensure=20proper=20code=20style?= =?UTF-8?q?=20(#205)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/lib.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index cd83d8a..52fb56d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,10 @@ //! 
This main library module provides the functionality to provide and handle the Tcp server //! and register all the routes for the `websurfx` meta search engine website. +#![forbid(unsafe_code, clippy::panic)] +#![deny(missing_docs, clippy::missing_docs_in_private_items, clippy::perf)] +#![warn(clippy::cognitive_complexity, rust_2018_idioms)] + pub mod cache; pub mod config; pub mod engines; @@ -40,7 +44,7 @@ use handler::paths::{file_path, FileType}; /// let server = run(listener,config).expect("Failed to start server"); /// ``` pub fn run(listener: TcpListener, config: Config) -> std::io::Result { - let mut handlebars: Handlebars = Handlebars::new(); + let mut handlebars: Handlebars<'_> = Handlebars::new(); let public_folder_path: String = file_path(FileType::Theme)?; @@ -48,7 +52,7 @@ pub fn run(listener: TcpListener, config: Config) -> std::io::Result { .register_templates_directory(".html", format!("{}/templates", public_folder_path)) .unwrap(); - let handlebars_ref: web::Data = web::Data::new(handlebars); + let handlebars_ref: web::Data> = web::Data::new(handlebars); let cloned_config_threads_opt: u8 = config.threads; From 049b1c1ddda15d7b109b0be8c2942b8f60736f33 Mon Sep 17 00:00:00 2001 From: neon_arch Date: Sun, 3 Sep 2023 19:23:34 +0300 Subject: [PATCH 08/23] =?UTF-8?q?=E2=9A=99=EF=B8=8F=20refactor:=20change?= =?UTF-8?q?=20&=20add=20documentation=20to=20the=20code=20based=20on=20the?= =?UTF-8?q?=20lints=20(#205)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/cache/cacher.rs | 5 +-- src/cache/mod.rs | 3 ++ src/config/mod.rs | 3 ++ src/config/parser.rs | 37 ++++++++--------- src/config/parser_models.rs | 9 ++-- src/engines/duckduckgo.rs | 20 +-------- src/engines/engine_models.rs | 68 ++++++++++++++++++++++++++----- src/engines/mod.rs | 5 +++ src/engines/searx.rs | 19 --------- src/handler/mod.rs | 4 ++ src/handler/paths.rs | 46 +++++++++++---------- src/results/aggregation_models.rs | 40 ++++++++++-------- 
src/results/mod.rs | 4 ++ src/results/user_agent.rs | 2 + src/server/mod.rs | 5 +++ src/server/routes.rs | 39 ++++++++++-------- 16 files changed, 177 insertions(+), 132 deletions(-) diff --git a/src/cache/cacher.rs b/src/cache/cacher.rs index 44d0710..e268ac2 100644 --- a/src/cache/cacher.rs +++ b/src/cache/cacher.rs @@ -6,11 +6,8 @@ use redis::{Client, Commands, Connection}; /// A named struct which stores the redis Connection url address to which the client will /// connect to. -/// -/// # Fields -/// -/// * `redis_connection_url` - It stores the redis Connection url address. pub struct RedisCache { + /// It stores the redis Connection url address. connection: Connection, } diff --git a/src/cache/mod.rs b/src/cache/mod.rs index de7dd4e..82e3377 100644 --- a/src/cache/mod.rs +++ b/src/cache/mod.rs @@ -1 +1,4 @@ +//! This module provides the modules which provide the functionality to cache the aggregated +//! results fetched and aggregated from the upstream search engines in a json format. + pub mod cacher; diff --git a/src/config/mod.rs b/src/config/mod.rs index 11ce559..331a3d7 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -1,2 +1,5 @@ +//! This module provides the modules which handles the functionality to parse the lua config +//! and convert the config options into rust readable form. + pub mod parser; pub mod parser_models; diff --git a/src/config/parser.rs b/src/config/parser.rs index 4639013..ca53f1b 100644 --- a/src/config/parser.rs +++ b/src/config/parser.rs @@ -9,42 +9,36 @@ use rlua::Lua; use std::{collections::HashMap, fs, thread::available_parallelism}; /// A named struct which stores the parsed config file options. -/// -/// # Fields -// -/// * `port` - It stores the parsed port number option on which the server should launch. -/// * `binding_ip` - It stores the parsed ip address option on which the server should launch -/// * `style` - It stores the theming options for the website. 
-/// * `redis_url` - It stores the redis connection url address on which the redis -/// client should connect. -/// * `aggregator` - It stores the option to whether enable or disable production use. -/// * `logging` - It stores the option to whether enable or disable logs. -/// * `debug` - It stores the option to whether enable or disable debug mode. -/// * `upstream_search_engines` - It stores all the engine names that were enabled by the user. -/// * `request_timeout` - It stores the time (secs) which controls the server request timeout. -/// * `threads` - It stores the number of threads which controls the app will use to run. #[derive(Clone)] pub struct Config { + /// It stores the parsed port number option on which the server should launch. pub port: u16, + /// It stores the parsed ip address option on which the server should launch pub binding_ip: String, + /// It stores the theming options for the website. pub style: Style, + /// It stores the redis connection url address on which the redis + /// client should connect. pub redis_url: String, + /// It stores the option to whether enable or disable production use. pub aggregator: AggregatorConfig, + /// It stores the option to whether enable or disable logs. pub logging: bool, + /// It stores the option to whether enable or disable debug mode. pub debug: bool, + /// It stores all the engine names that were enabled by the user. pub upstream_search_engines: Vec, + /// It stores the time (secs) which controls the server request timeout. pub request_timeout: u8, + /// It stores the number of threads which controls the app will use to run. pub threads: u8, } /// Configuration options for the aggregator. -/// -/// # Fields -/// -/// * `random_delay` - It stores the option to whether enable or disable random delays between -/// requests. #[derive(Clone)] pub struct AggregatorConfig { + /// It stores the option to whether enable or disable random delays between + /// requests. 
pub random_delay: bool, } @@ -115,6 +109,11 @@ impl Config { } /// a helper function that sets the proper logging level +/// +/// # Arguments +/// +/// * `debug` - It takes the option to whether enable or disable debug mode. +/// * `logging` - It takes the option to whether enable or disable logs. fn set_logging_level(debug: bool, logging: bool) { if let Ok(pkg_env_var) = std::env::var("PKG_ENV") { if pkg_env_var.to_lowercase() == "dev" { diff --git a/src/config/parser_models.rs b/src/config/parser_models.rs index 0bc52d8..aa0b86f 100644 --- a/src/config/parser_models.rs +++ b/src/config/parser_models.rs @@ -12,15 +12,12 @@ use serde::{Deserialize, Serialize}; /// order to allow the deserializing the json back to struct in aggregate function in /// aggregator.rs and create a new struct out of it and then serialize it back to json and pass /// it to the template files. -/// -/// # Fields -// -/// * `theme` - It stores the parsed theme option used to set a theme for the website. -/// * `colorscheme` - It stores the parsed colorscheme option used to set a colorscheme for the -/// theme being used. #[derive(Serialize, Deserialize, Clone)] pub struct Style { + /// It stores the parsed theme option used to set a theme for the website. pub theme: String, + /// It stores the parsed colorscheme option used to set a colorscheme for the + /// theme being used. 
pub colorscheme: String, } diff --git a/src/engines/duckduckgo.rs b/src/engines/duckduckgo.rs index 11b7d86..0649385 100644 --- a/src/engines/duckduckgo.rs +++ b/src/engines/duckduckgo.rs @@ -19,25 +19,7 @@ pub struct DuckDuckGo; #[async_trait::async_trait] impl SearchEngine for DuckDuckGo { - /// This function scrapes results from the upstream engine duckduckgo and puts all the scraped - /// results like title, visiting_url (href in html),engine (from which engine it was fetched from) - /// and description in a RawSearchResult and then adds that to HashMap whose keys are url and - /// values are RawSearchResult struct and then returns it within a Result enum. - /// - /// # Arguments - /// - /// * `query` - Takes the user provided query to query to the upstream search engine with. - /// * `page` - Takes an u32 as an argument. - /// * `user_agent` - Takes a random user agent string as an argument. - /// * `request_timeout` - Takes a time (secs) as a value which controls the server request timeout. - /// - /// # Errors - /// - /// Returns an `EngineErrorKind` if the user is not connected to the internet or if their is failure to - /// reach the above `upstream search engine` page or if the `upstream search engine` is unable to - /// provide results for the requested search query and also returns error if the scraping selector - /// or HeaderMap fails to initialize. - async fn results( + async fn results( &self, query: String, page: u32, diff --git a/src/engines/engine_models.rs b/src/engines/engine_models.rs index d33d13c..2f28ee5 100644 --- a/src/engines/engine_models.rs +++ b/src/engines/engine_models.rs @@ -6,19 +6,18 @@ use error_stack::{IntoReport, Result, ResultExt}; use std::{collections::HashMap, fmt, time::Duration}; /// A custom error type used for handle engine associated errors. 
-/// -/// This enum provides variants three different categories of errors: -/// * `RequestError` - This variant handles all request related errors like forbidden, not found, -/// etc. -/// * `EmptyResultSet` - This variant handles the not results found error provide by the upstream -/// search engines. -/// * `UnexpectedError` - This variant handles all the errors which are unexpected or occur rarely -/// and are errors mostly related to failure in initialization of HeaderMap, Selector errors and -/// all other errors occurring within the code handling the `upstream search engines`. #[derive(Debug)] pub enum EngineError { + /// This variant handles the no results found error provided by the upstream + /// search engines. EmptyResultSet, + /// This variant handles all request related errors like forbidden, not found, + /// etc. RequestError, + /// This variant handles all the errors which are unexpected or occur rarely + /// and are errors mostly related to failure in initialization of HeaderMap, + /// Selector errors and all other errors occurring within the code handling + /// the `upstream search engines`. UnexpectedError, } @@ -46,6 +45,23 @@ impl error_stack::Context for EngineError {} /// A trait to define common behavior for all search engines. #[async_trait::async_trait] pub trait SearchEngine: Sync + Send { + /// This helper function fetches/requests the search results from the upstream search engine in + /// an html form. + /// + /// # Arguments + /// + /// * `url` - It takes the url of the upstream search engine with the user requested search + /// query appended in the search parameters. + /// * `header_map` - It takes the http request headers to be sent to the upstream engine in + /// order to prevent being detected as a bot. It takes the header as a HeaderMap type. 
+ /// * `request_timeout` - It takes the request timeout value as seconds which is used to limit + /// the amount of time for each request to remain connected when until the results can be provided + /// by the upstream engine. + /// + /// # Error + /// + /// It returns the html data as a string if the upstream engine provides the data as expected + /// otherwise it returns a custom `EngineError`. async fn fetch_html_from_upstream( &self, url: String, @@ -67,6 +83,24 @@ pub trait SearchEngine: Sync + Send { .change_context(EngineError::RequestError)?) } + /// This function scrapes results from the upstream engine and puts all the scraped results like + /// title, visiting_url (href in html),engine (from which engine it was fetched from) and description + /// in a RawSearchResult and then adds that to HashMap whose keys are url and values are RawSearchResult + /// struct and then returns it within a Result enum. + /// + /// # Arguments + /// + /// * `query` - Takes the user provided query to query to the upstream search engine with. + /// * `page` - Takes an u32 as an argument. + /// * `user_agent` - Takes a random user agent string as an argument. + /// * `request_timeout` - Takes a time (secs) as a value which controls the server request timeout. + /// + /// # Errors + /// + /// Returns an `EngineErrorKind` if the user is not connected to the internet or if their is failure to + /// reach the above `upstream search engine` page or if the `upstream search engine` is unable to + /// provide results for the requested search query and also returns error if the scraping selector + /// or HeaderMap fails to initialize. async fn results( &self, query: String, @@ -76,8 +110,12 @@ pub trait SearchEngine: Sync + Send { ) -> Result, EngineError>; } +/// A named struct which stores the engine struct with the name of the associated engine. 
pub struct EngineHandler { + /// It stores the engine struct wrapped in a box smart pointer as the engine struct implements + /// the `SearchEngine` trait. engine: Box, + /// It stores the name of the engine to which the struct is associated to. name: &'static str, } @@ -88,7 +126,15 @@ impl Clone for EngineHandler { } impl EngineHandler { - /// parses an engine name into an engine handler, returns none if the engine is unknown + /// Parses an engine name into an engine handler. + /// + /// # Arguments + /// + /// * `engine_name` - It takes the name of the engine to which the struct was associated to. + /// + /// # Returns + /// + /// It returns an option either containing the value or a none if the engine is unknown pub fn new(engine_name: &str) -> Option { let engine: (&'static str, Box) = match engine_name.to_lowercase().as_str() { @@ -103,6 +149,8 @@ impl EngineHandler { }) } + /// This function converts the EngineHandler type into a tuple containing the engine name and + /// the associated engine struct. pub fn into_name_engine(self) -> (&'static str, Box) { (self.name, self.engine) } diff --git a/src/engines/mod.rs b/src/engines/mod.rs index f9bb8ad..8267c93 100644 --- a/src/engines/mod.rs +++ b/src/engines/mod.rs @@ -1,3 +1,8 @@ +//! This module provides different modules which handle the functionality to fetch results from the +//! upstream search engines based on user requested queries. Also provides different models to +//! provide standard functions to be implemented for all the upstream search engine handling +//! code. Moreover, it also provides a custom error for the upstream search engine handling code. 
+ pub mod duckduckgo; pub mod engine_models; pub mod searx; diff --git a/src/engines/searx.rs b/src/engines/searx.rs index 4ad41f5..3f261ad 100644 --- a/src/engines/searx.rs +++ b/src/engines/searx.rs @@ -17,25 +17,6 @@ pub struct Searx; #[async_trait::async_trait] impl SearchEngine for Searx { - /// This function scrapes results from the upstream engine duckduckgo and puts all the scraped - /// results like title, visiting_url (href in html),engine (from which engine it was fetched from) - /// and description in a RawSearchResult and then adds that to HashMap whose keys are url and - /// values are RawSearchResult struct and then returns it within a Result enum. - /// - /// # Arguments - /// - /// * `query` - Takes the user provided query to query to the upstream search engine with. - /// * `page` - Takes an u32 as an argument. - /// * `user_agent` - Takes a random user agent string as an argument. - /// * `request_timeout` - Takes a time (secs) as a value which controls the server request timeout. - /// - /// # Errors - /// - /// Returns an `EngineErrorKind` if the user is not connected to the internet or if their is failure to - /// reach the above `upstream search engine` page or if the `upstream search engine` is unable to - /// provide results for the requested search query and also returns error if the scraping selector - /// or HeaderMap fails to initialize. - async fn results( &self, query: String, diff --git a/src/handler/mod.rs b/src/handler/mod.rs index 8118b29..188767d 100644 --- a/src/handler/mod.rs +++ b/src/handler/mod.rs @@ -1 +1,5 @@ +//! This module provides modules which provide the functionality to handle paths for different +//! files present on different paths and provide one appropriate path on which it is present and +//! can be used. 
+ pub mod paths; diff --git a/src/handler/paths.rs b/src/handler/paths.rs index 9b4fa07..44228d8 100644 --- a/src/handler/paths.rs +++ b/src/handler/paths.rs @@ -6,20 +6,31 @@ use std::io::Error; use std::path::Path; // ------- Constants -------- -static PUBLIC_DIRECTORY_NAME: &str = "public"; -static COMMON_DIRECTORY_NAME: &str = "websurfx"; -static CONFIG_FILE_NAME: &str = "config.lua"; -static ALLOWLIST_FILE_NAME: &str = "allowlist.txt"; -static BLOCKLIST_FILE_NAME: &str = "blocklist.txt"; +/// The constant holding the name of the theme folder. +const PUBLIC_DIRECTORY_NAME: &str = "public"; +/// The constant holding the name of the common folder. +const COMMON_DIRECTORY_NAME: &str = "websurfx"; +/// The constant holding the name of the config file. +const CONFIG_FILE_NAME: &str = "config.lua"; +/// The constant holding the name of the AllowList text file. +const ALLOWLIST_FILE_NAME: &str = "allowlist.txt"; +/// The constant holding the name of the BlockList text file. +const BLOCKLIST_FILE_NAME: &str = "blocklist.txt"; +/// An enum type which provides different variants to handle paths for various files/folders. #[derive(Hash, PartialEq, Eq, Debug)] pub enum FileType { + /// This variant handles all the paths associated with the config file. Config, + /// This variant handles all the paths associated with the Allowlist text file. AllowList, + /// This variant handles all the paths associated with the BlockList text file. BlockList, + /// This variant handles all the paths associated with the public folder (Theme folder). Theme, } +/// A static variable which stores the different filesystem paths for various file/folder types. static FILE_PATHS_FOR_DIFF_FILE_TYPES: once_cell::sync::Lazy>> = once_cell::sync::Lazy::new(|| { HashMap::from([ @@ -72,26 +83,19 @@ static FILE_PATHS_FOR_DIFF_FILE_TYPES: once_cell::sync::Lazy folder/file not found!!` error if the give file_type folder/file is not +/// present on the path on which it is being tested. 
/// -/// # Error +/// # Example +/// +/// If this function is given the file_type of Theme variant then the theme folder is checked by the +/// following steps: /// -/// Returns a `Theme (public) folder not found!!` error if the theme folder is not present under following -/// paths which are: /// 1. `/opt/websurfx` if it not present here then it fallbacks to the next one (2) /// 2. Under project folder ( or codebase in other words) if it is not present /// here then it returns an error as mentioned above. @@ -106,6 +110,6 @@ pub fn file_path(file_type: FileType) -> Result { // if no of the configs above exist, return error Err(Error::new( std::io::ErrorKind::NotFound, - format!("{:?} file not found!!", file_type), + format!("{:?} file/folder not found!!", file_type), )) } diff --git a/src/results/aggregation_models.rs b/src/results/aggregation_models.rs index e985765..76d896d 100644 --- a/src/results/aggregation_models.rs +++ b/src/results/aggregation_models.rs @@ -8,20 +8,17 @@ use crate::{config::parser_models::Style, engines::engine_models::EngineError}; /// A named struct to store the raw scraped search results scraped search results from the /// upstream search engines before aggregating it.It derives the Clone trait which is needed /// to write idiomatic rust using `Iterators`. -/// -/// # Fields -/// -/// * `title` - The title of the search result. -/// * `url` - The url which is accessed when clicked on it /// (href url in html in simple words). -/// * `description` - The description of the search result. -/// * `engine` - The names of the upstream engines from which this results were provided. #[derive(Clone, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct SearchResult { + /// The title of the search result. pub title: String, + /// The url which is accessed when clicked on it pub url: String, + /// The description of the search result. 
pub description: String, + /// The names of the upstream engines from which these results were provided. pub engine: Vec, } @@ -63,15 +60,27 @@ impl SearchResult { } } -/// +/// A named struct that stores the error info related to the upstream search engines. #[derive(Serialize, Deserialize)] pub struct EngineErrorInfo { + /// It stores the error type which occurred while fetching the result from a particular search + /// engine. pub error: String, + /// It stores the name of the engine that failed to provide the requested search results. pub engine: String, + /// It stores the name of the color to indicate how severe the particular error is (In + /// other words it indicates the severity of the error/issue). pub severity_color: String, } impl EngineErrorInfo { + /// Constructs a new `EngineErrorInfo` with the given arguments needed for the struct. + /// + /// # Arguments + /// + /// * `error` - It takes the error type which occurred while fetching the result from a particular + /// search engine. + /// * `engine` - It takes the name of the engine that failed to provide the requested search results. pub fn new(error: &EngineError, engine: String) -> Self { Self { error: match error { @@ -91,23 +100,18 @@ impl EngineErrorInfo { /// A named struct to store, serialize, deserialize the all the search results scraped and /// aggregated from the upstream search engines. -/// -/// # Fields -/// -/// * `results` - Stores the individual serializable `SearchResult` struct into a vector of /// `SearchResult` structs. -/// * `page_query` - Stores the current pages search query `q` provided in the search url. -/// * `style` - Stores the theming options for the website. -/// * `engine_errors_info` - Stores the information on which engines failed with their engine name -/// and the type of error that caused it. -/// * `empty_result_set` - Stores a boolean which indicates that no engines gave a result for the -/// given search query. 
#[derive(Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct SearchResults { + /// Stores the individual serializable `SearchResult` structs in a vector. pub results: Vec, + /// Stores the current pages search query `q` provided in the search url. pub page_query: String, + /// Stores the theming options for the website. pub style: Style, + /// Stores the information on which engines failed with their engine name + /// and the type of error that caused it. pub engine_errors_info: Vec, } diff --git a/src/results/mod.rs b/src/results/mod.rs index 0c13442..b08eec0 100644 --- a/src/results/mod.rs +++ b/src/results/mod.rs @@ -1,3 +1,7 @@ +//! This module provides modules that handle the functionality to aggregate the fetched search +//! results from the upstream search engines and filters it if safe search is set to 3 or 4. Also, +//! provides various models to aggregate search results into a standardized form. + pub mod aggregation_models; pub mod aggregator; pub mod user_agent; diff --git a/src/results/user_agent.rs b/src/results/user_agent.rs index 13166bf..8946e84 100644 --- a/src/results/user_agent.rs +++ b/src/results/user_agent.rs @@ -2,6 +2,8 @@ use fake_useragent::{Browsers, UserAgents, UserAgentsBuilder}; +/// A static variable which stores the initially built `UserAgents` struct, so that it can be reused +/// again and again without the need of reinitializing the `UserAgents` struct. static USER_AGENTS: once_cell::sync::Lazy = once_cell::sync::Lazy::new(|| { UserAgentsBuilder::new() .cache(false) diff --git a/src/server/mod.rs b/src/server/mod.rs index 6a664ab..f5d2ce9 100644 --- a/src/server/mod.rs +++ b/src/server/mod.rs @@ -1 +1,6 @@ +//! This module provides modules that handle the functionality of handling different routes/paths +//! for the `websurfx` search engine website. Also it handles the parsing of search parameters in +//! the search route. Also, caches the next, current and previous search results in the search +//! 
routes with the help of the redis server. + pub mod routes; diff --git a/src/server/routes.rs b/src/server/routes.rs index 8910f8f..818fac5 100644 --- a/src/server/routes.rs +++ b/src/server/routes.rs @@ -17,16 +17,13 @@ use serde::Deserialize; use tokio::join; /// A named struct which deserializes all the user provided search parameters and stores them. -/// -/// # Fields -/// -/// * `q` - It stores the search parameter option `q` (or query in simple words) -/// of the search url. -/// * `page` - It stores the search parameter `page` (or pageno in simple words) -/// of the search url. #[derive(Deserialize)] struct SearchParams { + /// It stores the search parameter option `q` (or query in simple words) + /// of the search url. q: Option, + /// It stores the search parameter `page` (or pageno in simple words) + /// of the search url. page: Option, } @@ -54,17 +51,14 @@ pub async fn not_found( } /// A named struct which is used to deserialize the cookies fetched from the client side. -/// -/// # Fields -/// -/// * `theme` - It stores the theme name used in the website. -/// * `colorscheme` - It stores the colorscheme name used for the website theme. -/// * `engines` - It stores the user selected upstream search engines selected from the UI. #[allow(dead_code)] #[derive(Deserialize)] struct Cookie { + /// It stores the theme name used in the website. theme: String, + /// It stores the colorscheme name used for the website theme. colorscheme: String, + /// It stores the user selected upstream search engines selected from the UI. engines: Vec, } @@ -149,8 +143,21 @@ pub async fn search( } } -/// Fetches the results for a query and page. -/// First checks the redis cache, if that fails it gets proper results +/// Fetches the results for a query and page. It First checks the redis cache, if that +/// fails it gets proper results by requesting from the upstream search engines. 
+/// +/// # Arguments +/// +/// * `url` - It takes the url of the current page that requested the search results for a +/// particular search query. +/// * `config` - It takes a parsed config struct. +/// * `page` - It takes the page number as u32 value. +/// * `req` - It takes the `HttpRequest` struct as a value. +/// +/// # Error +/// +/// It returns the `SearchResults` struct if the search results could be successfully fetched from +/// the cache or from the upstream search engines otherwise it returns an appropriate error. async fn results( url: String, config: &Config, @@ -158,7 +165,7 @@ async fn results( page: u32, req: HttpRequest, ) -> Result> { - //Initialize redis cache connection struct + // Initialize redis cache connection struct let mut redis_cache = RedisCache::new(config.redis_url.clone())?; // fetch the cached results json. let cached_results_json = redis_cache.cached_json(&url); From 0672b55349fe6e2a65f0419993e309d50dd2351f Mon Sep 17 00:00:00 2001 From: neon_arch Date: Sun, 3 Sep 2023 19:25:33 +0300 Subject: [PATCH 09/23] =?UTF-8?q?=F0=9F=9A=80=20chore:=20bump=20the=20app?= =?UTF-8?q?=20version=20(#205)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock | 74 +++++++++++++++++++++++++++--------------------------- Cargo.toml | 2 +- 2 files changed, 38 insertions(+), 38 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6bbaab6..67165f7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -103,7 +103,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e01ed3140b2f8d422c68afa1ed2e85d996ea619c988ac834d255db32138655cb" dependencies = [ "quote 1.0.33", - "syn 2.0.29", + "syn 2.0.30", ] [[package]] @@ -216,7 +216,7 @@ dependencies = [ "actix-router", "proc-macro2 1.0.66", "quote 1.0.33", - "syn 2.0.29", + "syn 2.0.30", ] [[package]] @@ -302,7 +302,7 @@ checksum = "bc00ceb34980c03614e35a3a4e218276a0a824e911d07651cd0d858a51e8c0f0" dependencies = [ "proc-macro2 1.0.66", 
"quote 1.0.33", - "syn 2.0.29", + "syn 2.0.30", ] [[package]] @@ -520,18 +520,18 @@ dependencies = [ [[package]] name = "clap" -version = "4.4.1" +version = "4.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c8d502cbaec4595d2e7d5f61e318f05417bd2b66fdc3809498f0d3fdf0bea27" +checksum = "6a13b88d2c62ff462f88e4a121f17a82c1af05693a2f192b5c38d14de73c19f6" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" -version = "4.4.1" +version = "4.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5891c7bc0edb3e1c2204fc5e94009affabeb1821c9e5fdc3959536c5c0bb984d" +checksum = "2bb9faaa7c2ef94b2743a21f5a29e6f0010dff4caa69ac8e9d6cf8b6fa74da08" dependencies = [ "anstyle", "clap_lex", @@ -798,7 +798,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331" dependencies = [ "quote 1.0.33", - "syn 2.0.29", + "syn 2.0.30", ] [[package]] @@ -1188,9 +1188,9 @@ checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" [[package]] name = "handlebars" -version = "4.3.7" +version = "4.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83c3372087601b532857d332f5957cbae686da52bb7810bf038c3e3c3cc2fa0d" +checksum = "c39b3bc2a8f715298032cf5087e58573809374b08160aa7d750582bdb82d2683" dependencies = [ "log", "pest", @@ -1631,9 +1631,9 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" [[package]] name = "memchr" -version = "2.6.2" +version = "2.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5486aed0026218e61b8a01d5fbd5a0a134649abb71a0e53b7bc088529dced86e" +checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c" [[package]] name = "memoffset" @@ -1783,9 +1783,9 @@ dependencies = [ [[package]] name = "object" -version = "0.32.0" +version = "0.32.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "77ac5bbd07aea88c60a577a1ce218075ffd59208b2d7ca97adf9bfc5aeb21ebe" +checksum = "9cf5f9dd3933bd50a9e1f149ec995f39ae2c496d31fd772c1fd45ebc27e902b0" dependencies = [ "memchr", ] @@ -1825,7 +1825,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.33", - "syn 2.0.29", + "syn 2.0.30", ] [[package]] @@ -1944,7 +1944,7 @@ dependencies = [ "pest_meta", "proc-macro2 1.0.66", "quote 1.0.33", - "syn 2.0.29", + "syn 2.0.30", ] [[package]] @@ -2046,7 +2046,7 @@ dependencies = [ "phf_shared 0.11.2", "proc-macro2 1.0.66", "quote 1.0.33", - "syn 2.0.29", + "syn 2.0.30", ] [[package]] @@ -2349,9 +2349,9 @@ dependencies = [ [[package]] name = "redis" -version = "0.23.2" +version = "0.23.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffd6543a7bc6428396845f6854ccf3d1ae8823816592e2cbe74f20f50f209d02" +checksum = "4f49cdc0bb3f412bf8e7d1bd90fe1d9eb10bc5c399ba90973c14662a27b3f8ba" dependencies = [ "combine", "itoa 1.0.9", @@ -2379,9 +2379,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.9.4" +version = "1.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12de2eff854e5fa4b1295edd650e227e9d8fb0c9e90b12e7f36d6a6811791a29" +checksum = "697061221ea1b4a94a624f67d0ae2bfe4e22b8a17b6a192afb11046542cc8c47" dependencies = [ "aho-corasick", "memchr", @@ -2391,9 +2391,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.3.7" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49530408a136e16e5b486e883fbb6ba058e8e4e8ae6621a77b048b314336e629" +checksum = "c2f401f4955220693b56f8ec66ee9c78abffd8d1c4f23dc41a23839eb88f0795" dependencies = [ "aho-corasick", "memchr", @@ -2527,9 +2527,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.10" +version = "0.38.11" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed6248e1caa625eb708e266e06159f135e8c26f2bb7ceb72dc4b2766d0340964" +checksum = "c0c3dde1fc030af041adc40e79c0e7fbcf431dd24870053d187d7c66e4b87453" dependencies = [ "bitflags 2.4.0", "errno", @@ -2687,7 +2687,7 @@ checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.33", - "syn 2.0.29", + "syn 2.0.30", ] [[package]] @@ -2926,9 +2926,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.29" +version = "2.0.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c324c494eba9d92503e6f1ef2e6df781e78f6a7705a0202d9801b198807d518a" +checksum = "0ddc1f908d32ec46858c2d3b3daa00cc35bf4b6841ce4355c7bb3eedf2283a68" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.33", @@ -2982,22 +2982,22 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.47" +version = "1.0.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97a802ec30afc17eee47b2855fc72e0c4cd62be9b4efe6591edde0ec5bd68d8f" +checksum = "9d6d7a740b8a666a7e828dd00da9c0dc290dff53154ea77ac109281de90589b7" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.47" +version = "1.0.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bb623b56e39ab7dcd4b1b98bb6c8f8d907ed255b18de254088016b27a8ee19b" +checksum = "49922ecae66cc8a249b77e68d1d0623c1b2c514f0060c27cdc68bd62a1219d35" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.33", - "syn 2.0.29", + "syn 2.0.30", ] [[package]] @@ -3152,7 +3152,7 @@ checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.33", - "syn 2.0.29", + "syn 2.0.30", ] [[package]] @@ -3474,7 +3474,7 @@ dependencies = [ "once_cell", "proc-macro2 1.0.66", "quote 1.0.33", - "syn 2.0.29", + "syn 2.0.30", "wasm-bindgen-shared", ] @@ -3508,7 +3508,7 @@ checksum = 
"54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.33", - "syn 2.0.29", + "syn 2.0.30", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3531,7 +3531,7 @@ dependencies = [ [[package]] name = "websurfx" -version = "0.18.6" +version = "0.20.1" dependencies = [ "actix-cors", "actix-files", diff --git a/Cargo.toml b/Cargo.toml index ed0e24b..cc7309a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "websurfx" -version = "0.18.6" +version = "0.20.1" edition = "2021" description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind." repository = "https://github.com/neon-mmd/websurfx" From 453dbdc47d623725017a684c19ff46b9855c8928 Mon Sep 17 00:00:00 2001 From: neon_arch Date: Sun, 3 Sep 2023 19:34:22 +0300 Subject: [PATCH 10/23] =?UTF-8?q?=F0=9F=A7=B9=20chore:=20make=20rustfmt=20?= =?UTF-8?q?happy=20(#205)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/engines/duckduckgo.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/engines/duckduckgo.rs b/src/engines/duckduckgo.rs index 0649385..5b7a452 100644 --- a/src/engines/duckduckgo.rs +++ b/src/engines/duckduckgo.rs @@ -19,7 +19,7 @@ pub struct DuckDuckGo; #[async_trait::async_trait] impl SearchEngine for DuckDuckGo { - async fn results( + async fn results( &self, query: String, page: u32, From 493c56bd02c4748cf8fd88df40ae995c6107f5d9 Mon Sep 17 00:00:00 2001 From: neon_arch Date: Sun, 3 Sep 2023 20:50:50 +0300 Subject: [PATCH 11/23] =?UTF-8?q?=E2=9A=99=EF=B8=8F=20refactor:=20reorgani?= =?UTF-8?q?ze=20code=20&=20restructure=20codebase=20for=20better=20maintai?= =?UTF-8?q?nability=20(#207)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/config/mod.rs | 1 - src/config/parser.rs | 6 +- src/engines/duckduckgo.rs | 
4 +- src/engines/mod.rs | 1 - src/engines/searx.rs | 4 +- src/lib.rs | 15 +-- src/{results => models}/aggregation_models.rs | 2 +- src/{engines => models}/engine_models.rs | 9 +- src/models/mod.rs | 8 ++ src/{config => models}/parser_models.rs | 0 src/models/server_models.rs | 26 +++++ src/results/aggregator.rs | 22 ++--- src/results/mod.rs | 1 - src/server/mod.rs | 1 + src/server/router.rs | 64 +++++++++++++ src/server/routes/mod.rs | 3 + src/server/{routes.rs => routes/search.rs} | 96 ++----------------- 17 files changed, 141 insertions(+), 122 deletions(-) rename src/{results => models}/aggregation_models.rs (98%) rename src/{engines => models}/engine_models.rs (95%) create mode 100644 src/models/mod.rs rename src/{config => models}/parser_models.rs (100%) create mode 100644 src/models/server_models.rs create mode 100644 src/server/router.rs create mode 100644 src/server/routes/mod.rs rename src/server/{routes.rs => routes/search.rs} (62%) diff --git a/src/config/mod.rs b/src/config/mod.rs index 331a3d7..babc54f 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -2,4 +2,3 @@ //! and convert the config options into rust readable form. pub mod parser; -pub mod parser_models; diff --git a/src/config/parser.rs b/src/config/parser.rs index ca53f1b..72df890 100644 --- a/src/config/parser.rs +++ b/src/config/parser.rs @@ -3,7 +3,7 @@ use crate::handler::paths::{file_path, FileType}; -use super::parser_models::Style; +use crate::models::parser_models::Style; use log::LevelFilter; use rlua::Lua; use std::{collections::HashMap, fs, thread::available_parallelism}; @@ -27,7 +27,7 @@ pub struct Config { /// It stores the option to whether enable or disable debug mode. pub debug: bool, /// It stores all the engine names that were enabled by the user. - pub upstream_search_engines: Vec, + pub upstream_search_engines: Vec, /// It stores the time (secs) which controls the server request timeout. 
pub request_timeout: u8, /// It stores the number of threads which controls the app will use to run. @@ -99,7 +99,7 @@ impl Config { .get::<_, HashMap>("upstream_search_engines")? .into_iter() .filter_map(|(key, value)| value.then_some(key)) - .filter_map(|engine| crate::engines::engine_models::EngineHandler::new(&engine)) + .filter_map(|engine| crate::models::engine_models::EngineHandler::new(&engine)) .collect(), request_timeout: globals.get::<_, u8>("request_timeout")?, threads, diff --git a/src/engines/duckduckgo.rs b/src/engines/duckduckgo.rs index 5b7a452..66f0c85 100644 --- a/src/engines/duckduckgo.rs +++ b/src/engines/duckduckgo.rs @@ -7,9 +7,9 @@ use std::collections::HashMap; use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT}; use scraper::{Html, Selector}; -use crate::results::aggregation_models::SearchResult; +use crate::models::aggregation_models::SearchResult; -use super::engine_models::{EngineError, SearchEngine}; +use crate::models::engine_models::{EngineError, SearchEngine}; use error_stack::{IntoReport, Report, Result, ResultExt}; diff --git a/src/engines/mod.rs b/src/engines/mod.rs index 8267c93..0016728 100644 --- a/src/engines/mod.rs +++ b/src/engines/mod.rs @@ -4,5 +4,4 @@ //! code. Moreover, it also provides a custom error for the upstream search engine handling code. 
pub mod duckduckgo; -pub mod engine_models; pub mod searx; diff --git a/src/engines/searx.rs b/src/engines/searx.rs index 3f261ad..611c173 100644 --- a/src/engines/searx.rs +++ b/src/engines/searx.rs @@ -6,9 +6,9 @@ use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT}; use scraper::{Html, Selector}; use std::collections::HashMap; -use crate::results::aggregation_models::SearchResult; +use crate::models::aggregation_models::SearchResult; -use super::engine_models::{EngineError, SearchEngine}; +use crate::models::engine_models::{EngineError, SearchEngine}; use error_stack::{IntoReport, Report, Result, ResultExt}; /// A new Searx engine type defined in-order to implement the `SearchEngine` trait which allows to diff --git a/src/lib.rs b/src/lib.rs index 52fb56d..97208be 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,12 +9,13 @@ pub mod cache; pub mod config; pub mod engines; pub mod handler; +pub mod models; pub mod results; pub mod server; use std::net::TcpListener; -use crate::server::routes; +use crate::server::router; use actix_cors::Cors; use actix_files as fs; @@ -81,12 +82,12 @@ pub fn run(listener: TcpListener, config: Config) -> std::io::Result { fs::Files::new("/images", format!("{}/images", public_folder_path)) .show_files_listing(), ) - .service(routes::robots_data) // robots.txt - .service(routes::index) // index page - .service(routes::search) // search page - .service(routes::about) // about page - .service(routes::settings) // settings page - .default_service(web::route().to(routes::not_found)) // error page + .service(router::robots_data) // robots.txt + .service(router::index) // index page + .service(router::search) // search page + .service(router::about) // about page + .service(router::settings) // settings page + .default_service(web::route().to(router::not_found)) // error page }) .workers(cloned_config_threads_opt as usize) // Start server on 127.0.0.1 with the user provided port number. for example 127.0.0.1:8080. 
diff --git a/src/results/aggregation_models.rs b/src/models/aggregation_models.rs similarity index 98% rename from src/results/aggregation_models.rs rename to src/models/aggregation_models.rs index 76d896d..51a4cc8 100644 --- a/src/results/aggregation_models.rs +++ b/src/models/aggregation_models.rs @@ -3,7 +3,7 @@ use serde::{Deserialize, Serialize}; -use crate::{config::parser_models::Style, engines::engine_models::EngineError}; +use super::{engine_models::EngineError, parser_models::Style}; /// A named struct to store the raw scraped search results scraped search results from the /// upstream search engines before aggregating it.It derives the Clone trait which is needed diff --git a/src/engines/engine_models.rs b/src/models/engine_models.rs similarity index 95% rename from src/engines/engine_models.rs rename to src/models/engine_models.rs index 2f28ee5..f6f99d2 100644 --- a/src/engines/engine_models.rs +++ b/src/models/engine_models.rs @@ -1,7 +1,7 @@ //! This module provides the error enum to handle different errors associated while requesting data from //! the upstream search engines with the search query provided by the user. -use crate::results::aggregation_models::SearchResult; +use super::aggregation_models::SearchResult; use error_stack::{IntoReport, Result, ResultExt}; use std::{collections::HashMap, fmt, time::Duration}; @@ -138,8 +138,11 @@ impl EngineHandler { pub fn new(engine_name: &str) -> Option { let engine: (&'static str, Box) = match engine_name.to_lowercase().as_str() { - "duckduckgo" => ("duckduckgo", Box::new(super::duckduckgo::DuckDuckGo)), - "searx" => ("searx", Box::new(super::searx::Searx)), + "duckduckgo" => ( + "duckduckgo", + Box::new(crate::engines::duckduckgo::DuckDuckGo), + ), + "searx" => ("searx", Box::new(crate::engines::searx::Searx)), _ => return None, }; diff --git a/src/models/mod.rs b/src/models/mod.rs new file mode 100644 index 0000000..6a7d235 --- /dev/null +++ b/src/models/mod.rs @@ -0,0 +1,8 @@ +//! 
This module provides modules which in turn provides various models for aggregrating search +//! results, parsing config file, providing trait to standardize search engine handling code, +//! custom engine error for the search engine, etc. + +pub mod aggregation_models; +pub mod engine_models; +pub mod parser_models; +pub mod server_models; diff --git a/src/config/parser_models.rs b/src/models/parser_models.rs similarity index 100% rename from src/config/parser_models.rs rename to src/models/parser_models.rs diff --git a/src/models/server_models.rs b/src/models/server_models.rs new file mode 100644 index 0000000..3da6717 --- /dev/null +++ b/src/models/server_models.rs @@ -0,0 +1,26 @@ +//! This module provides the models to parse cookies and search parameters from the search +//! engine website. +use serde::Deserialize; + +/// A named struct which deserializes all the user provided search parameters and stores them. +#[derive(Deserialize)] +pub struct SearchParams { + /// It stores the search parameter option `q` (or query in simple words) + /// of the search url. + pub q: Option, + /// It stores the search parameter `page` (or pageno in simple words) + /// of the search url. + pub page: Option, +} + +/// A named struct which is used to deserialize the cookies fetched from the client side. +#[allow(dead_code)] +#[derive(Deserialize)] +pub struct Cookie { + /// It stores the theme name used in the website. + pub theme: String, + /// It stores the colorscheme name used for the website theme. + pub colorscheme: String, + /// It stores the user selected upstream search engines selected from the UI. + pub engines: Vec, +} diff --git a/src/results/aggregator.rs b/src/results/aggregator.rs index 3f06ecb..38cff6a 100644 --- a/src/results/aggregator.rs +++ b/src/results/aggregator.rs @@ -1,27 +1,23 @@ //! This module provides the functionality to scrape and gathers all the results from the upstream //! search engines and then removes duplicate results. 
+use super::user_agent::random_user_agent; +use crate::handler::paths::{file_path, FileType}; +use crate::models::{ + aggregation_models::{EngineErrorInfo, SearchResult, SearchResults}, + engine_models::{EngineError, EngineHandler}, +}; +use error_stack::Report; +use rand::Rng; +use regex::Regex; use std::{ collections::HashMap, io::{BufReader, Read}, time::Duration, }; - -use super::{ - aggregation_models::{EngineErrorInfo, SearchResult, SearchResults}, - user_agent::random_user_agent, -}; -use error_stack::Report; -use rand::Rng; -use regex::Regex; use std::{fs::File, io::BufRead}; use tokio::task::JoinHandle; -use crate::{ - engines::engine_models::{EngineError, EngineHandler}, - handler::paths::{file_path, FileType}, -}; - /// Aliases for long type annotations type FutureVec = Vec, Report>>>; diff --git a/src/results/mod.rs b/src/results/mod.rs index b08eec0..9ec3229 100644 --- a/src/results/mod.rs +++ b/src/results/mod.rs @@ -2,6 +2,5 @@ //! results from the upstream search engines and filters it if safe search is set to 3 or 4. Also, //! provides various models to aggregate search results into a standardized form. -pub mod aggregation_models; pub mod aggregator; pub mod user_agent; diff --git a/src/server/mod.rs b/src/server/mod.rs index f5d2ce9..7f4274f 100644 --- a/src/server/mod.rs +++ b/src/server/mod.rs @@ -3,4 +3,5 @@ //! the search route. Also, caches the next, current and previous search results in the search //! routes with the help of the redis server. +pub mod router; pub mod routes; diff --git a/src/server/router.rs b/src/server/router.rs new file mode 100644 index 0000000..69a3ede --- /dev/null +++ b/src/server/router.rs @@ -0,0 +1,64 @@ +//! This module provides the functionality to handle different routes of the `websurfx` +//! meta search engine website and provide appropriate response to each route/page +//! when requested. 
+ +use crate::{ + config::parser::Config, + handler::paths::{file_path, FileType}, +}; +use actix_web::{get, web, HttpRequest, HttpResponse}; +use handlebars::Handlebars; +use std::fs::read_to_string; + +/// Handles the route of index page or main page of the `websurfx` meta search engine website. +#[get("/")] +pub async fn index( + hbs: web::Data>, + config: web::Data, +) -> Result> { + let page_content: String = hbs.render("index", &config.style).unwrap(); + Ok(HttpResponse::Ok().body(page_content)) +} + +/// Handles the route of any other accessed route/page which is not provided by the +/// website essentially the 404 error page. +pub async fn not_found( + hbs: web::Data>, + config: web::Data, +) -> Result> { + let page_content: String = hbs.render("404", &config.style)?; + + Ok(HttpResponse::Ok() + .content_type("text/html; charset=utf-8") + .body(page_content)) +} + +/// Handles the route of robots.txt page of the `websurfx` meta search engine website. +#[get("/robots.txt")] +pub async fn robots_data(_req: HttpRequest) -> Result> { + let page_content: String = + read_to_string(format!("{}/robots.txt", file_path(FileType::Theme)?))?; + Ok(HttpResponse::Ok() + .content_type("text/plain; charset=ascii") + .body(page_content)) +} + +/// Handles the route of about page of the `websurfx` meta search engine website. +#[get("/about")] +pub async fn about( + hbs: web::Data>, + config: web::Data, +) -> Result> { + let page_content: String = hbs.render("about", &config.style)?; + Ok(HttpResponse::Ok().body(page_content)) +} + +/// Handles the route of settings page of the `websurfx` meta search engine website. 
+#[get("/settings")] +pub async fn settings( + hbs: web::Data>, + config: web::Data, +) -> Result> { + let page_content: String = hbs.render("settings", &config.style)?; + Ok(HttpResponse::Ok().body(page_content)) +} diff --git a/src/server/routes/mod.rs b/src/server/routes/mod.rs new file mode 100644 index 0000000..6bc5750 --- /dev/null +++ b/src/server/routes/mod.rs @@ -0,0 +1,3 @@ +//! This module provides modules to handle various routes in the search engine website. + +pub mod search; diff --git a/src/server/routes.rs b/src/server/routes/search.rs similarity index 62% rename from src/server/routes.rs rename to src/server/routes/search.rs index 818fac5..a4839fb 100644 --- a/src/server/routes.rs +++ b/src/server/routes/search.rs @@ -1,67 +1,19 @@ -//! This module provides the functionality to handle different routes of the `websurfx` -//! meta search engine website and provide appropriate response to each route/page -//! when requested. - -use std::fs::read_to_string; +//! This module handles the search route of the search engine website. use crate::{ cache::cacher::RedisCache, config::parser::Config, - engines::engine_models::EngineHandler, - handler::paths::{file_path, FileType}, - results::{aggregation_models::SearchResults, aggregator::aggregate}, + models::{ + aggregation_models::SearchResults, + engine_models::EngineHandler, + server_models::{Cookie, SearchParams}, + }, + results::aggregator::aggregate, }; use actix_web::{get, web, HttpRequest, HttpResponse}; use handlebars::Handlebars; -use serde::Deserialize; use tokio::join; -/// A named struct which deserializes all the user provided search parameters and stores them. -#[derive(Deserialize)] -struct SearchParams { - /// It stores the search parameter option `q` (or query in simple words) - /// of the search url. - q: Option, - /// It stores the search parameter `page` (or pageno in simple words) - /// of the search url. 
- page: Option, -} - -/// Handles the route of index page or main page of the `websurfx` meta search engine website. -#[get("/")] -pub async fn index( - hbs: web::Data>, - config: web::Data, -) -> Result> { - let page_content: String = hbs.render("index", &config.style).unwrap(); - Ok(HttpResponse::Ok().body(page_content)) -} - -/// Handles the route of any other accessed route/page which is not provided by the -/// website essentially the 404 error page. -pub async fn not_found( - hbs: web::Data>, - config: web::Data, -) -> Result> { - let page_content: String = hbs.render("404", &config.style)?; - - Ok(HttpResponse::Ok() - .content_type("text/html; charset=utf-8") - .body(page_content)) -} - -/// A named struct which is used to deserialize the cookies fetched from the client side. -#[allow(dead_code)] -#[derive(Deserialize)] -struct Cookie { - /// It stores the theme name used in the website. - theme: String, - /// It stores the colorscheme name used for the website theme. - colorscheme: String, - /// It stores the user selected upstream search engines selected from the UI. - engines: Vec, -} - /// Handles the route of search page of the `websurfx` meta search engine website and it takes /// two search url parameters `q` and `page` where `page` parameter is optional. /// @@ -178,9 +130,7 @@ async fn results( // default selected upstream search engines from the config file otherwise // parse the non-empty cookie and grab the user selected engines from the // UI and use that. - let mut results: crate::results::aggregation_models::SearchResults = match req - .cookie("appCookie") - { + let mut results: SearchResults = match req.cookie("appCookie") { Some(cookie_value) => { let cookie_value: Cookie = serde_json::from_str(cookie_value.name_value().1)?; @@ -218,33 +168,3 @@ async fn results( } } } - -/// Handles the route of robots.txt page of the `websurfx` meta search engine website. 
-#[get("/robots.txt")] -pub async fn robots_data(_req: HttpRequest) -> Result> { - let page_content: String = - read_to_string(format!("{}/robots.txt", file_path(FileType::Theme)?))?; - Ok(HttpResponse::Ok() - .content_type("text/plain; charset=ascii") - .body(page_content)) -} - -/// Handles the route of about page of the `websurfx` meta search engine website. -#[get("/about")] -pub async fn about( - hbs: web::Data>, - config: web::Data, -) -> Result> { - let page_content: String = hbs.render("about", &config.style)?; - Ok(HttpResponse::Ok().body(page_content)) -} - -/// Handles the route of settings page of the `websurfx` meta search engine website. -#[get("/settings")] -pub async fn settings( - hbs: web::Data>, - config: web::Data, -) -> Result> { - let page_content: String = hbs.render("settings", &config.style)?; - Ok(HttpResponse::Ok().body(page_content)) -} From db009454c82deec7658b05f2b18c076d6d777235 Mon Sep 17 00:00:00 2001 From: neon_arch Date: Sun, 3 Sep 2023 21:03:58 +0300 Subject: [PATCH 12/23] =?UTF-8?q?=E2=9A=99=EF=B8=8F=20refactor:=20change?= =?UTF-8?q?=20import=20path=20for=20search=20route=20(#207)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 97208be..f03751b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -84,7 +84,7 @@ pub fn run(listener: TcpListener, config: Config) -> std::io::Result { ) .service(router::robots_data) // robots.txt .service(router::index) // index page - .service(router::search) // search page + .service(server::routes::search::search) // search page .service(router::about) // about page .service(router::settings) // settings page .default_service(web::route().to(router::not_found)) // error page From 485a5a1e8a6c491cf7ae129fe05f41713ff0211b Mon Sep 17 00:00:00 2001 From: neon_arch Date: Sun, 3 Sep 2023 21:05:31 +0300 Subject: [PATCH 13/23] 
=?UTF-8?q?=F0=9F=9A=80=20chore:=20bump=20the=20app?= =?UTF-8?q?=20version=20(#207)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock | 30 +++++++++++++++--------------- Cargo.toml | 2 +- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 67165f7..aa4127a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -103,7 +103,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e01ed3140b2f8d422c68afa1ed2e85d996ea619c988ac834d255db32138655cb" dependencies = [ "quote 1.0.33", - "syn 2.0.30", + "syn 2.0.31", ] [[package]] @@ -216,7 +216,7 @@ dependencies = [ "actix-router", "proc-macro2 1.0.66", "quote 1.0.33", - "syn 2.0.30", + "syn 2.0.31", ] [[package]] @@ -302,7 +302,7 @@ checksum = "bc00ceb34980c03614e35a3a4e218276a0a824e911d07651cd0d858a51e8c0f0" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.33", - "syn 2.0.30", + "syn 2.0.31", ] [[package]] @@ -798,7 +798,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331" dependencies = [ "quote 1.0.33", - "syn 2.0.30", + "syn 2.0.31", ] [[package]] @@ -1825,7 +1825,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.33", - "syn 2.0.30", + "syn 2.0.31", ] [[package]] @@ -1944,7 +1944,7 @@ dependencies = [ "pest_meta", "proc-macro2 1.0.66", "quote 1.0.33", - "syn 2.0.30", + "syn 2.0.31", ] [[package]] @@ -2046,7 +2046,7 @@ dependencies = [ "phf_shared 0.11.2", "proc-macro2 1.0.66", "quote 1.0.33", - "syn 2.0.30", + "syn 2.0.31", ] [[package]] @@ -2687,7 +2687,7 @@ checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.33", - "syn 2.0.30", + "syn 2.0.31", ] [[package]] @@ -2926,9 +2926,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.30" +version = 
"2.0.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ddc1f908d32ec46858c2d3b3daa00cc35bf4b6841ce4355c7bb3eedf2283a68" +checksum = "718fa2415bcb8d8bd775917a1bf12a7931b6dfa890753378538118181e0cb398" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.33", @@ -2997,7 +2997,7 @@ checksum = "49922ecae66cc8a249b77e68d1d0623c1b2c514f0060c27cdc68bd62a1219d35" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.33", - "syn 2.0.30", + "syn 2.0.31", ] [[package]] @@ -3152,7 +3152,7 @@ checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.33", - "syn 2.0.30", + "syn 2.0.31", ] [[package]] @@ -3474,7 +3474,7 @@ dependencies = [ "once_cell", "proc-macro2 1.0.66", "quote 1.0.33", - "syn 2.0.30", + "syn 2.0.31", "wasm-bindgen-shared", ] @@ -3508,7 +3508,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.33", - "syn 2.0.30", + "syn 2.0.31", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3531,7 +3531,7 @@ dependencies = [ [[package]] name = "websurfx" -version = "0.20.1" +version = "0.20.2" dependencies = [ "actix-cors", "actix-files", diff --git a/Cargo.toml b/Cargo.toml index cc7309a..fa46291 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "websurfx" -version = "0.20.1" +version = "0.20.2" edition = "2021" description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind." 
repository = "https://github.com/neon-mmd/websurfx" From 0c0442da253629cbe53a543e0b9bc356b7c73500 Mon Sep 17 00:00:00 2001 From: alamin655 Date: Mon, 11 Sep 2023 11:43:09 +0530 Subject: [PATCH 14/23] Update Cargo.toml --- Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/Cargo.toml b/Cargo.toml index 3d9363b..f24da86 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,6 +28,7 @@ once_cell = {version="1.18.0"} error-stack = {version="0.4.0"} async-trait = {version="0.1.73"} regex = {version="1.9.5", features=["perf"]} +dhat = {version="0.3.2", optional = true} [dev-dependencies] rusty-hook = "^0.11.2" From 70a5a2463b26b60ddb2fa6822d3659063af2d704 Mon Sep 17 00:00:00 2001 From: alamin655 Date: Mon, 11 Sep 2023 11:46:56 +0530 Subject: [PATCH 15/23] Update src/config/parser.rs --- src/config/parser.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/config/parser.rs b/src/config/parser.rs index 2cfc136..9a2f727 100644 --- a/src/config/parser.rs +++ b/src/config/parser.rs @@ -35,7 +35,7 @@ pub struct Config { pub upstream_search_engines: Vec, pub request_timeout: u8, pub threads: u8, - pub rate_limter: RateLimiter, + pub rate_limiter: RateLimiter, } impl Config { From 531611f077b8a8a371e8ac82595644f6861839e3 Mon Sep 17 00:00:00 2001 From: alamin655 Date: Mon, 11 Sep 2023 11:54:21 +0530 Subject: [PATCH 16/23] Update src/config/parser.rs --- src/config/parser.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/config/parser.rs b/src/config/parser.rs index 9a2f727..c797857 100644 --- a/src/config/parser.rs +++ b/src/config/parser.rs @@ -79,7 +79,7 @@ impl Config { parsed_threads }; - let rate_limter = globals.get::<_,HashMap>("rate_limiter")?; + let rate_limiter = globals.get::<_,HashMap>("rate_limiter")?; Ok(Config { port: globals.get::<_, u16>("port")?, From 3c1aaf63f83fbaad488c095041bc2a15c7be441d Mon Sep 17 00:00:00 2001 From: alamin655 Date: Mon, 11 Sep 2023 11:56:12 +0530 Subject: [PATCH 17/23] Update 
src/config/parser.rs --- src/config/parser.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/config/parser.rs b/src/config/parser.rs index c797857..abf0db2 100644 --- a/src/config/parser.rs +++ b/src/config/parser.rs @@ -102,9 +102,9 @@ impl Config { .collect(), request_timeout: globals.get::<_, u8>("request_timeout")?, threads, - rate_limter: RateLimiter { - number_of_requests: rate_limter["number_of_requests"], - time_limit: rate_limter["time_limit"], + rate_limiter: RateLimiter { + number_of_requests: rate_limiter["number_of_requests"], + time_limit: rate_limiter["time_limit"], } }) }) From 0132a63e6f88b33d641b6ee54c99a3582f744988 Mon Sep 17 00:00:00 2001 From: alamin655 Date: Mon, 11 Sep 2023 11:58:31 +0530 Subject: [PATCH 18/23] Apply suggestions from code review --- src/config/parser_models.rs | 2 +- src/lib.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/config/parser_models.rs b/src/config/parser_models.rs index 343b70c..21140dd 100644 --- a/src/config/parser_models.rs +++ b/src/config/parser_models.rs @@ -48,7 +48,7 @@ pub struct AggregatorConfig { pub random_delay: bool, } -/// Configuration options for the rate limter middleware. +/// Configuration options for the rate limiter middleware. 
/// /// # Fields /// diff --git a/src/lib.rs b/src/lib.rs index a1213d6..6c63270 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -71,8 +71,8 @@ pub fn run(listener: TcpListener, config: Config) -> std::io::Result { .wrap(cors) .wrap(Governor::new( &GovernorConfigBuilder::default() - .per_second(config.rate_limter.time_limit as u64) - .burst_size(config.rate_limter.number_of_requests as u32) + .per_second(config.rate_limiter.time_limit as u64) + .burst_size(config.rate_limiter.number_of_requests as u32) .finish() .unwrap(), )) From 8e56ac8ba2b91b1b0eb419bc37699f63d7f5a39f Mon Sep 17 00:00:00 2001 From: alamin655 Date: Mon, 11 Sep 2023 12:05:13 +0530 Subject: [PATCH 19/23] Update parser.rs --- src/config/parser.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/config/parser.rs b/src/config/parser.rs index abf0db2..a3419a9 100644 --- a/src/config/parser.rs +++ b/src/config/parser.rs @@ -105,8 +105,7 @@ impl Config { rate_limiter: RateLimiter { number_of_requests: rate_limiter["number_of_requests"], time_limit: rate_limiter["time_limit"], - } - }) + } }) } } From 30ca95a217030ec86a1bd108886b95af0764b6c6 Mon Sep 17 00:00:00 2001 From: neon_arch Date: Mon, 11 Sep 2023 20:02:09 +0300 Subject: [PATCH 20/23] =?UTF-8?q?=F0=9F=A7=B9=20chore:=20make=20github=20a?= =?UTF-8?q?ctions=20happy=20(#203)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock | 137 ++++++++++++++++++++++++++--------------------------- Cargo.toml | 8 +++- 2 files changed, 74 insertions(+), 71 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f55a3df..bacfd7e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9,7 +9,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "617a8268e3537fe1d8c9ead925fca49ef6400927ee7bc26750e90ecee14ce4b8" dependencies = [ "bitflags 1.3.2", - "bytes 1.4.0", + "bytes 1.5.0", "futures-core", "futures-sink", "memchr", @@ -46,7 +46,7 @@ dependencies = [ "actix-web", 
"askama_escape", "bitflags 1.3.2", - "bytes 1.4.0", + "bytes 1.5.0", "derive_more", "futures-core", "http-range", @@ -80,10 +80,10 @@ dependencies = [ "actix-service", "actix-utils", "ahash", - "base64 0.21.3", + "base64 0.21.4", "bitflags 2.4.0", "brotli", - "bytes 1.4.0", + "bytes 1.5.0", "bytestring", "derive_more", "encoding_rs", @@ -115,7 +115,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e01ed3140b2f8d422c68afa1ed2e85d996ea619c988ac834d255db32138655cb" dependencies = [ "quote 1.0.33", - "syn 2.0.29", + "syn 2.0.32", ] [[package]] @@ -195,7 +195,7 @@ dependencies = [ "actix-utils", "actix-web-codegen", "ahash", - "bytes 1.4.0", + "bytes 1.5.0", "bytestring", "cfg-if 1.0.0", "cookie 0.16.2", @@ -228,7 +228,7 @@ dependencies = [ "actix-router", "proc-macro2 1.0.66", "quote 1.0.33", - "syn 2.0.29", + "syn 2.0.32", ] [[package]] @@ -290,9 +290,9 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstyle" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15c4c2c83f81532e5845a733998b6971faca23490340a418e9b72a3ec9de12ea" +checksum = "b84bf0a05bbb2a83e5eb6fa36bb6e87baa08193c35ff52bbf6b38d8af2890e46" [[package]] name = "anyhow" @@ -326,7 +326,7 @@ checksum = "bc00ceb34980c03614e35a3a4e218276a0a824e911d07651cd0d858a51e8c0f0" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.33", - "syn 2.0.29", + "syn 2.0.32", ] [[package]] @@ -370,9 +370,9 @@ dependencies = [ [[package]] name = "base64" -version = "0.21.3" +version = "0.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "414dcefbc63d77c526a76b3afcf6fbb9b5e2791c19c3aa2297733208750c6e53" +checksum = "9ba43ea6f343b788c8764558649e08df62f86c6ef251fdaeb1ffd010a9ae50a2" [[package]] name = "bit-set" @@ -465,9 +465,9 @@ dependencies = [ [[package]] name = "bytes" -version = "1.4.0" +version = "1.5.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "89b2fd2a0dcf38d7971e2194b6b6eebab45ae01067456a7fd93d5547a61b70be" +checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" [[package]] name = "bytestring" @@ -475,7 +475,7 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "238e4886760d98c4f899360c834fa93e62cf7f721ac3c2da375cbdf4b8679aae" dependencies = [ - "bytes 1.4.0", + "bytes 1.5.0", ] [[package]] @@ -582,7 +582,7 @@ version = "4.6.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "35ed6e9d84f0b51a7f52daf1c7d71dd136fd7a3f41a8462b8cdb8c78d920fad4" dependencies = [ - "bytes 1.4.0", + "bytes 1.5.0", "futures-core", "memchr", "pin-project-lite", @@ -602,7 +602,7 @@ version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "888604f00b3db336d2af898ec3c1d5d0ddf5e6d462220f2ededc33a87ac4bbd5" dependencies = [ - "time 0.1.45", + "time 0.1.43", "url 1.7.2", ] @@ -630,7 +630,7 @@ dependencies = [ "publicsuffix", "serde", "serde_json", - "time 0.1.45", + "time 0.1.43", "try_from", "url 1.7.2", ] @@ -826,7 +826,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331" dependencies = [ "quote 1.0.33", - "syn 2.0.29", + "syn 2.0.32", ] [[package]] @@ -975,9 +975,9 @@ dependencies = [ [[package]] name = "error-stack" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6a37ef405b504fc3b87a24fa52906d98cdd1a7d4e5ef2b49f0d5fead138fced" +checksum = "27a72baa257b5e0e2de241967bc5ee8f855d6072351042688621081d66b2a76b" dependencies = [ "anyhow", "rustc_version 0.4.0", @@ -1172,7 +1172,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.33", - "syn 2.0.29", + "syn 2.0.32", ] [[package]] @@ -1298,7 +1298,7 
@@ version = "0.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "91fc23aa11be92976ef4729127f1a74adf36d8436f7816b185d18df956790833" dependencies = [ - "bytes 1.4.0", + "bytes 1.5.0", "fnv", "futures-core", "futures-sink", @@ -1395,7 +1395,7 @@ version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bd6effc99afb63425aff9b05836f029929e345a6148a14b7ecd5ab67af944482" dependencies = [ - "bytes 1.4.0", + "bytes 1.5.0", "fnv", "itoa 1.0.9", ] @@ -1418,7 +1418,7 @@ version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1" dependencies = [ - "bytes 1.4.0", + "bytes 1.5.0", "http 0.2.9", "pin-project-lite", ] @@ -1465,7 +1465,7 @@ dependencies = [ "log", "net2", "rustc_version 0.2.3", - "time 0.1.45", + "time 0.1.43", "tokio 0.1.22", "tokio-buf", "tokio-executor", @@ -1483,7 +1483,7 @@ version = "0.14.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ffb1cfd654a8219eaef89881fdb3bb3b1cdc5fa75ded05d6933b2b382e395468" dependencies = [ - "bytes 1.4.0", + "bytes 1.5.0", "futures-channel", "futures-core", "futures-util", @@ -1520,7 +1520,7 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" dependencies = [ - "bytes 1.4.0", + "bytes 1.5.0", "hyper 0.14.27", "native-tls", "tokio 1.32.0", @@ -1674,9 +1674,9 @@ dependencies = [ [[package]] name = "linux-raw-sys" -version = "0.4.5" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57bcfdad1b858c2db7c38303a6d2ad4dfaf5eb53dfeb0910128b2c26d6158503" +checksum = "1a9bad9f94746442c783ca431b22403b519cd7fbeed0533fdd6328b2f2212128" [[package]] name = "local-channel" @@ -1787,9 +1787,9 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" [[package]] name = 
"memchr" -version = "2.6.2" +version = "2.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5486aed0026218e61b8a01d5fbd5a0a134649abb71a0e53b7bc088529dced86e" +checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c" [[package]] name = "memoffset" @@ -1984,9 +1984,9 @@ dependencies = [ [[package]] name = "object" -version = "0.32.0" +version = "0.32.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77ac5bbd07aea88c60a577a1ce218075ffd59208b2d7ca97adf9bfc5aeb21ebe" +checksum = "9cf5f9dd3933bd50a9e1f149ec995f39ae2c496d31fd772c1fd45ebc27e902b0" dependencies = [ "memchr", ] @@ -2026,7 +2026,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.33", - "syn 2.0.29", + "syn 2.0.32", ] [[package]] @@ -2037,9 +2037,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" [[package]] name = "openssl-sys" -version = "0.9.92" +version = "0.9.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db7e971c2c2bba161b2d2fdf37080177eff520b3bc044787c7f1f5f9e78d869b" +checksum = "db4d56a4c0478783083cfafcc42493dd4a981d41669da64b4572a2a089b51b1d" dependencies = [ "cc", "libc", @@ -2145,7 +2145,7 @@ dependencies = [ "pest_meta", "proc-macro2 1.0.66", "quote 1.0.33", - "syn 2.0.29", + "syn 2.0.32", ] [[package]] @@ -2247,7 +2247,7 @@ dependencies = [ "phf_shared 0.11.2", "proc-macro2 1.0.66", "quote 1.0.33", - "syn 2.0.29", + "syn 2.0.32", ] [[package]] @@ -2294,7 +2294,7 @@ checksum = "4359fd9c9171ec6e8c62926d6faaf553a8dc3f64e1507e76da7911b4f6a04405" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.33", - "syn 2.0.29", + "syn 2.0.32", ] [[package]] @@ -2394,7 +2394,7 @@ dependencies = [ "mach", "once_cell", "raw-cpuid", - "wasi 0.10.0+wasi-snapshot-preview1", + "wasi 0.10.2+wasi-snapshot-preview1", "web-sys", "winapi 0.3.9", ] @@ -2601,7 +2601,7 @@ checksum = 
"4f49cdc0bb3f412bf8e7d1bd90fe1d9eb10bc5c399ba90973c14662a27b3f8ba" dependencies = [ "arc-swap", "async-trait", - "bytes 1.4.0", + "bytes 1.5.0", "combine", "futures 0.3.28", "futures-util", @@ -2684,7 +2684,7 @@ dependencies = [ "serde", "serde_json", "serde_urlencoded 0.5.5", - "time 0.1.45", + "time 0.1.43", "tokio 0.1.22", "tokio-executor", "tokio-io", @@ -2701,8 +2701,8 @@ version = "0.11.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3e9ad3fe7488d7e34558a2033d45a0c90b72d97b4f80705666fea71472e2e6a1" dependencies = [ - "base64 0.21.3", - "bytes 1.4.0", + "base64 0.21.4", + "bytes 1.5.0", "encoding_rs", "futures-core", "futures-util", @@ -2764,9 +2764,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.11" +version = "0.38.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0c3dde1fc030af041adc40e79c0e7fbcf431dd24870053d187d7c66e4b87453" +checksum = "d7db8590df6dfcd144d22afd1b83b36c21a18d7cbc1dc4bb5295a8712e9eb662" dependencies = [ "bitflags 2.4.0", "errno", @@ -2924,14 +2924,14 @@ checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.33", - "syn 2.0.29", + "syn 2.0.32", ] [[package]] name = "serde_json" -version = "1.0.105" +version = "1.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "693151e1ac27563d6dbcec9dee9fbd5da8539b20fa14ad3752b2e6d363ace360" +checksum = "2cc66a619ed80bf7a0f6b17dd063a84b88f6dea1813737cf469aef1d081142c2" dependencies = [ "itoa 1.0.9", "ryu", @@ -3166,9 +3166,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.29" +version = "2.0.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c324c494eba9d92503e6f1ef2e6df781e78f6a7705a0202d9801b198807d518a" +checksum = "239814284fd6f1a4ffe4ca893952cdd93c224b6a1571c9a9eadd670295c0c9e2" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.33", @@ -3232,22 +3232,22 @@ dependencies 
= [ [[package]] name = "thiserror" -version = "1.0.47" +version = "1.0.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97a802ec30afc17eee47b2855fc72e0c4cd62be9b4efe6591edde0ec5bd68d8f" +checksum = "9d6d7a740b8a666a7e828dd00da9c0dc290dff53154ea77ac109281de90589b7" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.47" +version = "1.0.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bb623b56e39ab7dcd4b1b98bb6c8f8d907ed255b18de254088016b27a8ee19b" +checksum = "49922ecae66cc8a249b77e68d1d0623c1b2c514f0060c27cdc68bd62a1219d35" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.33", - "syn 2.0.29", + "syn 2.0.32", ] [[package]] @@ -3258,12 +3258,11 @@ checksum = "3bf63baf9f5039dadc247375c29eb13706706cfde997d0330d05aa63a77d8820" [[package]] name = "time" -version = "0.1.45" +version = "0.1.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b797afad3f312d1c66a56d11d0316f916356d11bd158fbc6ca6389ff6bf805a" +checksum = "ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438" dependencies = [ "libc", - "wasi 0.10.0+wasi-snapshot-preview1", "winapi 0.3.9", ] @@ -3346,7 +3345,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "17ed6077ed6cd6c74735e21f37eb16dc3935f96878b1fe961074089cc80893f9" dependencies = [ "backtrace", - "bytes 1.4.0", + "bytes 1.5.0", "libc", "mio 0.8.8", "num_cpus", @@ -3408,7 +3407,7 @@ checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.33", - "syn 2.0.29", + "syn 2.0.32", ] [[package]] @@ -3510,7 +3509,7 @@ version = "0.7.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "806fe8c2c87eccc8b3267cbae29ed3ab2d0bd37fca70ab622e46aaa9375ddb7d" dependencies = [ - "bytes 1.4.0", + "bytes 1.5.0", "futures-core", "futures-sink", "pin-project-lite", @@ -3680,9 +3679,9 @@ checksum = 
"49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "walkdir" -version = "2.3.3" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36df944cda56c7d8d8b7496af378e6b16de9284591917d307c9b4d313c44e698" +checksum = "d71d857dc86794ca4c280d616f7da00d2dbfd8cd788846559a6813e6aa4b54ee" dependencies = [ "same-file", "winapi-util", @@ -3710,9 +3709,9 @@ dependencies = [ [[package]] name = "wasi" -version = "0.10.0+wasi-snapshot-preview1" +version = "0.10.2+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" +checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" [[package]] name = "wasi" @@ -3741,7 +3740,7 @@ dependencies = [ "once_cell", "proc-macro2 1.0.66", "quote 1.0.33", - "syn 2.0.29", + "syn 2.0.32", "wasm-bindgen-shared", ] @@ -3775,7 +3774,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.33", - "syn 2.0.29", + "syn 2.0.32", "wasm-bindgen-backend", "wasm-bindgen-shared", ] diff --git a/Cargo.toml b/Cargo.toml index f24da86..0747fb7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,7 +15,6 @@ scraper = {version="0.17.1"} actix-web = {version="4.4.0", features = ["cookies"]} actix-files = {version="0.6.2"} actix-cors = {version="0.6.4"} -actix-governor = {version="0.4.1"} serde_json = {version="1.0.105"} fake-useragent = {version="0.1.3"} env_logger = {version="0.10.0"} @@ -27,8 +26,13 @@ rand={version="0.8.5"} once_cell = {version="1.18.0"} error-stack = {version="0.4.0"} async-trait = {version="0.1.73"} -regex = {version="1.9.5", features=["perf"]} +regex = {version="1.9.4", features=["perf"]} +smallvec = {version="1.11.0", features=["union", "serde"]} +futures = {version="0.3.28"} dhat = {version="0.3.2", optional = true} +mimalloc = { version = "0.1.38", default-features 
= false } +async-once-cell = {version="0.5.3"} +actix-governor = {version="0.4.1"} [dev-dependencies] rusty-hook = "^0.11.2" From b9d651c378eacfe4f75b2e5cec52644ee3295210 Mon Sep 17 00:00:00 2001 From: neon_arch Date: Mon, 11 Sep 2023 20:16:42 +0300 Subject: [PATCH 21/23] =?UTF-8?q?=F0=9F=A7=B9=20chore:=20make=20github=20a?= =?UTF-8?q?ctions=20happy=20(#203)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/config/parser.rs | 62 ++++++++++++++++++++++++++------------------ 1 file changed, 37 insertions(+), 25 deletions(-) diff --git a/src/config/parser.rs b/src/config/parser.rs index a3419a9..bce9f05 100644 --- a/src/config/parser.rs +++ b/src/config/parser.rs @@ -36,6 +36,7 @@ pub struct Config { pub request_timeout: u8, pub threads: u8, pub rate_limiter: RateLimiter, + pub safe_search: u8, } impl Config { @@ -79,33 +80,44 @@ impl Config { parsed_threads }; - let rate_limiter = globals.get::<_,HashMap>("rate_limiter")?; + let rate_limiter = globals.get::<_, HashMap>("rate_limiter")?; - Ok(Config { - port: globals.get::<_, u16>("port")?, - binding_ip: globals.get::<_, String>("binding_ip")?, - style: Style::new( - globals.get::<_, String>("theme")?, - globals.get::<_, String>("colorscheme")?, - ), - redis_url: globals.get::<_, String>("redis_url")?, - aggregator: AggregatorConfig { - random_delay: globals.get::<_, bool>("production_use")?, - }, - logging, - debug, - upstream_search_engines: globals - .get::<_, HashMap>("upstream_search_engines")? 
- .into_iter() - .filter_map(|(key, value)| value.then_some(key)) - .filter_map(|engine| crate::engines::engine_models::EngineHandler::new(&engine)) - .collect(), - request_timeout: globals.get::<_, u8>("request_timeout")?, - threads, - rate_limiter: RateLimiter { - number_of_requests: rate_limiter["number_of_requests"], - time_limit: rate_limiter["time_limit"], + let parsed_safe_search: u8 = globals.get::<_, u8>("safe_search")?; + let safe_search: u8 = match parsed_safe_search { + 0..=4 => parsed_safe_search, + _ => { + log::error!("Config Error: The value of `safe_search` option should be a non zero positive integer from 0 to 4."); + log::error!("Falling back to using the value `1` for the option"); + 1 } + }; + + Ok(Config { + port: globals.get::<_, u16>("port")?, + binding_ip: globals.get::<_, String>("binding_ip")?, + style: Style::new( + globals.get::<_, String>("theme")?, + globals.get::<_, String>("colorscheme")?, + ), + redis_url: globals.get::<_, String>("redis_url")?, + aggregator: AggregatorConfig { + random_delay: globals.get::<_, bool>("production_use")?, + }, + logging, + debug, + upstream_search_engines: globals + .get::<_, HashMap>("upstream_search_engines")? 
+ .into_iter() + .filter_map(|(key, value)| value.then_some(key)) + .filter_map(|engine| crate::engines::engine_models::EngineHandler::new(&engine)) + .collect(), + request_timeout: globals.get::<_, u8>("request_timeout")?, + threads, + rate_limiter: RateLimiter { + number_of_requests: rate_limiter["number_of_requests"], + time_limit: rate_limiter["time_limit"], + }, + safe_search, }) } } From 1a222217c45db5e1248f6808bf04ba060bc7f06e Mon Sep 17 00:00:00 2001 From: neon_arch Date: Tue, 12 Sep 2023 17:59:33 +0300 Subject: [PATCH 22/23] =?UTF-8?q?=F0=9F=A7=B9=20chore:=20make=20github=20a?= =?UTF-8?q?ctions=20happy=20(#205)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/cache/error.rs | 9 +++------ src/results/aggregation_models.rs | 13 ++++++------- src/server/routes.rs | 3 ++- 3 files changed, 11 insertions(+), 14 deletions(-) diff --git a/src/cache/error.rs b/src/cache/error.rs index efd87c9..8bdb977 100644 --- a/src/cache/error.rs +++ b/src/cache/error.rs @@ -5,15 +5,12 @@ use std::fmt; use redis::RedisError; /// A custom error type used for handling redis async pool associated errors. -/// -/// This enum provides variants three different categories of errors: -/// * `RedisError` - This variant handles all errors related to `RedisError`, -/// * `PoolExhaustionWithConnectionDropError` - This variant handles the error -/// which occurs when all the connections in the connection pool return a connection -/// dropped redis error. #[derive(Debug)] pub enum PoolError { + /// This variant handles all errors related to `RedisError`, RedisError(RedisError), + /// This variant handles the errors which occurs when all the connections + /// in the connection pool return a connection dropped redis error. 
PoolExhaustionWithConnectionDropError, } diff --git a/src/results/aggregation_models.rs b/src/results/aggregation_models.rs index 495572a..30316e2 100644 --- a/src/results/aggregation_models.rs +++ b/src/results/aggregation_models.rs @@ -114,12 +114,12 @@ pub struct SearchResults { /// Stores the information on which engines failed with their engine name /// and the type of error that caused it. pub engine_errors_info: Vec, - /// Stores the flag option which holds the check value that the following - /// search query was disallowed when the safe search level set to 4 and it + /// Stores the flag option which holds the check value that the following + /// search query was disallowed when the safe search level set to 4 and it /// was present in the `Blocklist` file. pub disallowed: bool, - /// Stores the flag option which holds the check value that the following - /// search query was filtered when the safe search level set to 3 and it + /// Stores the flag option which holds the check value that the following + /// search query was filtered when the safe search level set to 3 and it /// was present in the `Blocklist` file. pub filtered: bool, } @@ -133,9 +133,8 @@ impl SearchResults { /// and stores it into a vector of `SearchResult` structs. /// * `page_query` - Takes an argument of current page`s search query `q` provided in /// the search url. - /// * `empty_result_set` - Takes a boolean which indicates that no engines gave a result for the - /// given search query. - /// * `` + /// * `engine_errors_info` - Takes an array of structs which contains information regarding + /// which engines failed with their names, reason and their severity color name. pub fn new( results: Vec, page_query: &str, diff --git a/src/server/routes.rs b/src/server/routes.rs index 96ad737..29b36aa 100644 --- a/src/server/routes.rs +++ b/src/server/routes.rs @@ -230,7 +230,8 @@ async fn results( // UI and use that. 
let mut results: SearchResults = match req.cookie("appCookie") { Some(cookie_value) => { - let cookie_value: Cookie = serde_json::from_str(cookie_value.name_value().1)?; + let cookie_value: Cookie<'_> = + serde_json::from_str(cookie_value.name_value().1)?; let engines: Vec = cookie_value .engines From c60fdb8366b6244d6c6cf8061618a9734ef8aa3f Mon Sep 17 00:00:00 2001 From: neon_arch Date: Tue, 12 Sep 2023 18:53:32 +0300 Subject: [PATCH 23/23] =?UTF-8?q?=F0=9F=A7=B9=20chore:=20make=20github=20a?= =?UTF-8?q?ctions=20happy=20(#205)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/config/parser.rs | 2 +- src/server/routes.rs | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/config/parser.rs b/src/config/parser.rs index e3b3588..4490bc6 100644 --- a/src/config/parser.rs +++ b/src/config/parser.rs @@ -34,7 +34,7 @@ pub struct Config { pub threads: u8, /// It stores configuration options for the ratelimiting middleware. pub rate_limiter: RateLimiter, - /// It stores the level of safe search to be used for restricting content in the + /// It stores the level of safe search to be used for restricting content in the /// search results. pub safe_search: u8, } diff --git a/src/server/routes.rs b/src/server/routes.rs index 29b36aa..57aa413 100644 --- a/src/server/routes.rs +++ b/src/server/routes.rs @@ -33,6 +33,8 @@ struct SearchParams { /// It stores the search parameter `page` (or pageno in simple words) /// of the search url. page: Option, + /// It stores the search parameter `safesearch` (or safe search level in simple words) of the + /// search url. safesearch: Option, } @@ -275,6 +277,8 @@ async fn results( } } +/// A helper function which checks whether the search query contains any keywords which should be +/// disallowed/allowed based on the regex based rules present in the blocklist and allowlist files. fn is_match_from_filter_list( file_path: &str, query: &str,