From 4f287112182fd469231ad710e1fc2f9a97e048b6 Mon Sep 17 00:00:00 2001 From: neon_arch Date: Tue, 22 Aug 2023 19:16:37 +0300 Subject: [PATCH 1/8] =?UTF-8?q?=E2=9C=A8=20feat:=20add=20code=20to=20filte?= =?UTF-8?q?r=20aggregated=20search=20results=20using=20lists=20(#163)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/results/aggregator.rs | 55 +++++++++++++++++++++++++++++++++------ 1 file changed, 47 insertions(+), 8 deletions(-) diff --git a/src/results/aggregator.rs b/src/results/aggregator.rs index 16586c0..8e92b10 100644 --- a/src/results/aggregator.rs +++ b/src/results/aggregator.rs @@ -1,18 +1,22 @@ //! This module provides the functionality to scrape and gathers all the results from the upstream //! search engines and then removes duplicate results. -use std::{collections::HashMap, time::Duration}; - -use error_stack::Report; -use rand::Rng; -use tokio::task::JoinHandle; +use std::{collections::HashMap, io::BufReader, time::Duration}; use super::{ aggregation_models::{EngineErrorInfo, SearchResult, SearchResults}, user_agent::random_user_agent, }; +use error_stack::Report; +use rand::Rng; +use regex::Regex; +use std::{fs::File, io::BufRead}; +use tokio::task::JoinHandle; -use crate::engines::engine_models::{EngineError, EngineHandler}; +use crate::{ + engines::engine_models::{EngineError, EngineHandler}, + handler::paths::{file_path, FileType}, +}; /// Aliases for long type annotations type FutureVec = Vec, Report>>>; @@ -106,7 +110,7 @@ pub async fn aggregate( log::error!("Engine Error: {:?}", error); engine_errors_info.push(EngineErrorInfo::new( error.downcast_ref::().unwrap(), - engine_name.to_string(), + engine_name, )); }; @@ -143,7 +147,22 @@ pub async fn aggregate( } } - let results = result_map.into_values().collect(); + let mut blacklist_map: HashMap = HashMap::new(); + filter_with_lists( + &mut result_map, + &mut blacklist_map, + &file_path(FileType::BlockList)?, + )?; + + filter_with_lists( + &mut blacklist_map, + &mut result_map, + &file_path(FileType::AllowList)?, + )?; + + drop(blacklist_map); + + let results: Vec = result_map.into_values().collect(); Ok(SearchResults::new( results, @@ -151,3 +170,23 @@ pub async fn aggregate( engine_errors_info, )) } + +fn filter_with_lists( + map_to_be_filtered: &mut HashMap, + resultant_map: &mut HashMap, + file_path: &str, +) -> Result<(), Box> { + for (url, search_result) in map_to_be_filtered.clone().into_iter() { + let reader = BufReader::new(File::open(file_path)?); + for line in reader.lines() { + let re = Regex::new(&line?)?; + if re.is_match(&url.to_lowercase()) + || re.is_match(&search_result.title.to_lowercase()) + || re.is_match(&search_result.description.to_lowercase()) + { + resultant_map.insert(url.clone(), map_to_be_filtered.remove(&url).unwrap()); + } + } + } + Ok(()) +} From fda6c3a9be3394a1457962182a8343697a4591f0 Mon Sep 17 00:00:00 2001 From: neon_arch Date: Tue, 22 Aug 2023 19:18:43 +0300 Subject: [PATCH 2/8] =?UTF-8?q?=E2=9C=A8=20feat:=20rename=20public=5Fpaths?= =?UTF-8?q?=20to=20paths=20(#163)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/handler/mod.rs | 2 +- src/handler/paths.rs | 111 ++++++++++++++++++++++++++++++++++++ src/handler/public_paths.rs | 33 ----------- 3 files changed, 112 insertions(+), 34 deletions(-) create mode 100644 src/handler/paths.rs delete mode 100644 src/handler/public_paths.rs diff --git a/src/handler/mod.rs b/src/handler/mod.rs index 0d07e51..8118b29 100644 --- a/src/handler/mod.rs +++ b/src/handler/mod.rs @@ -1 +1 @@ -pub mod public_paths; +pub mod paths; diff --git a/src/handler/paths.rs b/src/handler/paths.rs new file mode 100644 index 0000000..9b4fa07 --- /dev/null +++ b/src/handler/paths.rs @@ -0,0 +1,111 @@ +//! This module provides the functionality to handle theme folder present on different paths and +//! provide one appropriate path on which it is present and can be used. + +use std::collections::HashMap; +use std::io::Error; +use std::path::Path; + +// ------- Constants -------- +static PUBLIC_DIRECTORY_NAME: &str = "public"; +static COMMON_DIRECTORY_NAME: &str = "websurfx"; +static CONFIG_FILE_NAME: &str = "config.lua"; +static ALLOWLIST_FILE_NAME: &str = "allowlist.txt"; +static BLOCKLIST_FILE_NAME: &str = "blocklist.txt"; + +#[derive(Hash, PartialEq, Eq, Debug)] +pub enum FileType { + Config, + AllowList, + BlockList, + Theme, +} + +static FILE_PATHS_FOR_DIFF_FILE_TYPES: once_cell::sync::Lazy>> = + once_cell::sync::Lazy::new(|| { + HashMap::from([ + ( + FileType::Config, + vec![ + format!( + "{}/.config/{}/{}", + std::env::var("HOME").unwrap(), + COMMON_DIRECTORY_NAME, + CONFIG_FILE_NAME + ), + format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME), + format!("./{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME), + ], + ), + ( + FileType::Theme, + vec![ + format!("/opt/websurfx/{}/", PUBLIC_DIRECTORY_NAME), + format!("./{}/", PUBLIC_DIRECTORY_NAME), + ], + ), + ( + FileType::AllowList, + vec![ + format!( + "{}/.config/{}/{}", + std::env::var("HOME").unwrap(), + COMMON_DIRECTORY_NAME, + ALLOWLIST_FILE_NAME + ), + format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, ALLOWLIST_FILE_NAME), + format!("./{}/{}", COMMON_DIRECTORY_NAME, ALLOWLIST_FILE_NAME), + ], + ), + ( + FileType::BlockList, + vec![ + format!( + "{}/.config/{}/{}", + std::env::var("HOME").unwrap(), + COMMON_DIRECTORY_NAME, + BLOCKLIST_FILE_NAME + ), + format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, BLOCKLIST_FILE_NAME), + format!("./{}/{}", COMMON_DIRECTORY_NAME, BLOCKLIST_FILE_NAME), + ], + ), + ]) + }); + +/// A helper function which returns an appropriate config file path checking if the config +/// file exists on that path. +/// +/// # Error +/// +/// Returns a `config file not found!!` error if the config file is not present under following +/// paths which are: +/// 1. `~/.config/websurfx/` if it not present here then it fallbacks to the next one (2) +/// 2. `/etc/xdg/websurfx/config.lua` if it is not present here then it fallbacks to the next +/// one (3). +/// 3. `websurfx/` (under project folder ( or codebase in other words)) if it is not present +/// here then it returns an error as mentioned above. + +/// A function which returns an appropriate theme directory path checking if the theme +/// directory exists on that path. +/// +/// # Error +/// +/// Returns a `Theme (public) folder not found!!` error if the theme folder is not present under following +/// paths which are: +/// 1. `/opt/websurfx` if it not present here then it fallbacks to the next one (2) +/// 2. Under project folder ( or codebase in other words) if it is not present +/// here then it returns an error as mentioned above. +pub fn file_path(file_type: FileType) -> Result { + let file_path = FILE_PATHS_FOR_DIFF_FILE_TYPES.get(&file_type).unwrap(); + for (idx, _) in file_path.iter().enumerate() { + if Path::new(file_path[idx].as_str()).exists() { + return Ok(file_path[idx].clone()); + } + } + + // if no of the configs above exist, return error + Err(Error::new( + std::io::ErrorKind::NotFound, + format!("{:?} file not found!!", file_type), + )) +} diff --git a/src/handler/public_paths.rs b/src/handler/public_paths.rs deleted file mode 100644 index 18d51e4..0000000 --- a/src/handler/public_paths.rs +++ /dev/null @@ -1,33 +0,0 @@ -//! This module provides the functionality to handle theme folder present on different paths and -//! provide one appropriate path on which it is present and can be used. - -use std::io::Error; -use std::path::Path; - -// ------- Constants -------- -static PUBLIC_DIRECTORY_NAME: &str = "public"; - -/// A function which returns an appropriate theme directory path checking if the theme -/// directory exists on that path. -/// -/// # Error -/// -/// Returns a `Theme (public) folder not found!!` error if the theme folder is not present under following -/// paths which are: -/// 1. `/opt/websurfx` if it not present here then it fallbacks to the next one (2) -/// 2. Under project folder ( or codebase in other words) if it is not present -/// here then it returns an error as mentioned above. -pub fn public_path() -> Result { - if Path::new(format!("/opt/websurfx/{}/", PUBLIC_DIRECTORY_NAME).as_str()).exists() { - return Ok(format!("/opt/websurfx/{}", PUBLIC_DIRECTORY_NAME)); - } - - if Path::new(format!("./{}/", PUBLIC_DIRECTORY_NAME).as_str()).exists() { - return Ok(format!("./{}", PUBLIC_DIRECTORY_NAME)); - } - - Err(Error::new( - std::io::ErrorKind::NotFound, - "Themes (public) folder not found!!", - )) -} From df09ed9f4820706688f7e438a8afa30a1eb7d0cb Mon Sep 17 00:00:00 2001 From: neon_arch Date: Tue, 22 Aug 2023 19:19:31 +0300 Subject: [PATCH 3/8] =?UTF-8?q?=E2=9C=A8=20feat:=20add=20lists=20file=20to?= =?UTF-8?q?=20filter=20search=20results=20(#163)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- websurfx/allowlist.txt | 0 websurfx/blocklist.txt | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 websurfx/allowlist.txt create mode 100644 websurfx/blocklist.txt diff --git a/websurfx/allowlist.txt b/websurfx/allowlist.txt new file mode 100644 index 0000000..e69de29 diff --git a/websurfx/blocklist.txt b/websurfx/blocklist.txt new file mode 100644 index 0000000..e69de29 From 09227d8c93f0fba9c2780af5e586f5f19eeeeb90 Mon Sep 17 00:00:00 2001 From: neon_arch Date: Tue, 22 Aug 2023 19:20:29 +0300 Subject: [PATCH 4/8] =?UTF-8?q?=E2=9C=A8=20feat:=20move=20implementation?= =?UTF-8?q?=20of=20config=20file=20path=20to=20reduce=20duplication=20(#16?= =?UTF-8?q?3)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/config/parser.rs | 56 ++++---------------------------------------- 1 file changed, 4 insertions(+), 52 deletions(-) diff --git a/src/config/parser.rs b/src/config/parser.rs index e05f56d..4639013 100644 --- a/src/config/parser.rs +++ b/src/config/parser.rs @@ -1,14 +1,12 @@ //! This module provides the functionality to parse the lua config and convert the config options //! into rust readable form. +use crate::handler::paths::{file_path, FileType}; + use super::parser_models::Style; use log::LevelFilter; use rlua::Lua; -use std::{collections::HashMap, format, fs, path::Path, thread::available_parallelism}; - -// ------- Constants -------- -static COMMON_DIRECTORY_NAME: &str = "websurfx"; -static CONFIG_FILE_NAME: &str = "config.lua"; +use std::{collections::HashMap, fs, thread::available_parallelism}; /// A named struct which stores the parsed config file options. /// @@ -69,7 +67,7 @@ impl Config { let globals = context.globals(); context - .load(&fs::read_to_string(Config::config_path()?)?) + .load(&fs::read_to_string(file_path(FileType::Config)?)?) .exec()?; let parsed_threads: u8 = globals.get::<_, u8>("threads")?; @@ -114,52 +112,6 @@ impl Config { }) }) } - - /// A helper function which returns an appropriate config file path checking if the config - /// file exists on that path. - /// - /// # Error - /// - /// Returns a `config file not found!!` error if the config file is not present under following - /// paths which are: - /// 1. `~/.config/websurfx/` if it not present here then it fallbacks to the next one (2) - /// 2. `/etc/xdg/websurfx/config.lua` if it is not present here then it fallbacks to the next - /// one (3). - /// 3. `websurfx/` (under project folder ( or codebase in other words)) if it is not present - /// here then it returns an error as mentioned above. - fn config_path() -> Result> { - // check user config - - let path = format!( - "{}/.config/{}/config.lua", - std::env::var("HOME").unwrap(), - COMMON_DIRECTORY_NAME - ); - if Path::new(path.as_str()).exists() { - return Ok(format!( - "{}/.config/{}/{}", - std::env::var("HOME").unwrap(), - COMMON_DIRECTORY_NAME, - CONFIG_FILE_NAME - )); - } - - // look for config in /etc/xdg - if Path::new(format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME).as_str()) - .exists() - { - return Ok("/etc/xdg/websurfx/config.lua".to_string()); - } - - // use dev config - if Path::new(format!("./{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME).as_str()).exists() - { - return Ok("./websurfx/config.lua".to_string()); - } - - // if no of the configs above exist, return error - Err("Config file not found!!".to_string().into()) - } } /// a helper function that sets the proper logging level From b17057471b6411ec3793d596494aea34f9228344 Mon Sep 17 00:00:00 2001 From: neon_arch Date: Tue, 22 Aug 2023 19:22:37 +0300 Subject: [PATCH 5/8] =?UTF-8?q?=E2=9C=A8=20feat:=20rename=20functions=20(#?= =?UTF-8?q?163)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/lib.rs | 4 ++-- src/server/routes.rs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index fe8ee92..cd83d8a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -17,7 +17,7 @@ use actix_files as fs; use actix_web::{dev::Server, http::header, middleware::Logger, web, App, HttpServer}; use config::parser::Config; use handlebars::Handlebars; -use handler::public_paths::public_path; +use handler::paths::{file_path, FileType}; /// Runs the web server on the provided TCP listener and returns a `Server` instance. /// @@ -42,7 +42,7 @@ use handler::public_paths::public_path; pub fn run(listener: TcpListener, config: Config) -> std::io::Result { let mut handlebars: Handlebars = Handlebars::new(); - let public_folder_path: String = public_path()?; + let public_folder_path: String = file_path(FileType::Theme)?; handlebars .register_templates_directory(".html", format!("{}/templates", public_folder_path)) diff --git a/src/server/routes.rs b/src/server/routes.rs index 77210b2..93c6fd5 100644 --- a/src/server/routes.rs +++ b/src/server/routes.rs @@ -8,7 +8,7 @@ use crate::{ cache::cacher::RedisCache, config::parser::Config, engines::engine_models::EngineHandler, - handler::public_paths::public_path, + handler::paths::{file_path, FileType}, results::{aggregation_models::SearchResults, aggregator::aggregate}, }; use actix_web::{get, web, HttpRequest, HttpResponse}; @@ -215,7 +215,7 @@ async fn results( /// Handles the route of robots.txt page of the `websurfx` meta search engine website. #[get("/robots.txt")] pub async fn robots_data(_req: HttpRequest) -> Result> { - let page_content: String = read_to_string(format!("{}/robots.txt", public_path()?))?; + let page_content: String = read_to_string(format!("{}/robots.txt", file_path(FileType::Theme)?))?; Ok(HttpResponse::Ok() .content_type("text/plain; charset=ascii") .body(page_content)) From e8998a4be2d4c4ef72af13e1aba43fc78d9e4fb0 Mon Sep 17 00:00:00 2001 From: neon_arch Date: Tue, 22 Aug 2023 19:23:39 +0300 Subject: [PATCH 6/8] =?UTF-8?q?=F0=9F=9A=80=20chore:=20bump=20the=20app=20?= =?UTF-8?q?&=20crates=20version=20&=20add=20regex=20crate=20(#163)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock | 52 +++++++++++++++++++++++++++------------------------- Cargo.toml | 9 +++++---- 2 files changed, 32 insertions(+), 29 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 61a2d0d..eccdff7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -77,7 +77,7 @@ dependencies = [ "encoding_rs", "flate2", "futures-core", - "h2 0.3.20", + "h2 0.3.21", "http 0.2.9", "httparse", "httpdate", @@ -475,9 +475,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.0.82" +version = "1.0.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "305fe645edc1442a0fa8b6726ba61d422798d37a52e12eaecf4b022ebbb88f01" +checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" dependencies = [ "jobserver", "libc", @@ -816,9 +816,9 @@ dependencies = [ [[package]] name = "deranged" -version = "0.3.7" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7684a49fb1af197853ef7b2ee694bc1f5b4179556f1e5710e1760c5db6f5e929" +checksum = "f2696e8a945f658fd14dc3b87242e6b80cd0f36ff04ea560fa39082368847946" [[package]] name = "derive_more" @@ -1176,9 +1176,9 @@ dependencies = [ [[package]] name = "h2" -version = "0.3.20" +version = "0.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97ec8491ebaf99c8eaa73058b045fe58073cd6be7f596ac993ced0b0a0c01049" +checksum = "91fc23aa11be92976ef4729127f1a74adf36d8436f7816b185d18df956790833" dependencies = [ "bytes 1.4.0", "fnv", @@ -1363,7 +1363,7 @@ dependencies = [ "futures-channel", "futures-core", "futures-util", - "h2 0.3.20", + "h2 0.3.21", "http 0.2.9", "http-body 0.4.5", "httparse", @@ -2454,16 +2454,16 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.11.18" +version = "0.11.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cde824a14b7c14f85caff81225f411faacc04a2013f41670f41443742b1c1c55" +checksum = "20b9b67e2ca7dd9e9f9285b759de30ff538aab981abaaf7bc9bd90b84a0126c3" dependencies = [ "base64 0.21.2", "bytes 1.4.0", "encoding_rs", "futures-core", "futures-util", - "h2 0.3.20", + "h2 0.3.21", "http 0.2.9", "http-body 0.4.5", "hyper 0.14.27", @@ -2486,7 +2486,7 @@ dependencies = [ "wasm-bindgen", "wasm-bindgen-futures", "web-sys", - "winreg 0.10.1", + "winreg 0.50.0", ] [[package]] @@ -2684,18 +2684,18 @@ checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" [[package]] name = "serde" -version = "1.0.183" +version = "1.0.185" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32ac8da02677876d532745a130fc9d8e6edfa81a269b107c5b00829b91d8eb3c" +checksum = "be9b6f69f1dfd54c3b568ffa45c310d6973a5e5148fd40cf515acaf38cf5bc31" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.183" +version = "1.0.185" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aafe972d60b0b9bee71a91b92fee2d4fb3c9d7e8f6b179aa99f27203d99a4816" +checksum = "dc59dfdcbad1437773485e0367fea4b090a2e0a16d9ffc46af47764536a298ec" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.33", @@ -2797,9 +2797,9 @@ checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de" [[package]] name = "slab" -version = "0.4.8" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6528351c9bc8ab22353f9d776db39a20288e8d6c37ef8cfe3317cf875eecfc2d" +checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" dependencies = [ "autocfg 1.1.0", ] @@ -3328,9 +3328,9 @@ checksum = "ed646292ffc8188ef8ea4d1e0e0150fb15a5c2e12ad9b8fc191ae7a8a7f3c4b9" [[package]] name = "unicase" -version = "2.6.0" +version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50f37be617794602aabbeee0be4f259dc1778fabe05e2d67ee8f79326d5cb4f6" +checksum = "f7d2d4dafb69621809a81864c9c1b864479e1235c0dd4e199924b9742439ed89" dependencies = [ "version_check", ] @@ -3543,7 +3543,7 @@ dependencies = [ [[package]] name = "websurfx" -version = "0.17.0" +version = "0.18.0" dependencies = [ "actix-cors", "actix-files", @@ -3559,7 +3559,8 @@ dependencies = [ "once_cell", "rand 0.8.5", "redis", - "reqwest 0.11.18", + "regex", + "reqwest 0.11.19", "rlua", "rusty-hook", "scraper", @@ -3688,11 +3689,12 @@ dependencies = [ [[package]] name = "winreg" -version = "0.10.1" +version = "0.50.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80d0f4e272c85def139476380b12f9ac60926689dd2e01d4923222f40580869d" +checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" dependencies = [ - "winapi 0.3.9", + "cfg-if 1.0.0", + "windows-sys", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index f83aa88..c5f9013 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,15 +1,15 @@ [package] name = "websurfx" -version = "0.17.0" +version = "0.18.0" edition = "2021" description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind." repository = "https://github.com/neon-mmd/websurfx" license = "AGPL-3.0" [dependencies] -reqwest = {version="0.11.18",features=["json"]} +reqwest = {version="0.11.19",features=["json"]} tokio = {version="1.32.0",features=["full"]} -serde = {version="1.0.183",features=["derive"]} +serde = {version="1.0.185",features=["derive"]} handlebars = { version = "4.3.7", features = ["dir_source"] } scraper = {version="0.17.1"} actix-web = {version="4.3.1", features = ["cookies"]} @@ -26,6 +26,7 @@ rand={version="0.8.5"} once_cell = {version="1.18.0"} error-stack = {version="0.3.1"} async-trait = {version="0.1.73"} +regex = {version="1.9.3", features=["perf"]} [dev-dependencies] rusty-hook = "^0.11.2" @@ -50,7 +51,7 @@ split-debuginfo = '...' debug-assertions = false overflow-checks = false lto = 'thin' -panic = 'unwind' +panic = 'abort' incremental = false codegen-units = 16 rpath = false From 9c71c9f61705ed7aaaaa9502973c72c7cd13a3e6 Mon Sep 17 00:00:00 2001 From: neon_arch Date: Tue, 22 Aug 2023 19:29:13 +0300 Subject: [PATCH 7/8] =?UTF-8?q?=F0=9F=A7=B9=20chore:=20make=20rustfmt=20ha?= =?UTF-8?q?ppy?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/server/routes.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/server/routes.rs b/src/server/routes.rs index 93c6fd5..8910f8f 100644 --- a/src/server/routes.rs +++ b/src/server/routes.rs @@ -215,7 +215,8 @@ async fn results( /// Handles the route of robots.txt page of the `websurfx` meta search engine website. #[get("/robots.txt")] pub async fn robots_data(_req: HttpRequest) -> Result> { - let page_content: String = read_to_string(format!("{}/robots.txt", file_path(FileType::Theme)?))?; + let page_content: String = + read_to_string(format!("{}/robots.txt", file_path(FileType::Theme)?))?; Ok(HttpResponse::Ok() .content_type("text/plain; charset=ascii") .body(page_content)) From 44216e4d4c379df39532424841a842f494eac647 Mon Sep 17 00:00:00 2001 From: neon_arch Date: Wed, 23 Aug 2023 13:11:09 +0300 Subject: [PATCH 8/8] =?UTF-8?q?=E2=9C=A8=20feat:=20optimise=20search=20res?= =?UTF-8?q?ults=20filtering=20code=20(#163)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/results/aggregator.rs | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/results/aggregator.rs b/src/results/aggregator.rs index 8e92b10..23ed091 100644 --- a/src/results/aggregator.rs +++ b/src/results/aggregator.rs @@ -1,7 +1,11 @@ //! This module provides the functionality to scrape and gathers all the results from the upstream //! search engines and then removes duplicate results. -use std::{collections::HashMap, io::BufReader, time::Duration}; +use std::{ + collections::HashMap, + io::{BufReader, Read}, + time::Duration, +}; use super::{ aggregation_models::{EngineErrorInfo, SearchResult, SearchResults}, @@ -176,10 +180,10 @@ fn filter_with_lists( resultant_map: &mut HashMap, file_path: &str, ) -> Result<(), Box> { - for (url, search_result) in map_to_be_filtered.clone().into_iter() { - let reader = BufReader::new(File::open(file_path)?); - for line in reader.lines() { - let re = Regex::new(&line?)?; + let mut reader = BufReader::new(File::open(file_path)?); + for line in reader.by_ref().lines() { + let re = Regex::new(&line?)?; + for (url, search_result) in map_to_be_filtered.clone().into_iter() { if re.is_match(&url.to_lowercase()) || re.is_match(&search_result.title.to_lowercase()) || re.is_match(&search_result.description.to_lowercase())