From 07e1f663df7146eac930ff2a785f6918bd3da098 Mon Sep 17 00:00:00 2001 From: m00nwtchr Date: Sat, 25 May 2024 15:30:08 +0200 Subject: [PATCH 1/8] Add option to use a proxy for outgoing (search engine) requests. --- src/config/parser.rs | 14 +++++++++++ src/results/aggregator.rs | 50 ++++++++++++++++++++++----------------- websurfx/config.lua | 2 ++ 3 files changed, 44 insertions(+), 22 deletions(-) diff --git a/src/config/parser.rs b/src/config/parser.rs index 8bed460..b9556c4 100644 --- a/src/config/parser.rs +++ b/src/config/parser.rs @@ -6,6 +6,7 @@ use crate::handler::{file_path, FileType}; use crate::models::parser_models::{AggregatorConfig, RateLimiter, Style}; use log::LevelFilter; use mlua::Lua; +use reqwest::Proxy; use std::{collections::HashMap, fs, thread::available_parallelism}; /// A named struct which stores the parsed config file options. @@ -48,6 +49,9 @@ pub struct Config { pub tcp_connection_keep_alive: u8, /// It stores the pool idle connection timeout in seconds. pub pool_idle_connection_timeout: u8, + + /// Url of the proxy to use for outgoing requests. + pub proxy: Option, } impl Config { @@ -118,6 +122,15 @@ impl Config { _ => parsed_cet, }; + let proxy_str = globals.get::<_, String>("proxy")?; + let proxy = match Proxy::all(proxy_str) { + Ok(proxy) => Some(proxy), + Err(_) => { + log::error!("Invalid proxy url, defaulting to no proxy."); + None + } + }; + Ok(Config { port: globals.get::<_, u16>("port")?, binding_ip: globals.get::<_, String>("binding_ip")?, @@ -148,6 +161,7 @@ impl Config { safe_search, #[cfg(any(feature = "redis-cache", feature = "memory-cache"))] cache_expiry_time, + proxy, }) } } diff --git a/src/results/aggregator.rs b/src/results/aggregator.rs index f8f1040..2bc09b1 100644 --- a/src/results/aggregator.rs +++ b/src/results/aggregator.rs @@ -76,7 +76,7 @@ pub async fn aggregate( safe_search: u8, ) -> Result> { let client = CLIENT.get_or_init(|| { - ClientBuilder::new() + let mut cb = ClientBuilder::new() .timeout(Duration::from_secs(config.request_timeout as u64)) // Add timeout to request to avoid DDOSing the server .pool_idle_timeout(Duration::from_secs( config.pool_idle_connection_timeout as u64, @@ -86,9 +86,13 @@ pub async fn aggregate( .https_only(true) .gzip(true) .brotli(true) - .http2_adaptive_window(config.adaptive_window) - .build() - .unwrap() + .http2_adaptive_window(config.adaptive_window); + + if config.proxy.is_some() { + cb = cb.proxy(config.proxy.clone().unwrap()); + } + + cb.build().unwrap() }); let user_agent: &str = random_user_agent(); @@ -247,6 +251,7 @@ pub async fn filter_with_lists( Ok(()) } + /// Sorts SearchResults by relevance score. ///
sort_unstable is used as its faster,stability is not an issue on our side. /// For reasons why, check out [`this`](https://rust-lang.github.io/rfcs/1884-unstable-sort.html) @@ -262,6 +267,7 @@ fn sort_search_results(results: &mut [SearchResult]) { .unwrap_or(Ordering::Less) }) } + #[cfg(test)] mod tests { use super::*; @@ -285,15 +291,15 @@ mod tests { }, )); map_to_be_filtered.push(( - "https://www.rust-lang.org/".to_owned(), - SearchResult { - title: "Rust Programming Language".to_owned(), - url: "https://www.rust-lang.org/".to_owned(), - description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(), - engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()], - relevance_score:0.0 - },) - ); + "https://www.rust-lang.org/".to_owned(), + SearchResult { + title: "Rust Programming Language".to_owned(), + url: "https://www.rust-lang.org/".to_owned(), + description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(), + engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()], + relevance_score: 0.0, + }, ) + ); // Create a temporary file with regex patterns let mut file = NamedTempFile::new()?; @@ -336,15 +342,15 @@ mod tests { }, )); map_to_be_filtered.push(( - "https://www.rust-lang.org/".to_owned(), - SearchResult { - title: "Rust Programming Language".to_owned(), - url: "https://www.rust-lang.org/".to_owned(), - description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(), - engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()], - relevance_score:0.0 - }, - )); + "https://www.rust-lang.org/".to_owned(), + SearchResult { + title: "Rust Programming Language".to_owned(), + url: "https://www.rust-lang.org/".to_owned(), + description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(), + engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()], + relevance_score: 0.0, + }, + )); // Create a temporary file with a regex pattern containing a wildcard let mut file = NamedTempFile::new()?; diff --git a/websurfx/config.lua b/websurfx/config.lua index f346c1f..61857e4 100644 --- a/websurfx/config.lua +++ b/websurfx/config.lua @@ -73,3 +73,5 @@ upstream_search_engines = { Mojeek = false, Bing = false, } -- select the upstream search engines from which the results should be fetched. + +proxy = "" -- Proxy to send outgoing requests through. Set to empty string to disable. \ No newline at end of file From f6cf1ce5f543c27238c67035b6737ff3f9405e33 Mon Sep 17 00:00:00 2001 From: m00nwtchr Date: Sat, 25 May 2024 15:34:03 +0200 Subject: [PATCH 2/8] Enable socks feature in reqwest --- Cargo.lock | 33 +++++++++++++++++++++++++++++++++ Cargo.toml | 1 + 2 files changed, 34 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index e2469ad..19d111c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3038,6 +3038,7 @@ dependencies = [ "system-configuration", "tokio 1.36.0", "tokio-rustls", + "tokio-socks", "tokio-util", "tower-service", "url 2.5.0", @@ -3675,6 +3676,26 @@ dependencies = [ "libflate", ] +[[package]] +name = "thiserror" +version = "1.0.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "579e9083ca58dd9dcf91a9923bb9054071b9ebbd800b342194c9feb0ee89fc18" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2470041c06ec3ac1ab38d0356a6119054dedaea53e12fbefc0de730a1c08524" +dependencies = [ + "proc-macro2 1.0.78", + "quote 1.0.35", + "syn 2.0.52", +] + [[package]] name = "thousands" version = "0.2.0" @@ -3879,6 +3900,18 @@ dependencies = [ "tokio 1.36.0", ] +[[package]] +name = "tokio-socks" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51165dfa029d2a65969413a6cc96f354b86b464498702f174a4efa13608fd8c0" +dependencies = [ + "either", + "futures-util", + "thiserror", + "tokio 1.36.0", +] + [[package]] name = "tokio-sync" version = "0.1.8" diff --git a/Cargo.toml b/Cargo.toml index 38a9378..01be674 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,6 +17,7 @@ reqwest = { version = "0.11.24", default-features = false, features = [ "rustls-tls", "brotli", "gzip", + "socks" ] } tokio = { version = "1.32.0", features = [ "rt-multi-thread", From ed661174ba0e8e0b25ad79c5e7ff29a15d6dff50 Mon Sep 17 00:00:00 2001 From: m00nwtchr Date: Sat, 25 May 2024 15:41:32 +0200 Subject: [PATCH 3/8] Fix formatting --- src/results/aggregator.rs | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/results/aggregator.rs b/src/results/aggregator.rs index 2bc09b1..7d2cb56 100644 --- a/src/results/aggregator.rs +++ b/src/results/aggregator.rs @@ -291,15 +291,15 @@ mod tests { }, )); map_to_be_filtered.push(( - "https://www.rust-lang.org/".to_owned(), - SearchResult { - title: "Rust Programming Language".to_owned(), - url: "https://www.rust-lang.org/".to_owned(), - description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(), - engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()], - relevance_score: 0.0, - }, ) - ); + "https://www.rust-lang.org/".to_owned(), + SearchResult { + title: "Rust Programming Language".to_owned(), + url: "https://www.rust-lang.org/".to_owned(), + description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(), + engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()], + relevance_score: 0.0, + }, ) + ); // Create a temporary file with regex patterns let mut file = NamedTempFile::new()?; @@ -342,15 +342,15 @@ mod tests { }, )); map_to_be_filtered.push(( - "https://www.rust-lang.org/".to_owned(), - SearchResult { - title: "Rust Programming Language".to_owned(), - url: "https://www.rust-lang.org/".to_owned(), - description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(), - engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()], - relevance_score: 0.0, - }, - )); + "https://www.rust-lang.org/".to_owned(), + SearchResult { + title: "Rust Programming Language".to_owned(), + url: "https://www.rust-lang.org/".to_owned(), + description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(), + engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()], + relevance_score: 0.0, + }, + )); // Create a temporary file with a regex pattern containing a wildcard let mut file = NamedTempFile::new()?; From 89d367ff999d3b54d6632744a185399448081afc Mon Sep 17 00:00:00 2001 From: m00nwtchr Date: Wed, 11 Sep 2024 18:05:47 +0200 Subject: [PATCH 4/8] add proxy feature --- Cargo.lock | 6 +++--- Cargo.toml | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 932ab93..7e8cf25 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4035,14 +4035,14 @@ dependencies = [ [[package]] name = "tokio-socks" -version = "0.5.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51165dfa029d2a65969413a6cc96f354b86b464498702f174a4efa13608fd8c0" +checksum = "0d4770b8024672c1101b3f6733eab95b18007dbe0847a8afe341fcf79e06043f" dependencies = [ "either", "futures-util", "thiserror", - "tokio 1.36.0", + "tokio 1.38.0", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 3c4c003..53347c4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,6 @@ reqwest = { version = "0.12.5", default-features = false, features = [ "rustls-tls", "brotli", "gzip", - "socks", "http2", ] } tokio = { version = "1.32.0", features = [ @@ -179,7 +178,7 @@ opt-level = "z" [features] use-synonyms-search = ["thesaurus/static"] -default = ["memory-cache"] +default = ["memory-cache", "socks"] dhat-heap = ["dep:dhat"] memory-cache = ["dep:mini-moka"] redis-cache = ["dep:redis", "dep:base64"] @@ -188,3 +187,4 @@ encrypt-cache-results = ["dep:chacha20poly1305", "dep:chacha20"] cec-cache-results = ["compress-cache-results", "encrypt-cache-results"] experimental-io-uring = ["actix-web/experimental-io-uring"] use-non-static-synonyms-search = ["thesaurus"] +socks = ["reqwest/socks"] \ No newline at end of file From 6464827834e107408572e074ea28c62d53b6c440 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Maria=C5=84ski?= <13919176+m00nwtchr@users.noreply.github.com> Date: Fri, 4 Oct 2024 06:21:11 +0000 Subject: [PATCH 5/8] Update src/config/parser.rs Co-authored-by: neon_arch --- src/config/parser.rs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/config/parser.rs b/src/config/parser.rs index d9c5c5e..e51f5ed 100644 --- a/src/config/parser.rs +++ b/src/config/parser.rs @@ -123,14 +123,12 @@ impl Config { _ => parsed_cet, }; - let proxy_str = globals.get::<_, String>("proxy")?; - let proxy = match Proxy::all(proxy_str) { - Ok(proxy) => Some(proxy), - Err(_) => { + let proxy_opt = globals.get::<_, Option>("proxy")?; + let proxy = proxy_opt.and_then(|proxy_str| { + Proxy::all(proxy_str).ok().and_then(|_| { log::error!("Invalid proxy url, defaulting to no proxy."); None - } - }; + }) Ok(Config { port: globals.get::<_, u16>("port")?, From aa18ce3a6711cb9eba1275ce8334794e0a3b58ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Maria=C5=84ski?= <13919176+m00nwtchr@users.noreply.github.com> Date: Fri, 4 Oct 2024 06:21:26 +0000 Subject: [PATCH 6/8] Update websurfx/config.lua Co-authored-by: neon_arch --- websurfx/config.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/websurfx/config.lua b/websurfx/config.lua index 67f4ca0..8dd7a40 100644 --- a/websurfx/config.lua +++ b/websurfx/config.lua @@ -76,4 +76,4 @@ upstream_search_engines = { Bing = false, } -- select the upstream search engines from which the results should be fetched. -proxy = "" -- Proxy to send outgoing requests through. Set to empty string to disable. \ No newline at end of file +proxy = nil -- Proxy to send outgoing requests through. Set to nil to disable. \ No newline at end of file From 808707c43be326b59ef09f4e245d5cb50fd3da07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Maria=C5=84ski?= <13919176+m00nwtchr@users.noreply.github.com> Date: Fri, 4 Oct 2024 06:21:41 +0000 Subject: [PATCH 7/8] Update Cargo.toml Co-authored-by: neon_arch --- Cargo.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 557d953..5eda203 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -182,5 +182,4 @@ compress-cache-results = ["dep:async-compression", "dep:cfg-if"] encrypt-cache-results = ["dep:chacha20poly1305", "dep:chacha20"] cec-cache-results = ["compress-cache-results", "encrypt-cache-results"] experimental-io-uring = ["actix-web/experimental-io-uring"] -use-non-static-synonyms-search = ["thesaurus"] -socks = ["reqwest/socks"] \ No newline at end of file +use-non-static-synonyms-search = ["thesaurus"] \ No newline at end of file From d20c5c4dab85b8cdbb3ca1d74756e90df8fe3c80 Mon Sep 17 00:00:00 2001 From: m00nwtchr Date: Fri, 4 Oct 2024 08:28:44 +0200 Subject: [PATCH 8/8] fix --- Cargo.lock | 13 ------------- Cargo.toml | 4 ++-- src/config/parser.rs | 3 ++- 3 files changed, 4 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b9222bc..de9a820 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3266,7 +3266,6 @@ dependencies = [ "sync_wrapper", "tokio 1.40.0", "tokio-rustls", - "tokio-socks", "tokio-util", "tower-service", "url 2.5.2", @@ -4097,18 +4096,6 @@ dependencies = [ "tokio 1.40.0", ] -[[package]] -name = "tokio-socks" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d4770b8024672c1101b3f6733eab95b18007dbe0847a8afe341fcf79e06043f" -dependencies = [ - "either", - "futures-util", - "thiserror", - "tokio 1.38.0", -] - [[package]] name = "tokio-sync" version = "0.1.8" diff --git a/Cargo.toml b/Cargo.toml index 5eda203..68a3b4b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -174,7 +174,7 @@ opt-level = "z" [features] use-synonyms-search = ["thesaurus/static"] -default = ["memory-cache", "socks"] +default = ["memory-cache"] dhat-heap = ["dep:dhat"] memory-cache = ["dep:moka"] redis-cache = ["dep:redis", "dep:base64"] @@ -182,4 +182,4 @@ compress-cache-results = ["dep:async-compression", "dep:cfg-if"] encrypt-cache-results = ["dep:chacha20poly1305", "dep:chacha20"] cec-cache-results = ["compress-cache-results", "encrypt-cache-results"] experimental-io-uring = ["actix-web/experimental-io-uring"] -use-non-static-synonyms-search = ["thesaurus"] \ No newline at end of file +use-non-static-synonyms-search = ["thesaurus"] diff --git a/src/config/parser.rs b/src/config/parser.rs index e51f5ed..5d821c3 100644 --- a/src/config/parser.rs +++ b/src/config/parser.rs @@ -123,12 +123,13 @@ impl Config { _ => parsed_cet, }; - let proxy_opt = globals.get::<_, Option>("proxy")?; + let proxy_opt = globals.get::<_, Option>("proxy")?; let proxy = proxy_opt.and_then(|proxy_str| { Proxy::all(proxy_str).ok().and_then(|_| { log::error!("Invalid proxy url, defaulting to no proxy."); None }) + }); Ok(Config { port: globals.get::<_, u16>("port")?,