From c796ae8bb74e66fd13b303a918d45f2f45ff6462 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Maria=C5=84ski?= <13919176+m00nwtchr@users.noreply.github.com> Date: Sat, 5 Oct 2024 03:47:36 +0000 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20Option=20to=20use=20a=20proxy=20for?= =?UTF-8?q?=20outgoing=20`upstream=20search=20engine`=20requests=20(#573)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add option to use a proxy for outgoing (search engine) requests. * Enable socks feature in reqwest * Fix formatting * add proxy feature * Update src/config/parser.rs Co-authored-by: neon_arch * Update websurfx/config.lua Co-authored-by: neon_arch * Update Cargo.toml Co-authored-by: neon_arch * fix * Update Cargo.toml Co-authored-by: neon_arch --------- Co-authored-by: neon_arch --- Cargo.toml | 3 ++- src/config/parser.rs | 12 ++++++++++++ src/results/aggregator.rs | 14 ++++++++++---- websurfx/config.lua | 2 ++ 4 files changed, 26 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index ecad1cc..2365cbb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,8 @@ reqwest = { version = "0.12.5", default-features = false, features = [ "rustls-tls", "brotli", "gzip", - "http2" + "http2", + "socks", ] } tokio = { version = "1.32.0", features = [ "rt-multi-thread", diff --git a/src/config/parser.rs b/src/config/parser.rs index 5ff9444..5d821c3 100644 --- a/src/config/parser.rs +++ b/src/config/parser.rs @@ -6,6 +6,7 @@ use crate::handler::{file_path, FileType}; use crate::models::parser_models::{AggregatorConfig, RateLimiter, Style}; use log::LevelFilter; use mlua::Lua; +use reqwest::Proxy; use std::{collections::HashMap, fs, thread::available_parallelism}; /// A named struct which stores the parsed config file options. @@ -48,6 +49,8 @@ pub struct Config { pub tcp_connection_keep_alive: u8, /// It stores the pool idle connection timeout in seconds. pub pool_idle_connection_timeout: u8, + /// Url of the proxy to use for outgoing requests. + pub proxy: Option, /// It stores the number of https connections to keep in the pool. pub number_of_https_connections: u8, } @@ -120,6 +123,14 @@ impl Config { _ => parsed_cet, }; + let proxy_opt = globals.get::<_, Option>("proxy")?; + let proxy = proxy_opt.and_then(|proxy_str| { + Proxy::all(proxy_str).ok().and_then(|_| { + log::error!("Invalid proxy url, defaulting to no proxy."); + None + }) + }); + Ok(Config { port: globals.get::<_, u16>("port")?, binding_ip: globals.get::<_, String>("binding_ip")?, @@ -151,6 +162,7 @@ impl Config { safe_search, #[cfg(any(feature = "redis-cache", feature = "memory-cache"))] cache_expiry_time, + proxy, }) } } diff --git a/src/results/aggregator.rs b/src/results/aggregator.rs index 65d5a5e..a6b34a4 100644 --- a/src/results/aggregator.rs +++ b/src/results/aggregator.rs @@ -75,7 +75,7 @@ pub async fn aggregate( safe_search: u8, ) -> Result> { let client = CLIENT.get_or_init(|| { - ClientBuilder::new() + let mut cb = ClientBuilder::new() .timeout(Duration::from_secs(config.request_timeout as u64)) // Add timeout to request to avoid DDOSing the server .pool_idle_timeout(Duration::from_secs( config.pool_idle_connection_timeout as u64, @@ -86,9 +86,13 @@ pub async fn aggregate( .https_only(true) .gzip(true) .brotli(true) - .http2_adaptive_window(config.adaptive_window) - .build() - .unwrap() + .http2_adaptive_window(config.adaptive_window); + + if config.proxy.is_some() { + cb = cb.proxy(config.proxy.clone().unwrap()); + } + + cb.build().unwrap() }); let user_agent: &str = random_user_agent(); @@ -242,6 +246,7 @@ pub async fn filter_with_lists( Ok(()) } + /// Sorts SearchResults by relevance score. ///
sort_unstable is used as its faster,stability is not an issue on our side. /// For reasons why, check out [`this`](https://rust-lang.github.io/rfcs/1884-unstable-sort.html) @@ -257,6 +262,7 @@ fn sort_search_results(results: &mut [SearchResult]) { .unwrap_or(Ordering::Less) }) } + #[cfg(test)] mod tests { use super::*; diff --git a/websurfx/config.lua b/websurfx/config.lua index 16c6146..8dd7a40 100644 --- a/websurfx/config.lua +++ b/websurfx/config.lua @@ -75,3 +75,5 @@ upstream_search_engines = { Mojeek = false, Bing = false, } -- select the upstream search engines from which the results should be fetched. + +proxy = nil -- Proxy to send outgoing requests through. Set to nil to disable. \ No newline at end of file