From 07e1f663df7146eac930ff2a785f6918bd3da098 Mon Sep 17 00:00:00 2001 From: m00nwtchr Date: Sat, 25 May 2024 15:30:08 +0200 Subject: [PATCH] Add option to use a proxy for outgoing (search engine) requests. --- src/config/parser.rs | 14 +++++++++++ src/results/aggregator.rs | 50 ++++++++++++++++++++++----------------- websurfx/config.lua | 2 ++ 3 files changed, 44 insertions(+), 22 deletions(-) diff --git a/src/config/parser.rs b/src/config/parser.rs index 8bed460..b9556c4 100644 --- a/src/config/parser.rs +++ b/src/config/parser.rs @@ -6,6 +6,7 @@ use crate::handler::{file_path, FileType}; use crate::models::parser_models::{AggregatorConfig, RateLimiter, Style}; use log::LevelFilter; use mlua::Lua; +use reqwest::Proxy; use std::{collections::HashMap, fs, thread::available_parallelism}; /// A named struct which stores the parsed config file options. @@ -48,6 +49,9 @@ pub struct Config { pub tcp_connection_keep_alive: u8, /// It stores the pool idle connection timeout in seconds. pub pool_idle_connection_timeout: u8, + + /// Url of the proxy to use for outgoing requests. 
+ pub proxy: Option<Proxy>, } impl Config { @@ -118,6 +122,15 @@ impl Config { _ => parsed_cet, }; + let proxy_str = globals.get::<_, String>("proxy")?; + let proxy = match Proxy::all(proxy_str) { + Ok(proxy) => Some(proxy), + Err(_) => { + log::error!("Invalid proxy url, defaulting to no proxy."); + None + } + }; + Ok(Config { port: globals.get::<_, u16>("port")?, binding_ip: globals.get::<_, String>("binding_ip")?, @@ -148,6 +161,7 @@ impl Config { safe_search, #[cfg(any(feature = "redis-cache", feature = "memory-cache"))] cache_expiry_time, + proxy, }) } } diff --git a/src/results/aggregator.rs b/src/results/aggregator.rs index f8f1040..2bc09b1 100644 --- a/src/results/aggregator.rs +++ b/src/results/aggregator.rs @@ -76,7 +76,7 @@ pub async fn aggregate( safe_search: u8, ) -> Result> { let client = CLIENT.get_or_init(|| { - ClientBuilder::new() + let mut cb = ClientBuilder::new() .timeout(Duration::from_secs(config.request_timeout as u64)) // Add timeout to request to avoid DDOSing the server .pool_idle_timeout(Duration::from_secs( config.pool_idle_connection_timeout as u64, @@ -86,9 +86,13 @@ pub async fn aggregate( .https_only(true) .gzip(true) .brotli(true) - .http2_adaptive_window(config.adaptive_window) - .build() - .unwrap() + .http2_adaptive_window(config.adaptive_window); + + if config.proxy.is_some() { + cb = cb.proxy(config.proxy.clone().unwrap()); + } + + cb.build().unwrap() }); let user_agent: &str = random_user_agent(); @@ -247,6 +251,7 @@ pub async fn filter_with_lists( Ok(()) } + /// Sorts SearchResults by relevance score. ///
sort_unstable is used as its faster,stability is not an issue on our side. /// For reasons why, check out [`this`](https://rust-lang.github.io/rfcs/1884-unstable-sort.html) @@ -262,6 +267,7 @@ fn sort_search_results(results: &mut [SearchResult]) { .unwrap_or(Ordering::Less) }) } + #[cfg(test)] mod tests { use super::*; @@ -285,15 +291,15 @@ mod tests { }, )); map_to_be_filtered.push(( - "https://www.rust-lang.org/".to_owned(), - SearchResult { - title: "Rust Programming Language".to_owned(), - url: "https://www.rust-lang.org/".to_owned(), - description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(), - engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()], - relevance_score:0.0 - },) - ); + "https://www.rust-lang.org/".to_owned(), + SearchResult { + title: "Rust Programming Language".to_owned(), + url: "https://www.rust-lang.org/".to_owned(), + description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(), + engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()], + relevance_score: 0.0, + }, ) + ); // Create a temporary file with regex patterns let mut file = NamedTempFile::new()?; @@ -336,15 +342,15 @@ mod tests { }, )); map_to_be_filtered.push(( - "https://www.rust-lang.org/".to_owned(), - SearchResult { - title: "Rust Programming Language".to_owned(), - url: "https://www.rust-lang.org/".to_owned(), - description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(), - engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()], - relevance_score:0.0 - }, - )); + "https://www.rust-lang.org/".to_owned(), + SearchResult { + title: "Rust Programming Language".to_owned(), + url: "https://www.rust-lang.org/".to_owned(), + description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread 
safety.".to_owned(), + engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()], + relevance_score: 0.0, + }, + )); // Create a temporary file with a regex pattern containing a wildcard let mut file = NamedTempFile::new()?; diff --git a/websurfx/config.lua b/websurfx/config.lua index f346c1f..61857e4 100644 --- a/websurfx/config.lua +++ b/websurfx/config.lua @@ -73,3 +73,5 @@ upstream_search_engines = { Mojeek = false, Bing = false, } -- select the upstream search engines from which the results should be fetched. + +proxy = "" -- Proxy to send outgoing requests through. Set to empty string to disable. \ No newline at end of file