0
0
mirror of https://github.com/neon-mmd/websurfx.git synced 2024-11-24 23:18:22 -05:00

Compare commits

..

1 Commits

Author SHA1 Message Date
Kekma
9a812c1282
Merge c6b93403b8 into 948d20d8fb 2024-09-04 15:49:34 +00:00
15 changed files with 663 additions and 739 deletions

1160
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
[package] [package]
name = "websurfx" name = "websurfx"
version = "1.17.22" version = "1.17.20"
edition = "2021" edition = "2021"
description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind." description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
repository = "https://github.com/neon-mmd/websurfx" repository = "https://github.com/neon-mmd/websurfx"
@ -27,7 +27,6 @@ tokio = { version = "1.32.0", features = [
], default-features = false } ], default-features = false }
serde = { version = "1.0.209", default-features = false, features = ["derive"] } serde = { version = "1.0.209", default-features = false, features = ["derive"] }
serde_json = { version = "1.0.122", default-features = false } serde_json = { version = "1.0.122", default-features = false }
bincode = {version="1.3.3", default-features=false}
maud = { version = "0.26.0", default-features = false, features = [ maud = { version = "0.26.0", default-features = false, features = [
"actix-web", "actix-web",
] } ] }
@ -49,7 +48,6 @@ mlua = { version = "0.9.9", features = [
redis = { version = "0.25.4", features = [ redis = { version = "0.25.4", features = [
"tokio-comp", "tokio-comp",
"connection-manager", "connection-manager",
"tcp_nodelay"
], default-features = false, optional = true } ], default-features = false, optional = true }
blake3 = { version = "1.5.4", default-features = false } blake3 = { version = "1.5.4", default-features = false }
error-stack = { version = "0.4.0", default-features = false, features = [ error-stack = { version = "0.4.0", default-features = false, features = [
@ -57,13 +55,17 @@ error-stack = { version = "0.4.0", default-features = false, features = [
] } ] }
async-trait = { version = "0.1.80", default-features = false } async-trait = { version = "0.1.80", default-features = false }
regex = { version = "1.9.4", features = ["perf"], default-features = false } regex = { version = "1.9.4", features = ["perf"], default-features = false }
smallvec = { version = "1.13.1", features = [
"union",
"serde",
], default-features = false }
futures = { version = "0.3.30", default-features = false, features = ["alloc"] } futures = { version = "0.3.30", default-features = false, features = ["alloc"] }
dhat = { version = "0.3.2", optional = true, default-features = false } dhat = { version = "0.3.2", optional = true, default-features = false }
mimalloc = { version = "0.1.43", default-features = false } mimalloc = { version = "0.1.43", default-features = false }
async-once-cell = { version = "0.5.3", default-features = false } async-once-cell = { version = "0.5.3", default-features = false }
actix-governor = { version = "0.5.0", default-features = false } actix-governor = { version = "0.5.0", default-features = false }
moka = { version = "0.12.8", optional = true, default-features = false, features = [ mini-moka = { version = "0.10", optional = true, default-features = false, features = [
"future", "sync",
] } ] }
async-compression = { version = "0.4.12", default-features = false, features = [ async-compression = { version = "0.4.12", default-features = false, features = [
"brotli", "brotli",
@ -80,8 +82,8 @@ base64 = { version = "0.21.5", default-features = false, features = [
cfg-if = { version = "1.0.0", default-features = false, optional = true } cfg-if = { version = "1.0.0", default-features = false, optional = true }
keyword_extraction = { version = "1.4.3", default-features = false, features = [ keyword_extraction = { version = "1.4.3", default-features = false, features = [
"tf_idf", "tf_idf",
"rayon",
] } ] }
stop-words = { version = "0.8.0", default-features = false, features = ["iso"] } stop-words = { version = "0.8.0", default-features = false, features = ["iso"] }
thesaurus = { version = "0.5.2", default-features = false, optional = true, features = [ thesaurus = { version = "0.5.2", default-features = false, optional = true, features = [
"moby", "moby",
@ -102,6 +104,8 @@ lightningcss = { version = "1.0.0-alpha.57", default-features = false, features
# Temporary fork with fix # Temporary fork with fix
minify-js = { git = "https://github.com/RuairidhWilliamson/minify-js", branch = "master", version = "0.6.0", default-features = false} minify-js = { git = "https://github.com/RuairidhWilliamson/minify-js", branch = "master", version = "0.6.0", default-features = false}
[profile.dev] [profile.dev]
opt-level = 0 opt-level = 0
debug = true debug = true
@ -176,7 +180,7 @@ opt-level = "z"
use-synonyms-search = ["thesaurus/static"] use-synonyms-search = ["thesaurus/static"]
default = ["memory-cache"] default = ["memory-cache"]
dhat-heap = ["dep:dhat"] dhat-heap = ["dep:dhat"]
memory-cache = ["dep:moka"] memory-cache = ["dep:mini-moka"]
redis-cache = ["dep:redis", "dep:base64"] redis-cache = ["dep:redis", "dep:base64"]
compress-cache-results = ["dep:async-compression", "dep:cfg-if"] compress-cache-results = ["dep:async-compression", "dep:cfg-if"]
encrypt-cache-results = ["dep:chacha20poly1305", "dep:chacha20"] encrypt-cache-results = ["dep:chacha20poly1305", "dep:chacha20"]

View File

@ -34,11 +34,11 @@
}, },
"nixpkgs_2": { "nixpkgs_2": {
"locked": { "locked": {
"lastModified": 1725194671, "lastModified": 1695318763,
"narHash": "sha256-tLGCFEFTB5TaOKkpfw3iYT9dnk4awTP/q4w+ROpMfuw=", "narHash": "sha256-FHVPDRP2AfvsxAdc+AsgFJevMz5VBmnZglFUMlxBkcY=",
"owner": "NixOS", "owner": "NixOS",
"repo": "nixpkgs", "repo": "nixpkgs",
"rev": "b833ff01a0d694b910daca6e2ff4a3f26dee478c", "rev": "e12483116b3b51a185a33a272bf351e357ba9a99",
"type": "github" "type": "github"
}, },
"original": { "original": {

View File

@ -36,7 +36,7 @@
haskellPackages.hadolint haskellPackages.hadolint
nodejs nodejs
nodePackages_latest.cspell nodePackages_latest.cspell
eslint nodePackages_latest.eslint
nodePackages_latest.markdownlint-cli2 nodePackages_latest.markdownlint-cli2
nodePackages_latest.stylelint nodePackages_latest.stylelint
redis redis

47
src/cache/cacher.rs vendored
View File

@ -2,9 +2,10 @@
//! from the upstream search engines in a json format. //! from the upstream search engines in a json format.
use error_stack::Report; use error_stack::Report;
use futures::future::join_all;
#[cfg(feature = "memory-cache")] #[cfg(feature = "memory-cache")]
use moka::future::Cache as MokaCache; use mini_moka::sync::Cache as MokaCache;
#[cfg(feature = "memory-cache")]
use mini_moka::sync::ConcurrentCacheExt;
#[cfg(feature = "memory-cache")] #[cfg(feature = "memory-cache")]
use std::time::Duration; use std::time::Duration;
@ -375,13 +376,13 @@ impl Cacher for RedisCache {
} }
} }
/// TryInto implementation for SearchResults from Vec<u8> /// TryInto implementation for SearchResults from Vec<u8>
use std::{convert::TryInto, sync::Arc}; use std::convert::TryInto;
impl TryInto<SearchResults> for Vec<u8> { impl TryInto<SearchResults> for Vec<u8> {
type Error = CacheError; type Error = CacheError;
fn try_into(self) -> Result<SearchResults, Self::Error> { fn try_into(self) -> Result<SearchResults, Self::Error> {
bincode::deserialize_from(self.as_slice()).map_err(|_| CacheError::SerializationError) serde_json::from_slice(&self).map_err(|_| CacheError::SerializationError)
} }
} }
@ -389,7 +390,7 @@ impl TryInto<Vec<u8>> for &SearchResults {
type Error = CacheError; type Error = CacheError;
fn try_into(self) -> Result<Vec<u8>, Self::Error> { fn try_into(self) -> Result<Vec<u8>, Self::Error> {
bincode::serialize(self).map_err(|_| CacheError::SerializationError) serde_json::to_vec(self).map_err(|_| CacheError::SerializationError)
} }
} }
@ -397,16 +398,7 @@ impl TryInto<Vec<u8>> for &SearchResults {
#[cfg(feature = "memory-cache")] #[cfg(feature = "memory-cache")]
pub struct InMemoryCache { pub struct InMemoryCache {
/// The backend cache which stores data. /// The backend cache which stores data.
cache: Arc<MokaCache<String, Vec<u8>>>, cache: MokaCache<String, Vec<u8>>,
}
#[cfg(feature = "memory-cache")]
impl Clone for InMemoryCache {
fn clone(&self) -> Self {
Self {
cache: self.cache.clone(),
}
}
} }
#[cfg(feature = "memory-cache")] #[cfg(feature = "memory-cache")]
@ -416,17 +408,15 @@ impl Cacher for InMemoryCache {
log::info!("Initialising in-memory cache"); log::info!("Initialising in-memory cache");
InMemoryCache { InMemoryCache {
cache: Arc::new( cache: MokaCache::builder()
MokaCache::builder() .time_to_live(Duration::from_secs(config.cache_expiry_time.into()))
.time_to_live(Duration::from_secs(config.cache_expiry_time.into())) .build(),
.build(),
),
} }
} }
async fn cached_results(&mut self, url: &str) -> Result<SearchResults, Report<CacheError>> { async fn cached_results(&mut self, url: &str) -> Result<SearchResults, Report<CacheError>> {
let hashed_url_string = self.hash_url(url); let hashed_url_string = self.hash_url(url);
match self.cache.get(&hashed_url_string).await { match self.cache.get(&hashed_url_string) {
Some(res) => self.post_process_search_results(res).await, Some(res) => self.post_process_search_results(res).await,
None => Err(Report::new(CacheError::MissingValue)), None => Err(Report::new(CacheError::MissingValue)),
} }
@ -437,18 +427,13 @@ impl Cacher for InMemoryCache {
search_results: &[SearchResults], search_results: &[SearchResults],
urls: &[String], urls: &[String],
) -> Result<(), Report<CacheError>> { ) -> Result<(), Report<CacheError>> {
let mut tasks: Vec<_> = Vec::with_capacity(urls.len());
for (url, search_result) in urls.iter().zip(search_results.iter()) { for (url, search_result) in urls.iter().zip(search_results.iter()) {
let hashed_url_string = self.hash_url(url); let hashed_url_string = self.hash_url(url);
let bytes = self.pre_process_search_results(search_result).await?; let bytes = self.pre_process_search_results(search_result).await?;
let new_self = self.clone(); self.cache.insert(hashed_url_string, bytes);
tasks.push(tokio::spawn(async move {
new_self.cache.insert(hashed_url_string, bytes).await
}));
} }
join_all(tasks).await; self.cache.sync();
Ok(()) Ok(())
} }
} }
@ -546,7 +531,7 @@ impl SharedCache {
/// # Arguments /// # Arguments
/// ///
/// * `url` - It takes the search url as an argument which will be used as the key to fetch the /// * `url` - It takes the search url as an argument which will be used as the key to fetch the
/// cached results from the cache. /// cached results from the cache.
/// ///
/// # Error /// # Error
/// ///
@ -563,9 +548,9 @@ impl SharedCache {
/// # Arguments /// # Arguments
/// ///
/// * `search_results` - It takes the `SearchResults` as an argument which are results that /// * `search_results` - It takes the `SearchResults` as an argument which are results that
/// needs to be cached. /// needs to be cached.
/// * `url` - It takes the search url as an argument which will be used as the key for storing /// * `url` - It takes the search url as an argument which will be used as the key for storing
/// results in the cache. /// results in the cache.
/// ///
/// # Error /// # Error
/// ///

View File

@ -16,7 +16,7 @@ const REDIS_PIPELINE_SIZE: usize = 3;
/// connect to. /// connect to.
pub struct RedisCache { pub struct RedisCache {
/// It stores a pool of connections ready to be used. /// It stores a pool of connections ready to be used.
connection_pool: Box<[ConnectionManager]>, connection_pool: Vec<ConnectionManager>,
/// It stores the size of the connection pool (in other words the number of /// It stores the size of the connection pool (in other words the number of
/// connections that should be stored in the pool). /// connections that should be stored in the pool).
pool_size: u8, pool_size: u8,
@ -58,13 +58,13 @@ impl RedisCache {
})); }));
} }
let mut outputs = Vec::with_capacity(tasks.len()); let mut outputs = Vec::new();
for task in tasks { for task in tasks {
outputs.push(task.await??); outputs.push(task.await??);
} }
let redis_cache = RedisCache { let redis_cache = RedisCache {
connection_pool: outputs.into_boxed_slice(), connection_pool: outputs,
pool_size, pool_size,
current_connection: Default::default(), current_connection: Default::default(),
cache_ttl, cache_ttl,

View File

@ -48,8 +48,6 @@ pub struct Config {
pub tcp_connection_keep_alive: u8, pub tcp_connection_keep_alive: u8,
/// It stores the pool idle connection timeout in seconds. /// It stores the pool idle connection timeout in seconds.
pub pool_idle_connection_timeout: u8, pub pool_idle_connection_timeout: u8,
/// It stores the number of https connections to keep in the pool.
pub number_of_https_connections: u8,
} }
impl Config { impl Config {
@ -59,7 +57,7 @@ impl Config {
/// # Arguments /// # Arguments
/// ///
/// * `logging_initialized` - It takes a boolean which ensures that the logging doesn't get /// * `logging_initialized` - It takes a boolean which ensures that the logging doesn't get
/// initialized twice. Pass false if the logger has not yet been initialized. /// initialized twice. Pass false if the logger has not yet been initialized.
/// ///
/// # Error /// # Error
/// ///
@ -141,7 +139,6 @@ impl Config {
request_timeout: globals.get::<_, u8>("request_timeout")?, request_timeout: globals.get::<_, u8>("request_timeout")?,
tcp_connection_keep_alive: globals.get::<_, u8>("tcp_connection_keep_alive")?, tcp_connection_keep_alive: globals.get::<_, u8>("tcp_connection_keep_alive")?,
pool_idle_connection_timeout: globals.get::<_, u8>("pool_idle_connection_timeout")?, pool_idle_connection_timeout: globals.get::<_, u8>("pool_idle_connection_timeout")?,
number_of_https_connections: globals.get::<_, u8>("number_of_https_connections")?,
threads, threads,
client_connection_keep_alive: globals.get::<_, u8>("client_connection_keep_alive")?, client_connection_keep_alive: globals.get::<_, u8>("client_connection_keep_alive")?,
rate_limiter: RateLimiter { rate_limiter: RateLimiter {

View File

@ -3,6 +3,7 @@
use super::engine_models::EngineError; use super::engine_models::EngineError;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use smallvec::SmallVec;
#[cfg(any( #[cfg(any(
feature = "use-synonyms-search", feature = "use-synonyms-search",
feature = "use-non-static-synonyms-search" feature = "use-non-static-synonyms-search"
@ -11,9 +12,7 @@ use thesaurus::synonyms;
/// A named struct to store the raw scraped search results scraped search results from the /// A named struct to store the raw scraped search results scraped search results from the
/// upstream search engines before aggregating it.It derives the Clone trait which is needed /// upstream search engines before aggregating it.It derives the Clone trait which is needed
/// to write idiomatic rust using `Iterators`. /// to write idiomatic rust using `Iterators`.
/// /// (href url in html in simple words).
/// (href url in html in simple words).
///
#[derive(Clone, Serialize, Deserialize)] #[derive(Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")] #[serde(rename_all = "camelCase")]
pub struct SearchResult { pub struct SearchResult {
@ -24,7 +23,7 @@ pub struct SearchResult {
/// The description of the search result. /// The description of the search result.
pub description: String, pub description: String,
/// The names of the upstream engines from which this results were provided. /// The names of the upstream engines from which this results were provided.
pub engine: Vec<String>, pub engine: SmallVec<[String; 0]>,
/// The td-tdf score of the result in regards to the title, url and description and the user's query /// The td-tdf score of the result in regards to the title, url and description and the user's query
pub relevance_score: f32, pub relevance_score: f32,
} }
@ -36,7 +35,7 @@ impl SearchResult {
/// ///
/// * `title` - The title of the search result. /// * `title` - The title of the search result.
/// * `url` - The url which is accessed when clicked on it /// * `url` - The url which is accessed when clicked on it
/// (href url in html in simple words). /// (href url in html in simple words).
/// * `description` - The description of the search result. /// * `description` - The description of the search result.
/// * `engine` - The names of the upstream engines from which this results were provided. /// * `engine` - The names of the upstream engines from which this results were provided.
pub fn new(title: &str, url: &str, description: &str, engine: &[&str]) -> Self { pub fn new(title: &str, url: &str, description: &str, engine: &[&str]) -> Self {
@ -126,7 +125,7 @@ impl EngineErrorInfo {
/// # Arguments /// # Arguments
/// ///
/// * `error` - It takes the error type which occured while fetching the result from a particular /// * `error` - It takes the error type which occured while fetching the result from a particular
/// search engine. /// search engine.
/// * `engine` - It takes the name of the engine that failed to provide the requested search results. /// * `engine` - It takes the name of the engine that failed to provide the requested search results.
pub fn new(error: &EngineError, engine: &str) -> Self { pub fn new(error: &EngineError, engine: &str) -> Self {
Self { Self {
@ -154,10 +153,10 @@ impl EngineErrorInfo {
#[serde(rename_all = "camelCase")] #[serde(rename_all = "camelCase")]
pub struct SearchResults { pub struct SearchResults {
/// Stores the individual serializable `SearchResult` struct into a vector of /// Stores the individual serializable `SearchResult` struct into a vector of
pub results: Box<[SearchResult]>, pub results: Vec<SearchResult>,
/// Stores the information on which engines failed with their engine name /// Stores the information on which engines failed with their engine name
/// and the type of error that caused it. /// and the type of error that caused it.
pub engine_errors_info: Box<[EngineErrorInfo]>, pub engine_errors_info: Vec<EngineErrorInfo>,
/// Stores the flag option which holds the check value that the following /// Stores the flag option which holds the check value that the following
/// search query was disallowed when the safe search level set to 4 and it /// search query was disallowed when the safe search level set to 4 and it
/// was present in the `Blocklist` file. /// was present in the `Blocklist` file.
@ -179,15 +178,15 @@ impl SearchResults {
/// # Arguments /// # Arguments
/// ///
/// * `results` - Takes an argument of individual serializable `SearchResult` struct /// * `results` - Takes an argument of individual serializable `SearchResult` struct
/// and stores it into a vector of `SearchResult` structs. /// and stores it into a vector of `SearchResult` structs.
/// * `page_query` - Takes an argument of current page`s search query `q` provided in /// * `page_query` - Takes an argument of current page`s search query `q` provided in
/// the search url. /// the search url.
/// * `engine_errors_info` - Takes an array of structs which contains information regarding /// * `engine_errors_info` - Takes an array of structs which contains information regarding
/// which engines failed with their names, reason and their severity color name. /// which engines failed with their names, reason and their severity color name.
pub fn new(results: Box<[SearchResult]>, engine_errors_info: Box<[EngineErrorInfo]>) -> Self { pub fn new(results: Vec<SearchResult>, engine_errors_info: &[EngineErrorInfo]) -> Self {
Self { Self {
results, results,
engine_errors_info, engine_errors_info: engine_errors_info.to_owned(),
disallowed: Default::default(), disallowed: Default::default(),
filtered: Default::default(), filtered: Default::default(),
safe_search_level: Default::default(), safe_search_level: Default::default(),
@ -206,11 +205,11 @@ impl SearchResults {
} }
/// A getter function that gets the value of `engine_errors_info`. /// A getter function that gets the value of `engine_errors_info`.
pub fn engine_errors_info(&mut self) -> Box<[EngineErrorInfo]> { pub fn engine_errors_info(&mut self) -> Vec<EngineErrorInfo> {
std::mem::take(&mut self.engine_errors_info) std::mem::take(&mut self.engine_errors_info)
} }
/// A getter function that gets the value of `results`. /// A getter function that gets the value of `results`.
pub fn results(&mut self) -> Box<[SearchResult]> { pub fn results(&mut self) -> Vec<SearchResult> {
self.results.clone() self.results.clone()
} }
@ -255,50 +254,27 @@ fn calculate_tf_idf(
let tf_idf = TfIdf::new(params); let tf_idf = TfIdf::new(params);
let tokener = Tokenizer::new(query, stop_words, Some(punctuation)); let tokener = Tokenizer::new(query, stop_words, Some(punctuation));
let query_tokens = tokener.split_into_words(); let query_tokens = tokener.split_into_words();
let mut search_tokens = vec![];
#[cfg(any( for token in query_tokens {
feature = "use-synonyms-search", #[cfg(any(
feature = "use-non-static-synonyms-search" feature = "use-synonyms-search",
))] feature = "use-non-static-synonyms-search"
let mut extra_tokens = vec![]; ))]
{
// find some synonyms and add them to the search (from wordnet or moby if feature is enabled)
let synonyms = synonyms(&token);
search_tokens.extend(synonyms)
}
search_tokens.push(token);
}
let total_score: f32 = query_tokens let mut total_score = 0.0f32;
.iter() for token in search_tokens.iter() {
.map(|token| { total_score += tf_idf.get_score(token);
#[cfg(any( }
feature = "use-synonyms-search",
feature = "use-non-static-synonyms-search"
))]
{
// find some synonyms and add them to the search (from wordnet or moby if feature is enabled)
extra_tokens.extend(synonyms(token))
}
tf_idf.get_score(token) let result = total_score / (search_tokens.len() as f32);
})
.sum();
#[cfg(not(any(
feature = "use-synonyms-search",
feature = "use-non-static-synonyms-search"
)))]
let result = total_score / (query_tokens.len() as f32);
#[cfg(any(
feature = "use-synonyms-search",
feature = "use-non-static-synonyms-search"
))]
let extra_total_score: f32 = extra_tokens
.iter()
.map(|token| tf_idf.get_score(token))
.sum();
#[cfg(any(
feature = "use-synonyms-search",
feature = "use-non-static-synonyms-search"
))]
let result =
(extra_total_score + total_score) / ((query_tokens.len() + extra_tokens.len()) as f32);
f32::from(!result.is_nan()) * result f32::from(!result.is_nan()) * result
} }

View File

@ -29,7 +29,7 @@ impl Style {
/// ///
/// * `theme` - It takes the parsed theme option used to set a theme for the website. /// * `theme` - It takes the parsed theme option used to set a theme for the website.
/// * `colorscheme` - It takes the parsed colorscheme option used to set a colorscheme /// * `colorscheme` - It takes the parsed colorscheme option used to set a colorscheme
/// for the theme being used. /// for the theme being used.
pub fn new(theme: String, colorscheme: String, animation: Option<String>) -> Self { pub fn new(theme: String, colorscheme: String, animation: Option<String>) -> Self {
Style { Style {
theme, theme,

View File

@ -11,7 +11,7 @@ use super::parser_models::Style;
pub struct SearchParams { pub struct SearchParams {
/// It stores the search parameter option `q` (or query in simple words) /// It stores the search parameter option `q` (or query in simple words)
/// of the search url. /// of the search url.
pub q: Option<Cow<'static, str>>, pub q: Option<String>,
/// It stores the search parameter `page` (or pageno in simple words) /// It stores the search parameter `page` (or pageno in simple words)
/// of the search url. /// of the search url.
pub page: Option<u32>, pub page: Option<u32>,
@ -29,7 +29,7 @@ pub struct Cookie<'a> {
/// It stores the colorscheme name used for the website theme. /// It stores the colorscheme name used for the website theme.
pub colorscheme: Cow<'a, str>, pub colorscheme: Cow<'a, str>,
/// It stores the user selected upstream search engines selected from the UI. /// It stores the user selected upstream search engines selected from the UI.
pub engines: Cow<'a, [Cow<'a, str>]>, pub engines: Cow<'a, Vec<Cow<'a, str>>>,
/// It stores the user selected safe search level from the UI. /// It stores the user selected safe search level from the UI.
pub safe_search_level: u8, pub safe_search_level: u8,
} }

View File

@ -14,6 +14,7 @@ use futures::stream::FuturesUnordered;
use regex::Regex; use regex::Regex;
use reqwest::{Client, ClientBuilder}; use reqwest::{Client, ClientBuilder};
use std::sync::Arc; use std::sync::Arc;
use std::time::{SystemTime, UNIX_EPOCH};
use tokio::{ use tokio::{
fs::File, fs::File,
io::{AsyncBufReadExt, BufReader}, io::{AsyncBufReadExt, BufReader},
@ -60,7 +61,7 @@ type FutureVec =
/// * `debug` - Accepts a boolean value to enable or disable debug mode option. /// * `debug` - Accepts a boolean value to enable or disable debug mode option.
/// * `upstream_search_engines` - Accepts a vector of search engine names which was selected by the /// * `upstream_search_engines` - Accepts a vector of search engine names which was selected by the
/// * `request_timeout` - Accepts a time (secs) as a value which controls the server request timeout. /// * `request_timeout` - Accepts a time (secs) as a value which controls the server request timeout.
/// user through the UI or the config file. /// user through the UI or the config file.
/// ///
/// # Error /// # Error
/// ///
@ -81,7 +82,6 @@ pub async fn aggregate(
config.pool_idle_connection_timeout as u64, config.pool_idle_connection_timeout as u64,
)) ))
.tcp_keepalive(Duration::from_secs(config.tcp_connection_keep_alive as u64)) .tcp_keepalive(Duration::from_secs(config.tcp_connection_keep_alive as u64))
.pool_max_idle_per_host(config.number_of_https_connections as usize)
.connect_timeout(Duration::from_secs(config.request_timeout as u64)) // Add timeout to request to avoid DDOSing the server .connect_timeout(Duration::from_secs(config.request_timeout as u64)) // Add timeout to request to avoid DDOSing the server
.https_only(true) .https_only(true)
.gzip(true) .gzip(true)
@ -93,6 +93,13 @@ pub async fn aggregate(
let user_agent: &str = random_user_agent(); let user_agent: &str = random_user_agent();
// Add a random delay before making the request.
if config.aggregator.random_delay || !config.debug {
let nanos = SystemTime::now().duration_since(UNIX_EPOCH)?.subsec_nanos() as f32;
let delay = ((nanos / 1_0000_0000 as f32).floor() as u64) + 1;
tokio::time::sleep(Duration::from_secs(delay)).await;
}
let mut names: Vec<&str> = Vec::with_capacity(0); let mut names: Vec<&str> = Vec::with_capacity(0);
// create tasks for upstream result fetching // create tasks for upstream result fetching
@ -181,21 +188,19 @@ pub async fn aggregate(
drop(blacklist_map); drop(blacklist_map);
} }
let mut results: Box<[SearchResult]> = result_map let mut results: Vec<SearchResult> = result_map
.into_iter() .iter()
.map(|(_, mut value)| { .map(|(_, value)| {
if !value.url.contains("temu.com") { let mut copy = value.clone();
value.calculate_relevance(query.as_str()) if !copy.url.contains("temu.com") {
copy.calculate_relevance(query.as_str())
} }
value copy
}) })
.collect(); .collect();
sort_search_results(&mut results); sort_search_results(&mut results);
Ok(SearchResults::new( Ok(SearchResults::new(results, &engine_errors_info))
results,
engine_errors_info.into_boxed_slice(),
))
} }
/// Filters a map of search results using a list of regex patterns. /// Filters a map of search results using a list of regex patterns.
@ -260,6 +265,7 @@ fn sort_search_results(results: &mut [SearchResult]) {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use smallvec::smallvec;
use std::io::Write; use std::io::Write;
use tempfile::NamedTempFile; use tempfile::NamedTempFile;
@ -275,7 +281,7 @@ mod tests {
description: "This domain is for use in illustrative examples in documents." description: "This domain is for use in illustrative examples in documents."
.to_owned(), .to_owned(),
relevance_score: 0.0, relevance_score: 0.0,
engine: vec!["Google".to_owned(), "Bing".to_owned()], engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
}, },
)); ));
map_to_be_filtered.push(( map_to_be_filtered.push((
@ -284,7 +290,7 @@ mod tests {
title: "Rust Programming Language".to_owned(), title: "Rust Programming Language".to_owned(),
url: "https://www.rust-lang.org/".to_owned(), url: "https://www.rust-lang.org/".to_owned(),
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(), description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
engine: vec!["Google".to_owned(), "DuckDuckGo".to_owned()], engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
relevance_score:0.0 relevance_score:0.0
},) },)
); );
@ -325,7 +331,7 @@ mod tests {
url: "https://www.example.com".to_owned(), url: "https://www.example.com".to_owned(),
description: "This domain is for use in illustrative examples in documents." description: "This domain is for use in illustrative examples in documents."
.to_owned(), .to_owned(),
engine: vec!["Google".to_owned(), "Bing".to_owned()], engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
relevance_score: 0.0, relevance_score: 0.0,
}, },
)); ));
@ -335,7 +341,7 @@ mod tests {
title: "Rust Programming Language".to_owned(), title: "Rust Programming Language".to_owned(),
url: "https://www.rust-lang.org/".to_owned(), url: "https://www.rust-lang.org/".to_owned(),
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(), description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
engine: vec!["Google".to_owned(), "DuckDuckGo".to_owned()], engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
relevance_score:0.0 relevance_score:0.0
}, },
)); ));
@ -392,7 +398,7 @@ mod tests {
url: "https://www.example.com".to_owned(), url: "https://www.example.com".to_owned(),
description: "This domain is for use in illustrative examples in documents." description: "This domain is for use in illustrative examples in documents."
.to_owned(), .to_owned(),
engine: vec!["Google".to_owned(), "Bing".to_owned()], engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
relevance_score: 0.0, relevance_score: 0.0,
}, },
)); ));

View File

@ -14,8 +14,7 @@ use crate::{
use actix_web::{get, http::header::ContentType, web, HttpRequest, HttpResponse}; use actix_web::{get, http::header::ContentType, web, HttpRequest, HttpResponse};
use itertools::Itertools; use itertools::Itertools;
use regex::Regex; use regex::Regex;
use std::time::{SystemTime, UNIX_EPOCH}; use std::borrow::Cow;
use std::{borrow::Cow, time::Duration};
use tokio::{ use tokio::{
fs::File, fs::File,
io::{AsyncBufReadExt, BufReader}, io::{AsyncBufReadExt, BufReader},
@ -84,13 +83,6 @@ pub async fn search(
let previous_page = page.saturating_sub(1); let previous_page = page.saturating_sub(1);
let next_page = page + 1; let next_page = page + 1;
// Add a random delay before making the request.
if config.aggregator.random_delay || !config.debug {
let nanos = SystemTime::now().duration_since(UNIX_EPOCH)?.subsec_nanos() as f32;
let delay = ((nanos / 1_0000_0000 as f32).floor() as u64) + 1;
tokio::time::sleep(Duration::from_secs(delay)).await;
}
let results: (SearchResults, String, bool); let results: (SearchResults, String, bool);
if page != previous_page { if page != previous_page {
let (previous_results, current_results, next_results) = join!( let (previous_results, current_results, next_results) = join!(
@ -104,7 +96,9 @@ pub async fn search(
let (results_list, cache_keys): (Vec<SearchResults>, Vec<String>) = let (results_list, cache_keys): (Vec<SearchResults>, Vec<String>) =
[previous_results?, results.clone(), next_results?] [previous_results?, results.clone(), next_results?]
.into_iter() .into_iter()
.filter_map(|(result, cache_key, flag)| flag.then_some((result, cache_key))) .filter_map(|(result, cache_key, flag)| {
dbg!(flag).then_some((result, cache_key))
})
.multiunzip(); .multiunzip();
tokio::spawn(async move { cache.cache_results(&results_list, &cache_keys).await }); tokio::spawn(async move { cache.cache_results(&results_list, &cache_keys).await });
@ -146,7 +140,7 @@ pub async fn search(
/// # Arguments /// # Arguments
/// ///
/// * `url` - It takes the url of the current page that requested the search results for a /// * `url` - It takes the url of the current page that requested the search results for a
/// particular search query. /// particular search query.
/// * `config` - It takes a parsed config struct. /// * `config` - It takes a parsed config struct.
/// * `query` - It takes the page number as u32 value. /// * `query` - It takes the page number as u32 value.
/// * `req` - It takes the `HttpRequest` struct as a value. /// * `req` - It takes the `HttpRequest` struct as a value.

View File

@ -12,7 +12,7 @@ const SAFE_SEARCH_LEVELS_NAME: [&str; 3] = ["None", "Low", "Moderate"];
/// # Arguments /// # Arguments
/// ///
/// * `engine_errors_info` - It takes the engine errors list containing errors for each upstream /// * `engine_errors_info` - It takes the engine errors list containing errors for each upstream
/// search engine which failed to provide results as an argument. /// search engine which failed to provide results as an argument.
/// * `safe_search_level` - It takes the safe search level with values from 0-2 as an argument. /// * `safe_search_level` - It takes the safe search level with values from 0-2 as an argument.
/// * `query` - It takes the current search query provided by user as an argument. /// * `query` - It takes the current search query provided by user as an argument.
/// ///

View File

@ -9,7 +9,7 @@ use maud::{html, Markup};
/// # Arguments /// # Arguments
/// ///
/// * `engine_names` - It takes the key value pair list of all available engine names and there corresponding /// * `engine_names` - It takes the key value pair list of all available engine names and there corresponding
/// selected (enabled/disabled) value as an argument. /// selected (enabled/disabled) value as an argument.
/// ///
/// # Returns /// # Returns
/// ///

View File

@ -11,9 +11,9 @@ use std::fs::read_dir;
/// # Arguments /// # Arguments
/// ///
/// * `style_type` - It takes the style type of the values `theme` and `colorscheme` as an /// * `style_type` - It takes the style type of the values `theme` and `colorscheme` as an
/// argument. /// argument.
/// * `selected_style` - It takes the currently selected style value provided via the config file /// * `selected_style` - It takes the currently selected style value provided via the config file
/// as an argument. /// as an argument.
/// ///
/// # Error /// # Error
/// ///