mirror of
https://github.com/neon-mmd/websurfx.git
synced 2024-11-22 05:58:21 -05:00
Merge pull request #604 from neon-mmd/PERF/603_several-optimizations-to-improve-search-engine-performance
⚡ Several optimizations to improve search engine performance
This commit is contained in:
commit
193b4e36db
1160
Cargo.lock
generated
1160
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
18
Cargo.toml
18
Cargo.toml
@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "websurfx"
|
name = "websurfx"
|
||||||
version = "1.17.20"
|
version = "1.17.22"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
|
description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
|
||||||
repository = "https://github.com/neon-mmd/websurfx"
|
repository = "https://github.com/neon-mmd/websurfx"
|
||||||
@ -27,6 +27,7 @@ tokio = { version = "1.32.0", features = [
|
|||||||
], default-features = false }
|
], default-features = false }
|
||||||
serde = { version = "1.0.209", default-features = false, features = ["derive"] }
|
serde = { version = "1.0.209", default-features = false, features = ["derive"] }
|
||||||
serde_json = { version = "1.0.122", default-features = false }
|
serde_json = { version = "1.0.122", default-features = false }
|
||||||
|
bincode = {version="1.3.3", default-features=false}
|
||||||
maud = { version = "0.26.0", default-features = false, features = [
|
maud = { version = "0.26.0", default-features = false, features = [
|
||||||
"actix-web",
|
"actix-web",
|
||||||
] }
|
] }
|
||||||
@ -48,6 +49,7 @@ mlua = { version = "0.9.9", features = [
|
|||||||
redis = { version = "0.25.4", features = [
|
redis = { version = "0.25.4", features = [
|
||||||
"tokio-comp",
|
"tokio-comp",
|
||||||
"connection-manager",
|
"connection-manager",
|
||||||
|
"tcp_nodelay"
|
||||||
], default-features = false, optional = true }
|
], default-features = false, optional = true }
|
||||||
blake3 = { version = "1.5.4", default-features = false }
|
blake3 = { version = "1.5.4", default-features = false }
|
||||||
error-stack = { version = "0.4.0", default-features = false, features = [
|
error-stack = { version = "0.4.0", default-features = false, features = [
|
||||||
@ -55,17 +57,13 @@ error-stack = { version = "0.4.0", default-features = false, features = [
|
|||||||
] }
|
] }
|
||||||
async-trait = { version = "0.1.80", default-features = false }
|
async-trait = { version = "0.1.80", default-features = false }
|
||||||
regex = { version = "1.9.4", features = ["perf"], default-features = false }
|
regex = { version = "1.9.4", features = ["perf"], default-features = false }
|
||||||
smallvec = { version = "1.13.1", features = [
|
|
||||||
"union",
|
|
||||||
"serde",
|
|
||||||
], default-features = false }
|
|
||||||
futures = { version = "0.3.30", default-features = false, features = ["alloc"] }
|
futures = { version = "0.3.30", default-features = false, features = ["alloc"] }
|
||||||
dhat = { version = "0.3.2", optional = true, default-features = false }
|
dhat = { version = "0.3.2", optional = true, default-features = false }
|
||||||
mimalloc = { version = "0.1.43", default-features = false }
|
mimalloc = { version = "0.1.43", default-features = false }
|
||||||
async-once-cell = { version = "0.5.3", default-features = false }
|
async-once-cell = { version = "0.5.3", default-features = false }
|
||||||
actix-governor = { version = "0.5.0", default-features = false }
|
actix-governor = { version = "0.5.0", default-features = false }
|
||||||
mini-moka = { version = "0.10", optional = true, default-features = false, features = [
|
moka = { version = "0.12.8", optional = true, default-features = false, features = [
|
||||||
"sync",
|
"future",
|
||||||
] }
|
] }
|
||||||
async-compression = { version = "0.4.12", default-features = false, features = [
|
async-compression = { version = "0.4.12", default-features = false, features = [
|
||||||
"brotli",
|
"brotli",
|
||||||
@ -82,8 +80,8 @@ base64 = { version = "0.21.5", default-features = false, features = [
|
|||||||
cfg-if = { version = "1.0.0", default-features = false, optional = true }
|
cfg-if = { version = "1.0.0", default-features = false, optional = true }
|
||||||
keyword_extraction = { version = "1.4.3", default-features = false, features = [
|
keyword_extraction = { version = "1.4.3", default-features = false, features = [
|
||||||
"tf_idf",
|
"tf_idf",
|
||||||
|
"rayon",
|
||||||
] }
|
] }
|
||||||
|
|
||||||
stop-words = { version = "0.8.0", default-features = false, features = ["iso"] }
|
stop-words = { version = "0.8.0", default-features = false, features = ["iso"] }
|
||||||
thesaurus = { version = "0.5.2", default-features = false, optional = true, features = [
|
thesaurus = { version = "0.5.2", default-features = false, optional = true, features = [
|
||||||
"moby",
|
"moby",
|
||||||
@ -104,8 +102,6 @@ lightningcss = { version = "1.0.0-alpha.57", default-features = false, features
|
|||||||
# Temporary fork with fix
|
# Temporary fork with fix
|
||||||
minify-js = { git = "https://github.com/RuairidhWilliamson/minify-js", branch = "master", version = "0.6.0", default-features = false}
|
minify-js = { git = "https://github.com/RuairidhWilliamson/minify-js", branch = "master", version = "0.6.0", default-features = false}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
[profile.dev]
|
[profile.dev]
|
||||||
opt-level = 0
|
opt-level = 0
|
||||||
debug = true
|
debug = true
|
||||||
@ -180,7 +176,7 @@ opt-level = "z"
|
|||||||
use-synonyms-search = ["thesaurus/static"]
|
use-synonyms-search = ["thesaurus/static"]
|
||||||
default = ["memory-cache"]
|
default = ["memory-cache"]
|
||||||
dhat-heap = ["dep:dhat"]
|
dhat-heap = ["dep:dhat"]
|
||||||
memory-cache = ["dep:mini-moka"]
|
memory-cache = ["dep:moka"]
|
||||||
redis-cache = ["dep:redis", "dep:base64"]
|
redis-cache = ["dep:redis", "dep:base64"]
|
||||||
compress-cache-results = ["dep:async-compression", "dep:cfg-if"]
|
compress-cache-results = ["dep:async-compression", "dep:cfg-if"]
|
||||||
encrypt-cache-results = ["dep:chacha20poly1305", "dep:chacha20"]
|
encrypt-cache-results = ["dep:chacha20poly1305", "dep:chacha20"]
|
||||||
|
41
src/cache/cacher.rs
vendored
41
src/cache/cacher.rs
vendored
@ -2,10 +2,9 @@
|
|||||||
//! from the upstream search engines in a json format.
|
//! from the upstream search engines in a json format.
|
||||||
|
|
||||||
use error_stack::Report;
|
use error_stack::Report;
|
||||||
|
use futures::future::join_all;
|
||||||
#[cfg(feature = "memory-cache")]
|
#[cfg(feature = "memory-cache")]
|
||||||
use mini_moka::sync::Cache as MokaCache;
|
use moka::future::Cache as MokaCache;
|
||||||
#[cfg(feature = "memory-cache")]
|
|
||||||
use mini_moka::sync::ConcurrentCacheExt;
|
|
||||||
|
|
||||||
#[cfg(feature = "memory-cache")]
|
#[cfg(feature = "memory-cache")]
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
@ -376,13 +375,13 @@ impl Cacher for RedisCache {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
/// TryInto implementation for SearchResults from Vec<u8>
|
/// TryInto implementation for SearchResults from Vec<u8>
|
||||||
use std::convert::TryInto;
|
use std::{convert::TryInto, sync::Arc};
|
||||||
|
|
||||||
impl TryInto<SearchResults> for Vec<u8> {
|
impl TryInto<SearchResults> for Vec<u8> {
|
||||||
type Error = CacheError;
|
type Error = CacheError;
|
||||||
|
|
||||||
fn try_into(self) -> Result<SearchResults, Self::Error> {
|
fn try_into(self) -> Result<SearchResults, Self::Error> {
|
||||||
serde_json::from_slice(&self).map_err(|_| CacheError::SerializationError)
|
bincode::deserialize_from(self.as_slice()).map_err(|_| CacheError::SerializationError)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -390,7 +389,7 @@ impl TryInto<Vec<u8>> for &SearchResults {
|
|||||||
type Error = CacheError;
|
type Error = CacheError;
|
||||||
|
|
||||||
fn try_into(self) -> Result<Vec<u8>, Self::Error> {
|
fn try_into(self) -> Result<Vec<u8>, Self::Error> {
|
||||||
serde_json::to_vec(self).map_err(|_| CacheError::SerializationError)
|
bincode::serialize(self).map_err(|_| CacheError::SerializationError)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -398,7 +397,16 @@ impl TryInto<Vec<u8>> for &SearchResults {
|
|||||||
#[cfg(feature = "memory-cache")]
|
#[cfg(feature = "memory-cache")]
|
||||||
pub struct InMemoryCache {
|
pub struct InMemoryCache {
|
||||||
/// The backend cache which stores data.
|
/// The backend cache which stores data.
|
||||||
cache: MokaCache<String, Vec<u8>>,
|
cache: Arc<MokaCache<String, Vec<u8>>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "memory-cache")]
|
||||||
|
impl Clone for InMemoryCache {
|
||||||
|
fn clone(&self) -> Self {
|
||||||
|
Self {
|
||||||
|
cache: self.cache.clone(),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(feature = "memory-cache")]
|
#[cfg(feature = "memory-cache")]
|
||||||
@ -408,15 +416,17 @@ impl Cacher for InMemoryCache {
|
|||||||
log::info!("Initialising in-memory cache");
|
log::info!("Initialising in-memory cache");
|
||||||
|
|
||||||
InMemoryCache {
|
InMemoryCache {
|
||||||
cache: MokaCache::builder()
|
cache: Arc::new(
|
||||||
.time_to_live(Duration::from_secs(config.cache_expiry_time.into()))
|
MokaCache::builder()
|
||||||
.build(),
|
.time_to_live(Duration::from_secs(config.cache_expiry_time.into()))
|
||||||
|
.build(),
|
||||||
|
),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn cached_results(&mut self, url: &str) -> Result<SearchResults, Report<CacheError>> {
|
async fn cached_results(&mut self, url: &str) -> Result<SearchResults, Report<CacheError>> {
|
||||||
let hashed_url_string = self.hash_url(url);
|
let hashed_url_string = self.hash_url(url);
|
||||||
match self.cache.get(&hashed_url_string) {
|
match self.cache.get(&hashed_url_string).await {
|
||||||
Some(res) => self.post_process_search_results(res).await,
|
Some(res) => self.post_process_search_results(res).await,
|
||||||
None => Err(Report::new(CacheError::MissingValue)),
|
None => Err(Report::new(CacheError::MissingValue)),
|
||||||
}
|
}
|
||||||
@ -427,13 +437,18 @@ impl Cacher for InMemoryCache {
|
|||||||
search_results: &[SearchResults],
|
search_results: &[SearchResults],
|
||||||
urls: &[String],
|
urls: &[String],
|
||||||
) -> Result<(), Report<CacheError>> {
|
) -> Result<(), Report<CacheError>> {
|
||||||
|
let mut tasks: Vec<_> = Vec::with_capacity(urls.len());
|
||||||
for (url, search_result) in urls.iter().zip(search_results.iter()) {
|
for (url, search_result) in urls.iter().zip(search_results.iter()) {
|
||||||
let hashed_url_string = self.hash_url(url);
|
let hashed_url_string = self.hash_url(url);
|
||||||
let bytes = self.pre_process_search_results(search_result).await?;
|
let bytes = self.pre_process_search_results(search_result).await?;
|
||||||
self.cache.insert(hashed_url_string, bytes);
|
let new_self = self.clone();
|
||||||
|
tasks.push(tokio::spawn(async move {
|
||||||
|
new_self.cache.insert(hashed_url_string, bytes).await
|
||||||
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
self.cache.sync();
|
join_all(tasks).await;
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
6
src/cache/redis_cacher.rs
vendored
6
src/cache/redis_cacher.rs
vendored
@ -16,7 +16,7 @@ const REDIS_PIPELINE_SIZE: usize = 3;
|
|||||||
/// connect to.
|
/// connect to.
|
||||||
pub struct RedisCache {
|
pub struct RedisCache {
|
||||||
/// It stores a pool of connections ready to be used.
|
/// It stores a pool of connections ready to be used.
|
||||||
connection_pool: Vec<ConnectionManager>,
|
connection_pool: Box<[ConnectionManager]>,
|
||||||
/// It stores the size of the connection pool (in other words the number of
|
/// It stores the size of the connection pool (in other words the number of
|
||||||
/// connections that should be stored in the pool).
|
/// connections that should be stored in the pool).
|
||||||
pool_size: u8,
|
pool_size: u8,
|
||||||
@ -58,13 +58,13 @@ impl RedisCache {
|
|||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut outputs = Vec::new();
|
let mut outputs = Vec::with_capacity(tasks.len());
|
||||||
for task in tasks {
|
for task in tasks {
|
||||||
outputs.push(task.await??);
|
outputs.push(task.await??);
|
||||||
}
|
}
|
||||||
|
|
||||||
let redis_cache = RedisCache {
|
let redis_cache = RedisCache {
|
||||||
connection_pool: outputs,
|
connection_pool: outputs.into_boxed_slice(),
|
||||||
pool_size,
|
pool_size,
|
||||||
current_connection: Default::default(),
|
current_connection: Default::default(),
|
||||||
cache_ttl,
|
cache_ttl,
|
||||||
|
@ -3,7 +3,6 @@
|
|||||||
|
|
||||||
use super::engine_models::EngineError;
|
use super::engine_models::EngineError;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use smallvec::SmallVec;
|
|
||||||
#[cfg(any(
|
#[cfg(any(
|
||||||
feature = "use-synonyms-search",
|
feature = "use-synonyms-search",
|
||||||
feature = "use-non-static-synonyms-search"
|
feature = "use-non-static-synonyms-search"
|
||||||
@ -23,7 +22,7 @@ pub struct SearchResult {
|
|||||||
/// The description of the search result.
|
/// The description of the search result.
|
||||||
pub description: String,
|
pub description: String,
|
||||||
/// The names of the upstream engines from which this results were provided.
|
/// The names of the upstream engines from which this results were provided.
|
||||||
pub engine: SmallVec<[String; 0]>,
|
pub engine: Vec<String>,
|
||||||
/// The td-tdf score of the result in regards to the title, url and description and the user's query
|
/// The td-tdf score of the result in regards to the title, url and description and the user's query
|
||||||
pub relevance_score: f32,
|
pub relevance_score: f32,
|
||||||
}
|
}
|
||||||
@ -153,10 +152,10 @@ impl EngineErrorInfo {
|
|||||||
#[serde(rename_all = "camelCase")]
|
#[serde(rename_all = "camelCase")]
|
||||||
pub struct SearchResults {
|
pub struct SearchResults {
|
||||||
/// Stores the individual serializable `SearchResult` struct into a vector of
|
/// Stores the individual serializable `SearchResult` struct into a vector of
|
||||||
pub results: Vec<SearchResult>,
|
pub results: Box<[SearchResult]>,
|
||||||
/// Stores the information on which engines failed with their engine name
|
/// Stores the information on which engines failed with their engine name
|
||||||
/// and the type of error that caused it.
|
/// and the type of error that caused it.
|
||||||
pub engine_errors_info: Vec<EngineErrorInfo>,
|
pub engine_errors_info: Box<[EngineErrorInfo]>,
|
||||||
/// Stores the flag option which holds the check value that the following
|
/// Stores the flag option which holds the check value that the following
|
||||||
/// search query was disallowed when the safe search level set to 4 and it
|
/// search query was disallowed when the safe search level set to 4 and it
|
||||||
/// was present in the `Blocklist` file.
|
/// was present in the `Blocklist` file.
|
||||||
@ -183,10 +182,10 @@ impl SearchResults {
|
|||||||
/// the search url.
|
/// the search url.
|
||||||
/// * `engine_errors_info` - Takes an array of structs which contains information regarding
|
/// * `engine_errors_info` - Takes an array of structs which contains information regarding
|
||||||
/// which engines failed with their names, reason and their severity color name.
|
/// which engines failed with their names, reason and their severity color name.
|
||||||
pub fn new(results: Vec<SearchResult>, engine_errors_info: &[EngineErrorInfo]) -> Self {
|
pub fn new(results: Box<[SearchResult]>, engine_errors_info: Box<[EngineErrorInfo]>) -> Self {
|
||||||
Self {
|
Self {
|
||||||
results,
|
results,
|
||||||
engine_errors_info: engine_errors_info.to_owned(),
|
engine_errors_info,
|
||||||
disallowed: Default::default(),
|
disallowed: Default::default(),
|
||||||
filtered: Default::default(),
|
filtered: Default::default(),
|
||||||
safe_search_level: Default::default(),
|
safe_search_level: Default::default(),
|
||||||
@ -205,11 +204,11 @@ impl SearchResults {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// A getter function that gets the value of `engine_errors_info`.
|
/// A getter function that gets the value of `engine_errors_info`.
|
||||||
pub fn engine_errors_info(&mut self) -> Vec<EngineErrorInfo> {
|
pub fn engine_errors_info(&mut self) -> Box<[EngineErrorInfo]> {
|
||||||
std::mem::take(&mut self.engine_errors_info)
|
std::mem::take(&mut self.engine_errors_info)
|
||||||
}
|
}
|
||||||
/// A getter function that gets the value of `results`.
|
/// A getter function that gets the value of `results`.
|
||||||
pub fn results(&mut self) -> Vec<SearchResult> {
|
pub fn results(&mut self) -> Box<[SearchResult]> {
|
||||||
self.results.clone()
|
self.results.clone()
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -254,27 +253,50 @@ fn calculate_tf_idf(
|
|||||||
let tf_idf = TfIdf::new(params);
|
let tf_idf = TfIdf::new(params);
|
||||||
let tokener = Tokenizer::new(query, stop_words, Some(punctuation));
|
let tokener = Tokenizer::new(query, stop_words, Some(punctuation));
|
||||||
let query_tokens = tokener.split_into_words();
|
let query_tokens = tokener.split_into_words();
|
||||||
let mut search_tokens = vec![];
|
|
||||||
|
|
||||||
for token in query_tokens {
|
#[cfg(any(
|
||||||
#[cfg(any(
|
feature = "use-synonyms-search",
|
||||||
feature = "use-synonyms-search",
|
feature = "use-non-static-synonyms-search"
|
||||||
feature = "use-non-static-synonyms-search"
|
))]
|
||||||
))]
|
let mut extra_tokens = vec![];
|
||||||
{
|
|
||||||
// find some synonyms and add them to the search (from wordnet or moby if feature is enabled)
|
|
||||||
let synonyms = synonyms(&token);
|
|
||||||
search_tokens.extend(synonyms)
|
|
||||||
}
|
|
||||||
search_tokens.push(token);
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut total_score = 0.0f32;
|
let total_score: f32 = query_tokens
|
||||||
for token in search_tokens.iter() {
|
.iter()
|
||||||
total_score += tf_idf.get_score(token);
|
.map(|token| {
|
||||||
}
|
#[cfg(any(
|
||||||
|
feature = "use-synonyms-search",
|
||||||
|
feature = "use-non-static-synonyms-search"
|
||||||
|
))]
|
||||||
|
{
|
||||||
|
// find some synonyms and add them to the search (from wordnet or moby if feature is enabled)
|
||||||
|
extra_tokens.extend(synonyms(token))
|
||||||
|
}
|
||||||
|
|
||||||
let result = total_score / (search_tokens.len() as f32);
|
tf_idf.get_score(token)
|
||||||
|
})
|
||||||
|
.sum();
|
||||||
|
|
||||||
|
#[cfg(not(any(
|
||||||
|
feature = "use-synonyms-search",
|
||||||
|
feature = "use-non-static-synonyms-search"
|
||||||
|
)))]
|
||||||
|
let result = total_score / (query_tokens.len() as f32);
|
||||||
|
|
||||||
|
#[cfg(any(
|
||||||
|
feature = "use-synonyms-search",
|
||||||
|
feature = "use-non-static-synonyms-search"
|
||||||
|
))]
|
||||||
|
let extra_total_score: f32 = extra_tokens
|
||||||
|
.iter()
|
||||||
|
.map(|token| tf_idf.get_score(token))
|
||||||
|
.sum();
|
||||||
|
|
||||||
|
#[cfg(any(
|
||||||
|
feature = "use-synonyms-search",
|
||||||
|
feature = "use-non-static-synonyms-search"
|
||||||
|
))]
|
||||||
|
let result =
|
||||||
|
(extra_total_score + total_score) / ((query_tokens.len() + extra_tokens.len()) as f32);
|
||||||
|
|
||||||
f32::from(!result.is_nan()) * result
|
f32::from(!result.is_nan()) * result
|
||||||
}
|
}
|
||||||
|
@ -11,7 +11,7 @@ use super::parser_models::Style;
|
|||||||
pub struct SearchParams {
|
pub struct SearchParams {
|
||||||
/// It stores the search parameter option `q` (or query in simple words)
|
/// It stores the search parameter option `q` (or query in simple words)
|
||||||
/// of the search url.
|
/// of the search url.
|
||||||
pub q: Option<String>,
|
pub q: Option<Cow<'static, str>>,
|
||||||
/// It stores the search parameter `page` (or pageno in simple words)
|
/// It stores the search parameter `page` (or pageno in simple words)
|
||||||
/// of the search url.
|
/// of the search url.
|
||||||
pub page: Option<u32>,
|
pub page: Option<u32>,
|
||||||
@ -29,7 +29,7 @@ pub struct Cookie<'a> {
|
|||||||
/// It stores the colorscheme name used for the website theme.
|
/// It stores the colorscheme name used for the website theme.
|
||||||
pub colorscheme: Cow<'a, str>,
|
pub colorscheme: Cow<'a, str>,
|
||||||
/// It stores the user selected upstream search engines selected from the UI.
|
/// It stores the user selected upstream search engines selected from the UI.
|
||||||
pub engines: Cow<'a, Vec<Cow<'a, str>>>,
|
pub engines: Cow<'a, [Cow<'a, str>]>,
|
||||||
/// It stores the user selected safe search level from the UI.
|
/// It stores the user selected safe search level from the UI.
|
||||||
pub safe_search_level: u8,
|
pub safe_search_level: u8,
|
||||||
}
|
}
|
||||||
|
@ -14,7 +14,6 @@ use futures::stream::FuturesUnordered;
|
|||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use reqwest::{Client, ClientBuilder};
|
use reqwest::{Client, ClientBuilder};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::time::{SystemTime, UNIX_EPOCH};
|
|
||||||
use tokio::{
|
use tokio::{
|
||||||
fs::File,
|
fs::File,
|
||||||
io::{AsyncBufReadExt, BufReader},
|
io::{AsyncBufReadExt, BufReader},
|
||||||
@ -93,13 +92,6 @@ pub async fn aggregate(
|
|||||||
|
|
||||||
let user_agent: &str = random_user_agent();
|
let user_agent: &str = random_user_agent();
|
||||||
|
|
||||||
// Add a random delay before making the request.
|
|
||||||
if config.aggregator.random_delay || !config.debug {
|
|
||||||
let nanos = SystemTime::now().duration_since(UNIX_EPOCH)?.subsec_nanos() as f32;
|
|
||||||
let delay = ((nanos / 1_0000_0000 as f32).floor() as u64) + 1;
|
|
||||||
tokio::time::sleep(Duration::from_secs(delay)).await;
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut names: Vec<&str> = Vec::with_capacity(0);
|
let mut names: Vec<&str> = Vec::with_capacity(0);
|
||||||
|
|
||||||
// create tasks for upstream result fetching
|
// create tasks for upstream result fetching
|
||||||
@ -188,19 +180,21 @@ pub async fn aggregate(
|
|||||||
drop(blacklist_map);
|
drop(blacklist_map);
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut results: Vec<SearchResult> = result_map
|
let mut results: Box<[SearchResult]> = result_map
|
||||||
.iter()
|
.into_iter()
|
||||||
.map(|(_, value)| {
|
.map(|(_, mut value)| {
|
||||||
let mut copy = value.clone();
|
if !value.url.contains("temu.com") {
|
||||||
if !copy.url.contains("temu.com") {
|
value.calculate_relevance(query.as_str())
|
||||||
copy.calculate_relevance(query.as_str())
|
|
||||||
}
|
}
|
||||||
copy
|
value
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
sort_search_results(&mut results);
|
sort_search_results(&mut results);
|
||||||
|
|
||||||
Ok(SearchResults::new(results, &engine_errors_info))
|
Ok(SearchResults::new(
|
||||||
|
results,
|
||||||
|
engine_errors_info.into_boxed_slice(),
|
||||||
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Filters a map of search results using a list of regex patterns.
|
/// Filters a map of search results using a list of regex patterns.
|
||||||
@ -265,7 +259,6 @@ fn sort_search_results(results: &mut [SearchResult]) {
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use smallvec::smallvec;
|
|
||||||
use std::io::Write;
|
use std::io::Write;
|
||||||
use tempfile::NamedTempFile;
|
use tempfile::NamedTempFile;
|
||||||
|
|
||||||
@ -281,7 +274,7 @@ mod tests {
|
|||||||
description: "This domain is for use in illustrative examples in documents."
|
description: "This domain is for use in illustrative examples in documents."
|
||||||
.to_owned(),
|
.to_owned(),
|
||||||
relevance_score: 0.0,
|
relevance_score: 0.0,
|
||||||
engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
|
engine: vec!["Google".to_owned(), "Bing".to_owned()],
|
||||||
},
|
},
|
||||||
));
|
));
|
||||||
map_to_be_filtered.push((
|
map_to_be_filtered.push((
|
||||||
@ -290,7 +283,7 @@ mod tests {
|
|||||||
title: "Rust Programming Language".to_owned(),
|
title: "Rust Programming Language".to_owned(),
|
||||||
url: "https://www.rust-lang.org/".to_owned(),
|
url: "https://www.rust-lang.org/".to_owned(),
|
||||||
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
|
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
|
||||||
engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
|
engine: vec!["Google".to_owned(), "DuckDuckGo".to_owned()],
|
||||||
relevance_score:0.0
|
relevance_score:0.0
|
||||||
},)
|
},)
|
||||||
);
|
);
|
||||||
@ -331,7 +324,7 @@ mod tests {
|
|||||||
url: "https://www.example.com".to_owned(),
|
url: "https://www.example.com".to_owned(),
|
||||||
description: "This domain is for use in illustrative examples in documents."
|
description: "This domain is for use in illustrative examples in documents."
|
||||||
.to_owned(),
|
.to_owned(),
|
||||||
engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
|
engine: vec!["Google".to_owned(), "Bing".to_owned()],
|
||||||
relevance_score: 0.0,
|
relevance_score: 0.0,
|
||||||
},
|
},
|
||||||
));
|
));
|
||||||
@ -341,7 +334,7 @@ mod tests {
|
|||||||
title: "Rust Programming Language".to_owned(),
|
title: "Rust Programming Language".to_owned(),
|
||||||
url: "https://www.rust-lang.org/".to_owned(),
|
url: "https://www.rust-lang.org/".to_owned(),
|
||||||
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
|
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
|
||||||
engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
|
engine: vec!["Google".to_owned(), "DuckDuckGo".to_owned()],
|
||||||
relevance_score:0.0
|
relevance_score:0.0
|
||||||
},
|
},
|
||||||
));
|
));
|
||||||
@ -398,7 +391,7 @@ mod tests {
|
|||||||
url: "https://www.example.com".to_owned(),
|
url: "https://www.example.com".to_owned(),
|
||||||
description: "This domain is for use in illustrative examples in documents."
|
description: "This domain is for use in illustrative examples in documents."
|
||||||
.to_owned(),
|
.to_owned(),
|
||||||
engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
|
engine: vec!["Google".to_owned(), "Bing".to_owned()],
|
||||||
relevance_score: 0.0,
|
relevance_score: 0.0,
|
||||||
},
|
},
|
||||||
));
|
));
|
||||||
|
@ -14,7 +14,8 @@ use crate::{
|
|||||||
use actix_web::{get, http::header::ContentType, web, HttpRequest, HttpResponse};
|
use actix_web::{get, http::header::ContentType, web, HttpRequest, HttpResponse};
|
||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use std::borrow::Cow;
|
use std::time::{SystemTime, UNIX_EPOCH};
|
||||||
|
use std::{borrow::Cow, time::Duration};
|
||||||
use tokio::{
|
use tokio::{
|
||||||
fs::File,
|
fs::File,
|
||||||
io::{AsyncBufReadExt, BufReader},
|
io::{AsyncBufReadExt, BufReader},
|
||||||
@ -83,6 +84,13 @@ pub async fn search(
|
|||||||
let previous_page = page.saturating_sub(1);
|
let previous_page = page.saturating_sub(1);
|
||||||
let next_page = page + 1;
|
let next_page = page + 1;
|
||||||
|
|
||||||
|
// Add a random delay before making the request.
|
||||||
|
if config.aggregator.random_delay || !config.debug {
|
||||||
|
let nanos = SystemTime::now().duration_since(UNIX_EPOCH)?.subsec_nanos() as f32;
|
||||||
|
let delay = ((nanos / 1_0000_0000 as f32).floor() as u64) + 1;
|
||||||
|
tokio::time::sleep(Duration::from_secs(delay)).await;
|
||||||
|
}
|
||||||
|
|
||||||
let results: (SearchResults, String, bool);
|
let results: (SearchResults, String, bool);
|
||||||
if page != previous_page {
|
if page != previous_page {
|
||||||
let (previous_results, current_results, next_results) = join!(
|
let (previous_results, current_results, next_results) = join!(
|
||||||
@ -96,9 +104,7 @@ pub async fn search(
|
|||||||
let (results_list, cache_keys): (Vec<SearchResults>, Vec<String>) =
|
let (results_list, cache_keys): (Vec<SearchResults>, Vec<String>) =
|
||||||
[previous_results?, results.clone(), next_results?]
|
[previous_results?, results.clone(), next_results?]
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.filter_map(|(result, cache_key, flag)| {
|
.filter_map(|(result, cache_key, flag)| flag.then_some((result, cache_key)))
|
||||||
dbg!(flag).then_some((result, cache_key))
|
|
||||||
})
|
|
||||||
.multiunzip();
|
.multiunzip();
|
||||||
|
|
||||||
tokio::spawn(async move { cache.cache_results(&results_list, &cache_keys).await });
|
tokio::spawn(async move { cache.cache_results(&results_list, &cache_keys).await });
|
||||||
|
Loading…
Reference in New Issue
Block a user