mirror of
https://github.com/neon-mmd/websurfx.git
synced 2024-11-26 07:58:20 -05:00
Compare commits
No commits in common. "b7d0ef7252288d2bc5cdd189f534af375f28b3f5" and "c6b93403b8964243ddbce5df2b657b25a909a1c0" have entirely different histories.
b7d0ef7252
...
c6b93403b8
1160
Cargo.lock
generated
1160
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
18
Cargo.toml
18
Cargo.toml
@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "websurfx"
|
name = "websurfx"
|
||||||
version = "1.17.22"
|
version = "1.17.20"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
|
description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
|
||||||
repository = "https://github.com/neon-mmd/websurfx"
|
repository = "https://github.com/neon-mmd/websurfx"
|
||||||
@ -27,7 +27,6 @@ tokio = { version = "1.32.0", features = [
|
|||||||
], default-features = false }
|
], default-features = false }
|
||||||
serde = { version = "1.0.209", default-features = false, features = ["derive"] }
|
serde = { version = "1.0.209", default-features = false, features = ["derive"] }
|
||||||
serde_json = { version = "1.0.122", default-features = false }
|
serde_json = { version = "1.0.122", default-features = false }
|
||||||
bincode = {version="1.3.3", default-features=false}
|
|
||||||
maud = { version = "0.26.0", default-features = false, features = [
|
maud = { version = "0.26.0", default-features = false, features = [
|
||||||
"actix-web",
|
"actix-web",
|
||||||
] }
|
] }
|
||||||
@ -49,7 +48,6 @@ mlua = { version = "0.9.9", features = [
|
|||||||
redis = { version = "0.25.4", features = [
|
redis = { version = "0.25.4", features = [
|
||||||
"tokio-comp",
|
"tokio-comp",
|
||||||
"connection-manager",
|
"connection-manager",
|
||||||
"tcp_nodelay"
|
|
||||||
], default-features = false, optional = true }
|
], default-features = false, optional = true }
|
||||||
blake3 = { version = "1.5.4", default-features = false }
|
blake3 = { version = "1.5.4", default-features = false }
|
||||||
error-stack = { version = "0.4.0", default-features = false, features = [
|
error-stack = { version = "0.4.0", default-features = false, features = [
|
||||||
@ -57,13 +55,17 @@ error-stack = { version = "0.4.0", default-features = false, features = [
|
|||||||
] }
|
] }
|
||||||
async-trait = { version = "0.1.80", default-features = false }
|
async-trait = { version = "0.1.80", default-features = false }
|
||||||
regex = { version = "1.9.4", features = ["perf"], default-features = false }
|
regex = { version = "1.9.4", features = ["perf"], default-features = false }
|
||||||
|
smallvec = { version = "1.13.1", features = [
|
||||||
|
"union",
|
||||||
|
"serde",
|
||||||
|
], default-features = false }
|
||||||
futures = { version = "0.3.30", default-features = false, features = ["alloc"] }
|
futures = { version = "0.3.30", default-features = false, features = ["alloc"] }
|
||||||
dhat = { version = "0.3.2", optional = true, default-features = false }
|
dhat = { version = "0.3.2", optional = true, default-features = false }
|
||||||
mimalloc = { version = "0.1.43", default-features = false }
|
mimalloc = { version = "0.1.43", default-features = false }
|
||||||
async-once-cell = { version = "0.5.3", default-features = false }
|
async-once-cell = { version = "0.5.3", default-features = false }
|
||||||
actix-governor = { version = "0.5.0", default-features = false }
|
actix-governor = { version = "0.5.0", default-features = false }
|
||||||
moka = { version = "0.12.8", optional = true, default-features = false, features = [
|
mini-moka = { version = "0.10", optional = true, default-features = false, features = [
|
||||||
"future",
|
"sync",
|
||||||
] }
|
] }
|
||||||
async-compression = { version = "0.4.12", default-features = false, features = [
|
async-compression = { version = "0.4.12", default-features = false, features = [
|
||||||
"brotli",
|
"brotli",
|
||||||
@ -80,8 +82,8 @@ base64 = { version = "0.21.5", default-features = false, features = [
|
|||||||
cfg-if = { version = "1.0.0", default-features = false, optional = true }
|
cfg-if = { version = "1.0.0", default-features = false, optional = true }
|
||||||
keyword_extraction = { version = "1.4.3", default-features = false, features = [
|
keyword_extraction = { version = "1.4.3", default-features = false, features = [
|
||||||
"tf_idf",
|
"tf_idf",
|
||||||
"rayon",
|
|
||||||
] }
|
] }
|
||||||
|
|
||||||
stop-words = { version = "0.8.0", default-features = false, features = ["iso"] }
|
stop-words = { version = "0.8.0", default-features = false, features = ["iso"] }
|
||||||
thesaurus = { version = "0.5.2", default-features = false, optional = true, features = [
|
thesaurus = { version = "0.5.2", default-features = false, optional = true, features = [
|
||||||
"moby",
|
"moby",
|
||||||
@ -102,6 +104,8 @@ lightningcss = { version = "1.0.0-alpha.57", default-features = false, features
|
|||||||
# Temporary fork with fix
|
# Temporary fork with fix
|
||||||
minify-js = { git = "https://github.com/RuairidhWilliamson/minify-js", branch = "master", version = "0.6.0", default-features = false}
|
minify-js = { git = "https://github.com/RuairidhWilliamson/minify-js", branch = "master", version = "0.6.0", default-features = false}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
[profile.dev]
|
[profile.dev]
|
||||||
opt-level = 0
|
opt-level = 0
|
||||||
debug = true
|
debug = true
|
||||||
@ -176,7 +180,7 @@ opt-level = "z"
|
|||||||
use-synonyms-search = ["thesaurus/static"]
|
use-synonyms-search = ["thesaurus/static"]
|
||||||
default = ["memory-cache"]
|
default = ["memory-cache"]
|
||||||
dhat-heap = ["dep:dhat"]
|
dhat-heap = ["dep:dhat"]
|
||||||
memory-cache = ["dep:moka"]
|
memory-cache = ["dep:mini-moka"]
|
||||||
redis-cache = ["dep:redis", "dep:base64"]
|
redis-cache = ["dep:redis", "dep:base64"]
|
||||||
compress-cache-results = ["dep:async-compression", "dep:cfg-if"]
|
compress-cache-results = ["dep:async-compression", "dep:cfg-if"]
|
||||||
encrypt-cache-results = ["dep:chacha20poly1305", "dep:chacha20"]
|
encrypt-cache-results = ["dep:chacha20poly1305", "dep:chacha20"]
|
||||||
|
@ -34,11 +34,11 @@
|
|||||||
},
|
},
|
||||||
"nixpkgs_2": {
|
"nixpkgs_2": {
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1725194671,
|
"lastModified": 1695318763,
|
||||||
"narHash": "sha256-tLGCFEFTB5TaOKkpfw3iYT9dnk4awTP/q4w+ROpMfuw=",
|
"narHash": "sha256-FHVPDRP2AfvsxAdc+AsgFJevMz5VBmnZglFUMlxBkcY=",
|
||||||
"owner": "NixOS",
|
"owner": "NixOS",
|
||||||
"repo": "nixpkgs",
|
"repo": "nixpkgs",
|
||||||
"rev": "b833ff01a0d694b910daca6e2ff4a3f26dee478c",
|
"rev": "e12483116b3b51a185a33a272bf351e357ba9a99",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
|
@ -36,7 +36,7 @@
|
|||||||
haskellPackages.hadolint
|
haskellPackages.hadolint
|
||||||
nodejs
|
nodejs
|
||||||
nodePackages_latest.cspell
|
nodePackages_latest.cspell
|
||||||
eslint
|
nodePackages_latest.eslint
|
||||||
nodePackages_latest.markdownlint-cli2
|
nodePackages_latest.markdownlint-cli2
|
||||||
nodePackages_latest.stylelint
|
nodePackages_latest.stylelint
|
||||||
redis
|
redis
|
||||||
|
47
src/cache/cacher.rs
vendored
47
src/cache/cacher.rs
vendored
@ -2,9 +2,10 @@
|
|||||||
//! from the upstream search engines in a json format.
|
//! from the upstream search engines in a json format.
|
||||||
|
|
||||||
use error_stack::Report;
|
use error_stack::Report;
|
||||||
use futures::future::join_all;
|
|
||||||
#[cfg(feature = "memory-cache")]
|
#[cfg(feature = "memory-cache")]
|
||||||
use moka::future::Cache as MokaCache;
|
use mini_moka::sync::Cache as MokaCache;
|
||||||
|
#[cfg(feature = "memory-cache")]
|
||||||
|
use mini_moka::sync::ConcurrentCacheExt;
|
||||||
|
|
||||||
#[cfg(feature = "memory-cache")]
|
#[cfg(feature = "memory-cache")]
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
@ -375,13 +376,13 @@ impl Cacher for RedisCache {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
/// TryInto implementation for SearchResults from Vec<u8>
|
/// TryInto implementation for SearchResults from Vec<u8>
|
||||||
use std::{convert::TryInto, sync::Arc};
|
use std::convert::TryInto;
|
||||||
|
|
||||||
impl TryInto<SearchResults> for Vec<u8> {
|
impl TryInto<SearchResults> for Vec<u8> {
|
||||||
type Error = CacheError;
|
type Error = CacheError;
|
||||||
|
|
||||||
fn try_into(self) -> Result<SearchResults, Self::Error> {
|
fn try_into(self) -> Result<SearchResults, Self::Error> {
|
||||||
bincode::deserialize_from(self.as_slice()).map_err(|_| CacheError::SerializationError)
|
serde_json::from_slice(&self).map_err(|_| CacheError::SerializationError)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -389,7 +390,7 @@ impl TryInto<Vec<u8>> for &SearchResults {
|
|||||||
type Error = CacheError;
|
type Error = CacheError;
|
||||||
|
|
||||||
fn try_into(self) -> Result<Vec<u8>, Self::Error> {
|
fn try_into(self) -> Result<Vec<u8>, Self::Error> {
|
||||||
bincode::serialize(self).map_err(|_| CacheError::SerializationError)
|
serde_json::to_vec(self).map_err(|_| CacheError::SerializationError)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -397,16 +398,7 @@ impl TryInto<Vec<u8>> for &SearchResults {
|
|||||||
#[cfg(feature = "memory-cache")]
|
#[cfg(feature = "memory-cache")]
|
||||||
pub struct InMemoryCache {
|
pub struct InMemoryCache {
|
||||||
/// The backend cache which stores data.
|
/// The backend cache which stores data.
|
||||||
cache: Arc<MokaCache<String, Vec<u8>>>,
|
cache: MokaCache<String, Vec<u8>>,
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(feature = "memory-cache")]
|
|
||||||
impl Clone for InMemoryCache {
|
|
||||||
fn clone(&self) -> Self {
|
|
||||||
Self {
|
|
||||||
cache: self.cache.clone(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(feature = "memory-cache")]
|
#[cfg(feature = "memory-cache")]
|
||||||
@ -416,17 +408,15 @@ impl Cacher for InMemoryCache {
|
|||||||
log::info!("Initialising in-memory cache");
|
log::info!("Initialising in-memory cache");
|
||||||
|
|
||||||
InMemoryCache {
|
InMemoryCache {
|
||||||
cache: Arc::new(
|
cache: MokaCache::builder()
|
||||||
MokaCache::builder()
|
.time_to_live(Duration::from_secs(config.cache_expiry_time.into()))
|
||||||
.time_to_live(Duration::from_secs(config.cache_expiry_time.into()))
|
.build(),
|
||||||
.build(),
|
|
||||||
),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn cached_results(&mut self, url: &str) -> Result<SearchResults, Report<CacheError>> {
|
async fn cached_results(&mut self, url: &str) -> Result<SearchResults, Report<CacheError>> {
|
||||||
let hashed_url_string = self.hash_url(url);
|
let hashed_url_string = self.hash_url(url);
|
||||||
match self.cache.get(&hashed_url_string).await {
|
match self.cache.get(&hashed_url_string) {
|
||||||
Some(res) => self.post_process_search_results(res).await,
|
Some(res) => self.post_process_search_results(res).await,
|
||||||
None => Err(Report::new(CacheError::MissingValue)),
|
None => Err(Report::new(CacheError::MissingValue)),
|
||||||
}
|
}
|
||||||
@ -437,18 +427,13 @@ impl Cacher for InMemoryCache {
|
|||||||
search_results: &[SearchResults],
|
search_results: &[SearchResults],
|
||||||
urls: &[String],
|
urls: &[String],
|
||||||
) -> Result<(), Report<CacheError>> {
|
) -> Result<(), Report<CacheError>> {
|
||||||
let mut tasks: Vec<_> = Vec::with_capacity(urls.len());
|
|
||||||
for (url, search_result) in urls.iter().zip(search_results.iter()) {
|
for (url, search_result) in urls.iter().zip(search_results.iter()) {
|
||||||
let hashed_url_string = self.hash_url(url);
|
let hashed_url_string = self.hash_url(url);
|
||||||
let bytes = self.pre_process_search_results(search_result).await?;
|
let bytes = self.pre_process_search_results(search_result).await?;
|
||||||
let new_self = self.clone();
|
self.cache.insert(hashed_url_string, bytes);
|
||||||
tasks.push(tokio::spawn(async move {
|
|
||||||
new_self.cache.insert(hashed_url_string, bytes).await
|
|
||||||
}));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
join_all(tasks).await;
|
self.cache.sync();
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -546,7 +531,7 @@ impl SharedCache {
|
|||||||
/// # Arguments
|
/// # Arguments
|
||||||
///
|
///
|
||||||
/// * `url` - It takes the search url as an argument which will be used as the key to fetch the
|
/// * `url` - It takes the search url as an argument which will be used as the key to fetch the
|
||||||
/// cached results from the cache.
|
/// cached results from the cache.
|
||||||
///
|
///
|
||||||
/// # Error
|
/// # Error
|
||||||
///
|
///
|
||||||
@ -563,9 +548,9 @@ impl SharedCache {
|
|||||||
/// # Arguments
|
/// # Arguments
|
||||||
///
|
///
|
||||||
/// * `search_results` - It takes the `SearchResults` as an argument which are results that
|
/// * `search_results` - It takes the `SearchResults` as an argument which are results that
|
||||||
/// needs to be cached.
|
/// needs to be cached.
|
||||||
/// * `url` - It takes the search url as an argument which will be used as the key for storing
|
/// * `url` - It takes the search url as an argument which will be used as the key for storing
|
||||||
/// results in the cache.
|
/// results in the cache.
|
||||||
///
|
///
|
||||||
/// # Error
|
/// # Error
|
||||||
///
|
///
|
||||||
|
6
src/cache/redis_cacher.rs
vendored
6
src/cache/redis_cacher.rs
vendored
@ -16,7 +16,7 @@ const REDIS_PIPELINE_SIZE: usize = 3;
|
|||||||
/// connect to.
|
/// connect to.
|
||||||
pub struct RedisCache {
|
pub struct RedisCache {
|
||||||
/// It stores a pool of connections ready to be used.
|
/// It stores a pool of connections ready to be used.
|
||||||
connection_pool: Box<[ConnectionManager]>,
|
connection_pool: Vec<ConnectionManager>,
|
||||||
/// It stores the size of the connection pool (in other words the number of
|
/// It stores the size of the connection pool (in other words the number of
|
||||||
/// connections that should be stored in the pool).
|
/// connections that should be stored in the pool).
|
||||||
pool_size: u8,
|
pool_size: u8,
|
||||||
@ -58,13 +58,13 @@ impl RedisCache {
|
|||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut outputs = Vec::with_capacity(tasks.len());
|
let mut outputs = Vec::new();
|
||||||
for task in tasks {
|
for task in tasks {
|
||||||
outputs.push(task.await??);
|
outputs.push(task.await??);
|
||||||
}
|
}
|
||||||
|
|
||||||
let redis_cache = RedisCache {
|
let redis_cache = RedisCache {
|
||||||
connection_pool: outputs.into_boxed_slice(),
|
connection_pool: outputs,
|
||||||
pool_size,
|
pool_size,
|
||||||
current_connection: Default::default(),
|
current_connection: Default::default(),
|
||||||
cache_ttl,
|
cache_ttl,
|
||||||
|
@ -48,8 +48,6 @@ pub struct Config {
|
|||||||
pub tcp_connection_keep_alive: u8,
|
pub tcp_connection_keep_alive: u8,
|
||||||
/// It stores the pool idle connection timeout in seconds.
|
/// It stores the pool idle connection timeout in seconds.
|
||||||
pub pool_idle_connection_timeout: u8,
|
pub pool_idle_connection_timeout: u8,
|
||||||
/// It stores the number of https connections to keep in the pool.
|
|
||||||
pub number_of_https_connections: u8,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Config {
|
impl Config {
|
||||||
@ -59,7 +57,7 @@ impl Config {
|
|||||||
/// # Arguments
|
/// # Arguments
|
||||||
///
|
///
|
||||||
/// * `logging_initialized` - It takes a boolean which ensures that the logging doesn't get
|
/// * `logging_initialized` - It takes a boolean which ensures that the logging doesn't get
|
||||||
/// initialized twice. Pass false if the logger has not yet been initialized.
|
/// initialized twice. Pass false if the logger has not yet been initialized.
|
||||||
///
|
///
|
||||||
/// # Error
|
/// # Error
|
||||||
///
|
///
|
||||||
@ -141,7 +139,6 @@ impl Config {
|
|||||||
request_timeout: globals.get::<_, u8>("request_timeout")?,
|
request_timeout: globals.get::<_, u8>("request_timeout")?,
|
||||||
tcp_connection_keep_alive: globals.get::<_, u8>("tcp_connection_keep_alive")?,
|
tcp_connection_keep_alive: globals.get::<_, u8>("tcp_connection_keep_alive")?,
|
||||||
pool_idle_connection_timeout: globals.get::<_, u8>("pool_idle_connection_timeout")?,
|
pool_idle_connection_timeout: globals.get::<_, u8>("pool_idle_connection_timeout")?,
|
||||||
number_of_https_connections: globals.get::<_, u8>("number_of_https_connections")?,
|
|
||||||
threads,
|
threads,
|
||||||
client_connection_keep_alive: globals.get::<_, u8>("client_connection_keep_alive")?,
|
client_connection_keep_alive: globals.get::<_, u8>("client_connection_keep_alive")?,
|
||||||
rate_limiter: RateLimiter {
|
rate_limiter: RateLimiter {
|
||||||
|
@ -3,6 +3,7 @@
|
|||||||
|
|
||||||
use super::engine_models::EngineError;
|
use super::engine_models::EngineError;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
use smallvec::SmallVec;
|
||||||
#[cfg(any(
|
#[cfg(any(
|
||||||
feature = "use-synonyms-search",
|
feature = "use-synonyms-search",
|
||||||
feature = "use-non-static-synonyms-search"
|
feature = "use-non-static-synonyms-search"
|
||||||
@ -11,9 +12,7 @@ use thesaurus::synonyms;
|
|||||||
/// A named struct to store the raw scraped search results scraped search results from the
|
/// A named struct to store the raw scraped search results scraped search results from the
|
||||||
/// upstream search engines before aggregating it.It derives the Clone trait which is needed
|
/// upstream search engines before aggregating it.It derives the Clone trait which is needed
|
||||||
/// to write idiomatic rust using `Iterators`.
|
/// to write idiomatic rust using `Iterators`.
|
||||||
///
|
/// (href url in html in simple words).
|
||||||
/// (href url in html in simple words).
|
|
||||||
///
|
|
||||||
#[derive(Clone, Serialize, Deserialize)]
|
#[derive(Clone, Serialize, Deserialize)]
|
||||||
#[serde(rename_all = "camelCase")]
|
#[serde(rename_all = "camelCase")]
|
||||||
pub struct SearchResult {
|
pub struct SearchResult {
|
||||||
@ -24,7 +23,7 @@ pub struct SearchResult {
|
|||||||
/// The description of the search result.
|
/// The description of the search result.
|
||||||
pub description: String,
|
pub description: String,
|
||||||
/// The names of the upstream engines from which this results were provided.
|
/// The names of the upstream engines from which this results were provided.
|
||||||
pub engine: Vec<String>,
|
pub engine: SmallVec<[String; 0]>,
|
||||||
/// The td-tdf score of the result in regards to the title, url and description and the user's query
|
/// The td-tdf score of the result in regards to the title, url and description and the user's query
|
||||||
pub relevance_score: f32,
|
pub relevance_score: f32,
|
||||||
}
|
}
|
||||||
@ -36,7 +35,7 @@ impl SearchResult {
|
|||||||
///
|
///
|
||||||
/// * `title` - The title of the search result.
|
/// * `title` - The title of the search result.
|
||||||
/// * `url` - The url which is accessed when clicked on it
|
/// * `url` - The url which is accessed when clicked on it
|
||||||
/// (href url in html in simple words).
|
/// (href url in html in simple words).
|
||||||
/// * `description` - The description of the search result.
|
/// * `description` - The description of the search result.
|
||||||
/// * `engine` - The names of the upstream engines from which this results were provided.
|
/// * `engine` - The names of the upstream engines from which this results were provided.
|
||||||
pub fn new(title: &str, url: &str, description: &str, engine: &[&str]) -> Self {
|
pub fn new(title: &str, url: &str, description: &str, engine: &[&str]) -> Self {
|
||||||
@ -126,7 +125,7 @@ impl EngineErrorInfo {
|
|||||||
/// # Arguments
|
/// # Arguments
|
||||||
///
|
///
|
||||||
/// * `error` - It takes the error type which occured while fetching the result from a particular
|
/// * `error` - It takes the error type which occured while fetching the result from a particular
|
||||||
/// search engine.
|
/// search engine.
|
||||||
/// * `engine` - It takes the name of the engine that failed to provide the requested search results.
|
/// * `engine` - It takes the name of the engine that failed to provide the requested search results.
|
||||||
pub fn new(error: &EngineError, engine: &str) -> Self {
|
pub fn new(error: &EngineError, engine: &str) -> Self {
|
||||||
Self {
|
Self {
|
||||||
@ -154,10 +153,10 @@ impl EngineErrorInfo {
|
|||||||
#[serde(rename_all = "camelCase")]
|
#[serde(rename_all = "camelCase")]
|
||||||
pub struct SearchResults {
|
pub struct SearchResults {
|
||||||
/// Stores the individual serializable `SearchResult` struct into a vector of
|
/// Stores the individual serializable `SearchResult` struct into a vector of
|
||||||
pub results: Box<[SearchResult]>,
|
pub results: Vec<SearchResult>,
|
||||||
/// Stores the information on which engines failed with their engine name
|
/// Stores the information on which engines failed with their engine name
|
||||||
/// and the type of error that caused it.
|
/// and the type of error that caused it.
|
||||||
pub engine_errors_info: Box<[EngineErrorInfo]>,
|
pub engine_errors_info: Vec<EngineErrorInfo>,
|
||||||
/// Stores the flag option which holds the check value that the following
|
/// Stores the flag option which holds the check value that the following
|
||||||
/// search query was disallowed when the safe search level set to 4 and it
|
/// search query was disallowed when the safe search level set to 4 and it
|
||||||
/// was present in the `Blocklist` file.
|
/// was present in the `Blocklist` file.
|
||||||
@ -179,15 +178,15 @@ impl SearchResults {
|
|||||||
/// # Arguments
|
/// # Arguments
|
||||||
///
|
///
|
||||||
/// * `results` - Takes an argument of individual serializable `SearchResult` struct
|
/// * `results` - Takes an argument of individual serializable `SearchResult` struct
|
||||||
/// and stores it into a vector of `SearchResult` structs.
|
/// and stores it into a vector of `SearchResult` structs.
|
||||||
/// * `page_query` - Takes an argument of current page`s search query `q` provided in
|
/// * `page_query` - Takes an argument of current page`s search query `q` provided in
|
||||||
/// the search url.
|
/// the search url.
|
||||||
/// * `engine_errors_info` - Takes an array of structs which contains information regarding
|
/// * `engine_errors_info` - Takes an array of structs which contains information regarding
|
||||||
/// which engines failed with their names, reason and their severity color name.
|
/// which engines failed with their names, reason and their severity color name.
|
||||||
pub fn new(results: Box<[SearchResult]>, engine_errors_info: Box<[EngineErrorInfo]>) -> Self {
|
pub fn new(results: Vec<SearchResult>, engine_errors_info: &[EngineErrorInfo]) -> Self {
|
||||||
Self {
|
Self {
|
||||||
results,
|
results,
|
||||||
engine_errors_info,
|
engine_errors_info: engine_errors_info.to_owned(),
|
||||||
disallowed: Default::default(),
|
disallowed: Default::default(),
|
||||||
filtered: Default::default(),
|
filtered: Default::default(),
|
||||||
safe_search_level: Default::default(),
|
safe_search_level: Default::default(),
|
||||||
@ -206,11 +205,11 @@ impl SearchResults {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// A getter function that gets the value of `engine_errors_info`.
|
/// A getter function that gets the value of `engine_errors_info`.
|
||||||
pub fn engine_errors_info(&mut self) -> Box<[EngineErrorInfo]> {
|
pub fn engine_errors_info(&mut self) -> Vec<EngineErrorInfo> {
|
||||||
std::mem::take(&mut self.engine_errors_info)
|
std::mem::take(&mut self.engine_errors_info)
|
||||||
}
|
}
|
||||||
/// A getter function that gets the value of `results`.
|
/// A getter function that gets the value of `results`.
|
||||||
pub fn results(&mut self) -> Box<[SearchResult]> {
|
pub fn results(&mut self) -> Vec<SearchResult> {
|
||||||
self.results.clone()
|
self.results.clone()
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -255,50 +254,27 @@ fn calculate_tf_idf(
|
|||||||
let tf_idf = TfIdf::new(params);
|
let tf_idf = TfIdf::new(params);
|
||||||
let tokener = Tokenizer::new(query, stop_words, Some(punctuation));
|
let tokener = Tokenizer::new(query, stop_words, Some(punctuation));
|
||||||
let query_tokens = tokener.split_into_words();
|
let query_tokens = tokener.split_into_words();
|
||||||
|
let mut search_tokens = vec![];
|
||||||
|
|
||||||
#[cfg(any(
|
for token in query_tokens {
|
||||||
feature = "use-synonyms-search",
|
#[cfg(any(
|
||||||
feature = "use-non-static-synonyms-search"
|
feature = "use-synonyms-search",
|
||||||
))]
|
feature = "use-non-static-synonyms-search"
|
||||||
let mut extra_tokens = vec![];
|
))]
|
||||||
|
{
|
||||||
|
// find some synonyms and add them to the search (from wordnet or moby if feature is enabled)
|
||||||
|
let synonyms = synonyms(&token);
|
||||||
|
search_tokens.extend(synonyms)
|
||||||
|
}
|
||||||
|
search_tokens.push(token);
|
||||||
|
}
|
||||||
|
|
||||||
let total_score: f32 = query_tokens
|
let mut total_score = 0.0f32;
|
||||||
.iter()
|
for token in search_tokens.iter() {
|
||||||
.map(|token| {
|
total_score += tf_idf.get_score(token);
|
||||||
#[cfg(any(
|
}
|
||||||
feature = "use-synonyms-search",
|
|
||||||
feature = "use-non-static-synonyms-search"
|
|
||||||
))]
|
|
||||||
{
|
|
||||||
// find some synonyms and add them to the search (from wordnet or moby if feature is enabled)
|
|
||||||
extra_tokens.extend(synonyms(token))
|
|
||||||
}
|
|
||||||
|
|
||||||
tf_idf.get_score(token)
|
let result = total_score / (search_tokens.len() as f32);
|
||||||
})
|
|
||||||
.sum();
|
|
||||||
|
|
||||||
#[cfg(not(any(
|
|
||||||
feature = "use-synonyms-search",
|
|
||||||
feature = "use-non-static-synonyms-search"
|
|
||||||
)))]
|
|
||||||
let result = total_score / (query_tokens.len() as f32);
|
|
||||||
|
|
||||||
#[cfg(any(
|
|
||||||
feature = "use-synonyms-search",
|
|
||||||
feature = "use-non-static-synonyms-search"
|
|
||||||
))]
|
|
||||||
let extra_total_score: f32 = extra_tokens
|
|
||||||
.iter()
|
|
||||||
.map(|token| tf_idf.get_score(token))
|
|
||||||
.sum();
|
|
||||||
|
|
||||||
#[cfg(any(
|
|
||||||
feature = "use-synonyms-search",
|
|
||||||
feature = "use-non-static-synonyms-search"
|
|
||||||
))]
|
|
||||||
let result =
|
|
||||||
(extra_total_score + total_score) / ((query_tokens.len() + extra_tokens.len()) as f32);
|
|
||||||
|
|
||||||
f32::from(!result.is_nan()) * result
|
f32::from(!result.is_nan()) * result
|
||||||
}
|
}
|
||||||
|
@ -29,7 +29,7 @@ impl Style {
|
|||||||
///
|
///
|
||||||
/// * `theme` - It takes the parsed theme option used to set a theme for the website.
|
/// * `theme` - It takes the parsed theme option used to set a theme for the website.
|
||||||
/// * `colorscheme` - It takes the parsed colorscheme option used to set a colorscheme
|
/// * `colorscheme` - It takes the parsed colorscheme option used to set a colorscheme
|
||||||
/// for the theme being used.
|
/// for the theme being used.
|
||||||
pub fn new(theme: String, colorscheme: String, animation: Option<String>) -> Self {
|
pub fn new(theme: String, colorscheme: String, animation: Option<String>) -> Self {
|
||||||
Style {
|
Style {
|
||||||
theme,
|
theme,
|
||||||
|
@ -11,7 +11,7 @@ use super::parser_models::Style;
|
|||||||
pub struct SearchParams {
|
pub struct SearchParams {
|
||||||
/// It stores the search parameter option `q` (or query in simple words)
|
/// It stores the search parameter option `q` (or query in simple words)
|
||||||
/// of the search url.
|
/// of the search url.
|
||||||
pub q: Option<Cow<'static, str>>,
|
pub q: Option<String>,
|
||||||
/// It stores the search parameter `page` (or pageno in simple words)
|
/// It stores the search parameter `page` (or pageno in simple words)
|
||||||
/// of the search url.
|
/// of the search url.
|
||||||
pub page: Option<u32>,
|
pub page: Option<u32>,
|
||||||
@ -29,7 +29,7 @@ pub struct Cookie<'a> {
|
|||||||
/// It stores the colorscheme name used for the website theme.
|
/// It stores the colorscheme name used for the website theme.
|
||||||
pub colorscheme: Cow<'a, str>,
|
pub colorscheme: Cow<'a, str>,
|
||||||
/// It stores the user selected upstream search engines selected from the UI.
|
/// It stores the user selected upstream search engines selected from the UI.
|
||||||
pub engines: Cow<'a, [Cow<'a, str>]>,
|
pub engines: Cow<'a, Vec<Cow<'a, str>>>,
|
||||||
/// It stores the user selected safe search level from the UI.
|
/// It stores the user selected safe search level from the UI.
|
||||||
pub safe_search_level: u8,
|
pub safe_search_level: u8,
|
||||||
}
|
}
|
||||||
|
@ -14,6 +14,7 @@ use futures::stream::FuturesUnordered;
|
|||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use reqwest::{Client, ClientBuilder};
|
use reqwest::{Client, ClientBuilder};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
use std::time::{SystemTime, UNIX_EPOCH};
|
||||||
use tokio::{
|
use tokio::{
|
||||||
fs::File,
|
fs::File,
|
||||||
io::{AsyncBufReadExt, BufReader},
|
io::{AsyncBufReadExt, BufReader},
|
||||||
@ -60,7 +61,7 @@ type FutureVec =
|
|||||||
/// * `debug` - Accepts a boolean value to enable or disable debug mode option.
|
/// * `debug` - Accepts a boolean value to enable or disable debug mode option.
|
||||||
/// * `upstream_search_engines` - Accepts a vector of search engine names which was selected by the
|
/// * `upstream_search_engines` - Accepts a vector of search engine names which was selected by the
|
||||||
/// * `request_timeout` - Accepts a time (secs) as a value which controls the server request timeout.
|
/// * `request_timeout` - Accepts a time (secs) as a value which controls the server request timeout.
|
||||||
/// user through the UI or the config file.
|
/// user through the UI or the config file.
|
||||||
///
|
///
|
||||||
/// # Error
|
/// # Error
|
||||||
///
|
///
|
||||||
@ -81,7 +82,6 @@ pub async fn aggregate(
|
|||||||
config.pool_idle_connection_timeout as u64,
|
config.pool_idle_connection_timeout as u64,
|
||||||
))
|
))
|
||||||
.tcp_keepalive(Duration::from_secs(config.tcp_connection_keep_alive as u64))
|
.tcp_keepalive(Duration::from_secs(config.tcp_connection_keep_alive as u64))
|
||||||
.pool_max_idle_per_host(config.number_of_https_connections as usize)
|
|
||||||
.connect_timeout(Duration::from_secs(config.request_timeout as u64)) // Add timeout to request to avoid DDOSing the server
|
.connect_timeout(Duration::from_secs(config.request_timeout as u64)) // Add timeout to request to avoid DDOSing the server
|
||||||
.https_only(true)
|
.https_only(true)
|
||||||
.gzip(true)
|
.gzip(true)
|
||||||
@ -93,6 +93,13 @@ pub async fn aggregate(
|
|||||||
|
|
||||||
let user_agent: &str = random_user_agent();
|
let user_agent: &str = random_user_agent();
|
||||||
|
|
||||||
|
// Add a random delay before making the request.
|
||||||
|
if config.aggregator.random_delay || !config.debug {
|
||||||
|
let nanos = SystemTime::now().duration_since(UNIX_EPOCH)?.subsec_nanos() as f32;
|
||||||
|
let delay = ((nanos / 1_0000_0000 as f32).floor() as u64) + 1;
|
||||||
|
tokio::time::sleep(Duration::from_secs(delay)).await;
|
||||||
|
}
|
||||||
|
|
||||||
let mut names: Vec<&str> = Vec::with_capacity(0);
|
let mut names: Vec<&str> = Vec::with_capacity(0);
|
||||||
|
|
||||||
// create tasks for upstream result fetching
|
// create tasks for upstream result fetching
|
||||||
@ -181,21 +188,19 @@ pub async fn aggregate(
|
|||||||
drop(blacklist_map);
|
drop(blacklist_map);
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut results: Box<[SearchResult]> = result_map
|
let mut results: Vec<SearchResult> = result_map
|
||||||
.into_iter()
|
.iter()
|
||||||
.map(|(_, mut value)| {
|
.map(|(_, value)| {
|
||||||
if !value.url.contains("temu.com") {
|
let mut copy = value.clone();
|
||||||
value.calculate_relevance(query.as_str())
|
if !copy.url.contains("temu.com") {
|
||||||
|
copy.calculate_relevance(query.as_str())
|
||||||
}
|
}
|
||||||
value
|
copy
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
sort_search_results(&mut results);
|
sort_search_results(&mut results);
|
||||||
|
|
||||||
Ok(SearchResults::new(
|
Ok(SearchResults::new(results, &engine_errors_info))
|
||||||
results,
|
|
||||||
engine_errors_info.into_boxed_slice(),
|
|
||||||
))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Filters a map of search results using a list of regex patterns.
|
/// Filters a map of search results using a list of regex patterns.
|
||||||
@ -260,6 +265,7 @@ fn sort_search_results(results: &mut [SearchResult]) {
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
use smallvec::smallvec;
|
||||||
use std::io::Write;
|
use std::io::Write;
|
||||||
use tempfile::NamedTempFile;
|
use tempfile::NamedTempFile;
|
||||||
|
|
||||||
@ -275,7 +281,7 @@ mod tests {
|
|||||||
description: "This domain is for use in illustrative examples in documents."
|
description: "This domain is for use in illustrative examples in documents."
|
||||||
.to_owned(),
|
.to_owned(),
|
||||||
relevance_score: 0.0,
|
relevance_score: 0.0,
|
||||||
engine: vec!["Google".to_owned(), "Bing".to_owned()],
|
engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
|
||||||
},
|
},
|
||||||
));
|
));
|
||||||
map_to_be_filtered.push((
|
map_to_be_filtered.push((
|
||||||
@ -284,7 +290,7 @@ mod tests {
|
|||||||
title: "Rust Programming Language".to_owned(),
|
title: "Rust Programming Language".to_owned(),
|
||||||
url: "https://www.rust-lang.org/".to_owned(),
|
url: "https://www.rust-lang.org/".to_owned(),
|
||||||
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
|
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
|
||||||
engine: vec!["Google".to_owned(), "DuckDuckGo".to_owned()],
|
engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
|
||||||
relevance_score:0.0
|
relevance_score:0.0
|
||||||
},)
|
},)
|
||||||
);
|
);
|
||||||
@ -325,7 +331,7 @@ mod tests {
|
|||||||
url: "https://www.example.com".to_owned(),
|
url: "https://www.example.com".to_owned(),
|
||||||
description: "This domain is for use in illustrative examples in documents."
|
description: "This domain is for use in illustrative examples in documents."
|
||||||
.to_owned(),
|
.to_owned(),
|
||||||
engine: vec!["Google".to_owned(), "Bing".to_owned()],
|
engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
|
||||||
relevance_score: 0.0,
|
relevance_score: 0.0,
|
||||||
},
|
},
|
||||||
));
|
));
|
||||||
@ -335,7 +341,7 @@ mod tests {
|
|||||||
title: "Rust Programming Language".to_owned(),
|
title: "Rust Programming Language".to_owned(),
|
||||||
url: "https://www.rust-lang.org/".to_owned(),
|
url: "https://www.rust-lang.org/".to_owned(),
|
||||||
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
|
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
|
||||||
engine: vec!["Google".to_owned(), "DuckDuckGo".to_owned()],
|
engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
|
||||||
relevance_score:0.0
|
relevance_score:0.0
|
||||||
},
|
},
|
||||||
));
|
));
|
||||||
@ -392,7 +398,7 @@ mod tests {
|
|||||||
url: "https://www.example.com".to_owned(),
|
url: "https://www.example.com".to_owned(),
|
||||||
description: "This domain is for use in illustrative examples in documents."
|
description: "This domain is for use in illustrative examples in documents."
|
||||||
.to_owned(),
|
.to_owned(),
|
||||||
engine: vec!["Google".to_owned(), "Bing".to_owned()],
|
engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
|
||||||
relevance_score: 0.0,
|
relevance_score: 0.0,
|
||||||
},
|
},
|
||||||
));
|
));
|
||||||
|
@ -14,8 +14,7 @@ use crate::{
|
|||||||
use actix_web::{get, http::header::ContentType, web, HttpRequest, HttpResponse};
|
use actix_web::{get, http::header::ContentType, web, HttpRequest, HttpResponse};
|
||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use std::time::{SystemTime, UNIX_EPOCH};
|
use std::borrow::Cow;
|
||||||
use std::{borrow::Cow, time::Duration};
|
|
||||||
use tokio::{
|
use tokio::{
|
||||||
fs::File,
|
fs::File,
|
||||||
io::{AsyncBufReadExt, BufReader},
|
io::{AsyncBufReadExt, BufReader},
|
||||||
@ -84,13 +83,6 @@ pub async fn search(
|
|||||||
let previous_page = page.saturating_sub(1);
|
let previous_page = page.saturating_sub(1);
|
||||||
let next_page = page + 1;
|
let next_page = page + 1;
|
||||||
|
|
||||||
// Add a random delay before making the request.
|
|
||||||
if config.aggregator.random_delay || !config.debug {
|
|
||||||
let nanos = SystemTime::now().duration_since(UNIX_EPOCH)?.subsec_nanos() as f32;
|
|
||||||
let delay = ((nanos / 1_0000_0000 as f32).floor() as u64) + 1;
|
|
||||||
tokio::time::sleep(Duration::from_secs(delay)).await;
|
|
||||||
}
|
|
||||||
|
|
||||||
let results: (SearchResults, String, bool);
|
let results: (SearchResults, String, bool);
|
||||||
if page != previous_page {
|
if page != previous_page {
|
||||||
let (previous_results, current_results, next_results) = join!(
|
let (previous_results, current_results, next_results) = join!(
|
||||||
@ -104,7 +96,9 @@ pub async fn search(
|
|||||||
let (results_list, cache_keys): (Vec<SearchResults>, Vec<String>) =
|
let (results_list, cache_keys): (Vec<SearchResults>, Vec<String>) =
|
||||||
[previous_results?, results.clone(), next_results?]
|
[previous_results?, results.clone(), next_results?]
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.filter_map(|(result, cache_key, flag)| flag.then_some((result, cache_key)))
|
.filter_map(|(result, cache_key, flag)| {
|
||||||
|
dbg!(flag).then_some((result, cache_key))
|
||||||
|
})
|
||||||
.multiunzip();
|
.multiunzip();
|
||||||
|
|
||||||
tokio::spawn(async move { cache.cache_results(&results_list, &cache_keys).await });
|
tokio::spawn(async move { cache.cache_results(&results_list, &cache_keys).await });
|
||||||
@ -146,7 +140,7 @@ pub async fn search(
|
|||||||
/// # Arguments
|
/// # Arguments
|
||||||
///
|
///
|
||||||
/// * `url` - It takes the url of the current page that requested the search results for a
|
/// * `url` - It takes the url of the current page that requested the search results for a
|
||||||
/// particular search query.
|
/// particular search query.
|
||||||
/// * `config` - It takes a parsed config struct.
|
/// * `config` - It takes a parsed config struct.
|
||||||
/// * `query` - It takes the page number as u32 value.
|
/// * `query` - It takes the page number as u32 value.
|
||||||
/// * `req` - It takes the `HttpRequest` struct as a value.
|
/// * `req` - It takes the `HttpRequest` struct as a value.
|
||||||
|
@ -12,7 +12,7 @@ const SAFE_SEARCH_LEVELS_NAME: [&str; 3] = ["None", "Low", "Moderate"];
|
|||||||
/// # Arguments
|
/// # Arguments
|
||||||
///
|
///
|
||||||
/// * `engine_errors_info` - It takes the engine errors list containing errors for each upstream
|
/// * `engine_errors_info` - It takes the engine errors list containing errors for each upstream
|
||||||
/// search engine which failed to provide results as an argument.
|
/// search engine which failed to provide results as an argument.
|
||||||
/// * `safe_search_level` - It takes the safe search level with values from 0-2 as an argument.
|
/// * `safe_search_level` - It takes the safe search level with values from 0-2 as an argument.
|
||||||
/// * `query` - It takes the current search query provided by user as an argument.
|
/// * `query` - It takes the current search query provided by user as an argument.
|
||||||
///
|
///
|
||||||
|
@ -9,7 +9,7 @@ use maud::{html, Markup};
|
|||||||
/// # Arguments
|
/// # Arguments
|
||||||
///
|
///
|
||||||
/// * `engine_names` - It takes the key value pair list of all available engine names and there corresponding
|
/// * `engine_names` - It takes the key value pair list of all available engine names and there corresponding
|
||||||
/// selected (enabled/disabled) value as an argument.
|
/// selected (enabled/disabled) value as an argument.
|
||||||
///
|
///
|
||||||
/// # Returns
|
/// # Returns
|
||||||
///
|
///
|
||||||
|
@ -11,9 +11,9 @@ use std::fs::read_dir;
|
|||||||
/// # Arguments
|
/// # Arguments
|
||||||
///
|
///
|
||||||
/// * `style_type` - It takes the style type of the values `theme` and `colorscheme` as an
|
/// * `style_type` - It takes the style type of the values `theme` and `colorscheme` as an
|
||||||
/// argument.
|
/// argument.
|
||||||
/// * `selected_style` - It takes the currently selected style value provided via the config file
|
/// * `selected_style` - It takes the currently selected style value provided via the config file
|
||||||
/// as an argument.
|
/// as an argument.
|
||||||
///
|
///
|
||||||
/// # Error
|
/// # Error
|
||||||
///
|
///
|
||||||
|
Loading…
Reference in New Issue
Block a user