mirror of
https://github.com/neon-mmd/websurfx.git
synced 2024-11-21 05:38:20 -05:00
⚡ perf: several optimizations for improving the performance of the engine (#540)
* ♻️ refactor: initialize & store the config & cache structs as a constant (#486) - initializes & stores the config & cache structs as a static constant. - Pass the config & cache structs as a static reference to all the functions handling their respective route. * ⚡ perf: replace hashmaps with vectors for fetching & aggregating results (#486) - replace hashmaps with vectors for fetching, collecting & aggregating results as it tends to be contigous & cache efficient data structure. - refactor & redesign algorithms for fetching & aggregating results centered around vectors in aggregate function. * ➕ build: add the future crate (#486) * ⚡ perf: use `futureunordered` for collecting results fetched from the tokio spawn tasks (#486) - using the `futureunordered` instead of vector for collecting results reduces the time it takes to fetch the results as the results do not need to come in specific order so any result that gets fetched first gets collected in the `futureunordered` type. Co-authored-by: Spencerjibz <spencernajib2@gmail.com> * ⚡ perf: initialize new async connections parallely using tokio spawn tasks (#486) * ⚡ perf: initialize redis pipeline struct once with the default size of 3 (#486) * ⚡ perf: reduce branch predictions by reducing conditional code branches (#486) * ✅ test(unit): provide unit test for the `get_safesearch_level` function (#486) * ⚡ perf: reduce clones & use index based loop to improve search results filtering performance (#486) * 🚨 fix(clippy): make clippy/format checks happy (#486) * 🚨 fix(build): make the cargo build check happy (#486) * ⚡ perf: reduce the amount of clones, to_owneds & to_strings (#486) * ⚡ perf: use async crates & methods & make functions async (#486) * 🔖 chore(release): bump the app version (#486) --------- Co-authored-by: Spencerjibz <spencernajib2@gmail.com>
This commit is contained in:
parent
8d9b660eb1
commit
991f3f59de
617
Cargo.lock
generated
617
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
12
Cargo.toml
12
Cargo.toml
@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "websurfx"
|
||||
version = "1.9.20"
|
||||
version = "1.10.9"
|
||||
edition = "2021"
|
||||
description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
|
||||
repository = "https://github.com/neon-mmd/websurfx"
|
||||
@ -14,7 +14,7 @@ path = "src/bin/websurfx.rs"
|
||||
|
||||
[dependencies]
|
||||
reqwest = {version="0.11.24", default-features=false, features=["rustls-tls","brotli", "gzip"]}
|
||||
tokio = {version="1.32.0",features=["rt-multi-thread","macros"], default-features = false}
|
||||
tokio = {version="1.32.0",features=["rt-multi-thread","macros", "fs", "io-util"], default-features = false}
|
||||
serde = {version="1.0.196", default-features=false, features=["derive"]}
|
||||
serde_json = {version="1.0.109", default-features=false}
|
||||
maud = {version="0.25.0", default-features=false, features=["actix-web"]}
|
||||
@ -32,13 +32,13 @@ error-stack = {version="0.4.0", default-features=false, features=["std"]}
|
||||
async-trait = {version="0.1.76", default-features=false}
|
||||
regex = {version="1.9.4", features=["perf"], default-features = false}
|
||||
smallvec = {version="1.13.1", features=["union", "serde"], default-features=false}
|
||||
futures = {version="0.3.28", default-features=false}
|
||||
dhat = {version="0.3.3", optional = true, default-features=false}
|
||||
futures = {version="0.3.30", default-features=false, features=["alloc"]}
|
||||
dhat = {version="0.3.2", optional = true, default-features=false}
|
||||
mimalloc = { version = "0.1.38", default-features = false }
|
||||
async-once-cell = {version="0.5.3", default-features=false}
|
||||
actix-governor = {version="0.5.0", default-features=false}
|
||||
mini-moka = { version="0.10", optional = true, default-features=false, features=["sync"]}
|
||||
brotli = { version = "3.4.0", default-features = false, features=["std"], optional=true}
|
||||
async-compression = { version = "0.4.6", default-features = false, features=["brotli","tokio"], optional=true}
|
||||
chacha20poly1305={version="0.10.1", default-features=false, features=["alloc","getrandom"], optional=true}
|
||||
chacha20 = {version="0.9.1", default-features=false, optional=true}
|
||||
base64 = {version="0.21.5", default-features=false, features=["std"], optional=true}
|
||||
@ -84,7 +84,7 @@ default = ["memory-cache"]
|
||||
dhat-heap = ["dep:dhat"]
|
||||
memory-cache = ["dep:mini-moka"]
|
||||
redis-cache = ["dep:redis","dep:base64"]
|
||||
compress-cache-results = ["dep:brotli","dep:cfg-if"]
|
||||
compress-cache-results = ["dep:async-compression","dep:cfg-if"]
|
||||
encrypt-cache-results = ["dep:chacha20poly1305","dep:chacha20"]
|
||||
cec-cache-results = ["compress-cache-results","encrypt-cache-results"]
|
||||
|
||||
|
@ -5,7 +5,7 @@
|
||||
#[cfg(not(feature = "dhat-heap"))]
|
||||
use mimalloc::MiMalloc;
|
||||
|
||||
use std::net::TcpListener;
|
||||
use std::{net::TcpListener, sync::OnceLock};
|
||||
use websurfx::{cache::cacher::create_cache, config::parser::Config, run};
|
||||
|
||||
/// A dhat heap memory profiler
|
||||
@ -17,6 +17,9 @@ static ALLOC: dhat::Alloc = dhat::Alloc;
|
||||
#[global_allocator]
|
||||
static GLOBAL: MiMalloc = MiMalloc;
|
||||
|
||||
/// A static constant for holding the parsed config.
|
||||
static CONFIG: OnceLock<Config> = OnceLock::new();
|
||||
|
||||
/// The function that launches the main server and registers all the routes of the website.
|
||||
///
|
||||
/// # Error
|
||||
@ -29,10 +32,10 @@ async fn main() -> std::io::Result<()> {
|
||||
#[cfg(feature = "dhat-heap")]
|
||||
let _profiler = dhat::Profiler::new_heap();
|
||||
|
||||
// Initialize the parsed config file.
|
||||
let config = Config::parse(false).unwrap();
|
||||
// Initialize the parsed config globally.
|
||||
let config = CONFIG.get_or_init(|| Config::parse(false).unwrap());
|
||||
|
||||
let cache = create_cache(&config).await;
|
||||
let cache = create_cache(config).await;
|
||||
|
||||
log::info!(
|
||||
"started server on port {} and IP {}",
|
||||
@ -45,7 +48,7 @@ async fn main() -> std::io::Result<()> {
|
||||
config.port,
|
||||
);
|
||||
|
||||
let listener = TcpListener::bind((config.binding_ip.clone(), config.port))?;
|
||||
let listener = TcpListener::bind((config.binding_ip.as_str(), config.port))?;
|
||||
|
||||
run(listener, config, cache)?.await
|
||||
}
|
||||
|
68
src/cache/cacher.rs
vendored
68
src/cache/cacher.rs
vendored
@ -93,7 +93,7 @@ pub trait Cacher: Send + Sync {
|
||||
feature = "encrypt-cache-results",
|
||||
feature = "cec-cache-results"
|
||||
))]
|
||||
fn encrypt_or_decrypt_results(
|
||||
async fn encrypt_or_decrypt_results(
|
||||
&mut self,
|
||||
mut bytes: Vec<u8>,
|
||||
encrypt: bool,
|
||||
@ -137,11 +137,19 @@ pub trait Cacher: Send + Sync {
|
||||
/// Returns the compressed bytes on success otherwise it returns a CacheError
|
||||
/// on failure.
|
||||
#[cfg(any(feature = "compress-cache-results", feature = "cec-cache-results"))]
|
||||
fn compress_results(&mut self, mut bytes: Vec<u8>) -> Result<Vec<u8>, Report<CacheError>> {
|
||||
use std::io::Write;
|
||||
let mut writer = brotli::CompressorWriter::new(Vec::new(), 4096, 11, 22);
|
||||
async fn compress_results(
|
||||
&mut self,
|
||||
mut bytes: Vec<u8>,
|
||||
) -> Result<Vec<u8>, Report<CacheError>> {
|
||||
use tokio::io::AsyncWriteExt;
|
||||
let mut writer = async_compression::tokio::write::BrotliEncoder::new(Vec::new());
|
||||
writer
|
||||
.write_all(&bytes)
|
||||
.await
|
||||
.map_err(|_| CacheError::CompressionError)?;
|
||||
writer
|
||||
.shutdown()
|
||||
.await
|
||||
.map_err(|_| CacheError::CompressionError)?;
|
||||
bytes = writer.into_inner();
|
||||
Ok(bytes)
|
||||
@ -159,17 +167,17 @@ pub trait Cacher: Send + Sync {
|
||||
/// Returns the compressed and encrypted bytes on success otherwise it returns a CacheError
|
||||
/// on failure.
|
||||
#[cfg(feature = "cec-cache-results")]
|
||||
fn compress_encrypt_compress_results(
|
||||
async fn compress_encrypt_compress_results(
|
||||
&mut self,
|
||||
mut bytes: Vec<u8>,
|
||||
) -> Result<Vec<u8>, Report<CacheError>> {
|
||||
// compress first
|
||||
bytes = self.compress_results(bytes)?;
|
||||
bytes = self.compress_results(bytes).await?;
|
||||
// encrypt
|
||||
bytes = self.encrypt_or_decrypt_results(bytes, true)?;
|
||||
bytes = self.encrypt_or_decrypt_results(bytes, true).await?;
|
||||
|
||||
// compress again;
|
||||
bytes = self.compress_results(bytes)?;
|
||||
bytes = self.compress_results(bytes).await?;
|
||||
|
||||
Ok(bytes)
|
||||
}
|
||||
@ -187,11 +195,11 @@ pub trait Cacher: Send + Sync {
|
||||
/// on failure.
|
||||
|
||||
#[cfg(any(feature = "compress-cache-results", feature = "cec-cache-results"))]
|
||||
fn decompress_results(&mut self, bytes: &[u8]) -> Result<Vec<u8>, Report<CacheError>> {
|
||||
async fn decompress_results(&mut self, bytes: &[u8]) -> Result<Vec<u8>, Report<CacheError>> {
|
||||
cfg_if::cfg_if! {
|
||||
if #[cfg(feature = "compress-cache-results")]
|
||||
{
|
||||
decompress_util(bytes)
|
||||
decompress_util(bytes).await
|
||||
|
||||
}
|
||||
else if #[cfg(feature = "cec-cache-results")]
|
||||
@ -199,7 +207,7 @@ pub trait Cacher: Send + Sync {
|
||||
let decompressed = decompress_util(bytes)?;
|
||||
let decrypted = self.encrypt_or_decrypt_results(decompressed, false)?;
|
||||
|
||||
decompress_util(&decrypted)
|
||||
decompress_util(&decrypted).await
|
||||
|
||||
}
|
||||
}
|
||||
@ -216,7 +224,7 @@ pub trait Cacher: Send + Sync {
|
||||
/// # Error
|
||||
/// Returns a Vec of compressed or encrypted bytes on success otherwise it returns a CacheError
|
||||
/// on failure.
|
||||
fn pre_process_search_results(
|
||||
async fn pre_process_search_results(
|
||||
&mut self,
|
||||
search_results: &SearchResults,
|
||||
) -> Result<Vec<u8>, Report<CacheError>> {
|
||||
@ -224,19 +232,20 @@ pub trait Cacher: Send + Sync {
|
||||
let mut bytes: Vec<u8> = search_results.try_into()?;
|
||||
#[cfg(feature = "compress-cache-results")]
|
||||
{
|
||||
let compressed = self.compress_results(bytes)?;
|
||||
let compressed = self.compress_results(bytes).await?;
|
||||
bytes = compressed;
|
||||
}
|
||||
|
||||
#[cfg(feature = "encrypt-cache-results")]
|
||||
{
|
||||
let encrypted = self.encrypt_or_decrypt_results(bytes, true)?;
|
||||
let encrypted = self.encrypt_or_decrypt_results(bytes, true).await?;
|
||||
bytes = encrypted;
|
||||
}
|
||||
|
||||
#[cfg(feature = "cec-cache-results")]
|
||||
{
|
||||
let compressed_encrypted_compressed = self.compress_encrypt_compress_results(bytes)?;
|
||||
let compressed_encrypted_compressed =
|
||||
self.compress_encrypt_compress_results(bytes).await?;
|
||||
bytes = compressed_encrypted_compressed;
|
||||
}
|
||||
|
||||
@ -256,25 +265,25 @@ pub trait Cacher: Send + Sync {
|
||||
/// on failure.
|
||||
|
||||
#[allow(unused_mut)] // needs to be mutable when any of the features is enabled
|
||||
fn post_process_search_results(
|
||||
async fn post_process_search_results(
|
||||
&mut self,
|
||||
mut bytes: Vec<u8>,
|
||||
) -> Result<SearchResults, Report<CacheError>> {
|
||||
#[cfg(feature = "compress-cache-results")]
|
||||
{
|
||||
let decompressed = self.decompress_results(&bytes)?;
|
||||
let decompressed = self.decompress_results(&bytes).await?;
|
||||
bytes = decompressed
|
||||
}
|
||||
|
||||
#[cfg(feature = "encrypt-cache-results")]
|
||||
{
|
||||
let decrypted = self.encrypt_or_decrypt_results(bytes, false)?;
|
||||
let decrypted = self.encrypt_or_decrypt_results(bytes, false).await?;
|
||||
bytes = decrypted
|
||||
}
|
||||
|
||||
#[cfg(feature = "cec-cache-results")]
|
||||
{
|
||||
let decompressed_decrypted = self.decompress_results(&bytes)?;
|
||||
let decompressed_decrypted = self.decompress_results(&bytes).await?;
|
||||
bytes = decompressed_decrypted;
|
||||
}
|
||||
|
||||
@ -295,16 +304,19 @@ pub trait Cacher: Send + Sync {
|
||||
/// on failure.
|
||||
|
||||
#[cfg(any(feature = "compress-cache-results", feature = "cec-cache-results"))]
|
||||
fn decompress_util(input: &[u8]) -> Result<Vec<u8>, Report<CacheError>> {
|
||||
use std::io::Write;
|
||||
let mut writer = brotli::DecompressorWriter::new(Vec::new(), 4096);
|
||||
async fn decompress_util(input: &[u8]) -> Result<Vec<u8>, Report<CacheError>> {
|
||||
use tokio::io::AsyncWriteExt;
|
||||
let mut writer = async_compression::tokio::write::BrotliDecoder::new(Vec::new());
|
||||
|
||||
writer
|
||||
.write_all(input)
|
||||
.await
|
||||
.map_err(|_| CacheError::CompressionError)?;
|
||||
let bytes = writer
|
||||
.into_inner()
|
||||
writer
|
||||
.shutdown()
|
||||
.await
|
||||
.map_err(|_| CacheError::CompressionError)?;
|
||||
let bytes = writer.into_inner();
|
||||
Ok(bytes)
|
||||
}
|
||||
|
||||
@ -329,7 +341,7 @@ impl Cacher for RedisCache {
|
||||
let bytes = base64::engine::general_purpose::STANDARD_NO_PAD
|
||||
.decode(base64_string)
|
||||
.map_err(|_| CacheError::Base64DecodingOrEncodingError)?;
|
||||
self.post_process_search_results(bytes)
|
||||
self.post_process_search_results(bytes).await
|
||||
}
|
||||
|
||||
async fn cache_results(
|
||||
@ -345,7 +357,7 @@ impl Cacher for RedisCache {
|
||||
let mut bytes = Vec::with_capacity(search_results_len);
|
||||
|
||||
for result in search_results {
|
||||
let processed = self.pre_process_search_results(result)?;
|
||||
let processed = self.pre_process_search_results(result).await?;
|
||||
bytes.push(processed);
|
||||
}
|
||||
|
||||
@ -405,7 +417,7 @@ impl Cacher for InMemoryCache {
|
||||
async fn cached_results(&mut self, url: &str) -> Result<SearchResults, Report<CacheError>> {
|
||||
let hashed_url_string = self.hash_url(url);
|
||||
match self.cache.get(&hashed_url_string) {
|
||||
Some(res) => self.post_process_search_results(res),
|
||||
Some(res) => self.post_process_search_results(res).await,
|
||||
None => Err(Report::new(CacheError::MissingValue)),
|
||||
}
|
||||
}
|
||||
@ -417,7 +429,7 @@ impl Cacher for InMemoryCache {
|
||||
) -> Result<(), Report<CacheError>> {
|
||||
for (url, search_result) in urls.iter().zip(search_results.iter()) {
|
||||
let hashed_url_string = self.hash_url(url);
|
||||
let bytes = self.pre_process_search_results(search_result)?;
|
||||
let bytes = self.pre_process_search_results(search_result).await?;
|
||||
self.cache.insert(hashed_url_string, bytes);
|
||||
}
|
||||
|
||||
|
37
src/cache/redis_cacher.rs
vendored
37
src/cache/redis_cacher.rs
vendored
@ -1,15 +1,16 @@
|
||||
//! This module provides the functionality to cache the aggregated results fetched and aggregated
|
||||
//! from the upstream search engines in a json format.
|
||||
|
||||
use super::error::CacheError;
|
||||
use error_stack::Report;
|
||||
use futures::future::try_join_all;
|
||||
use futures::stream::FuturesUnordered;
|
||||
use redis::{aio::ConnectionManager, AsyncCommands, Client, RedisError};
|
||||
|
||||
use super::error::CacheError;
|
||||
/// A constant holding the redis pipeline size.
|
||||
const REDIS_PIPELINE_SIZE: usize = 3;
|
||||
|
||||
/// A named struct which stores the redis Connection url address to which the client will
|
||||
/// connect to.
|
||||
#[derive(Clone)]
|
||||
pub struct RedisCache {
|
||||
/// It stores a pool of connections ready to be used.
|
||||
connection_pool: Vec<ConnectionManager>,
|
||||
@ -20,6 +21,8 @@ pub struct RedisCache {
|
||||
current_connection: u8,
|
||||
/// It stores the max TTL for keys.
|
||||
cache_ttl: u16,
|
||||
/// It stores the redis pipeline struct of size 3.
|
||||
pipeline: redis::Pipeline,
|
||||
}
|
||||
|
||||
impl RedisCache {
|
||||
@ -30,6 +33,8 @@ impl RedisCache {
|
||||
/// * `redis_connection_url` - It takes the redis Connection url address.
|
||||
/// * `pool_size` - It takes the size of the connection pool (in other words the number of
|
||||
/// connections that should be stored in the pool).
|
||||
/// * `cache_ttl` - It takes the the time to live for cached results to live in the redis
|
||||
/// server.
|
||||
///
|
||||
/// # Error
|
||||
///
|
||||
@ -41,18 +46,28 @@ impl RedisCache {
|
||||
cache_ttl: u16,
|
||||
) -> Result<Self, Box<dyn std::error::Error>> {
|
||||
let client = Client::open(redis_connection_url)?;
|
||||
let mut tasks: Vec<_> = Vec::new();
|
||||
let tasks: FuturesUnordered<_> = FuturesUnordered::new();
|
||||
|
||||
for _ in 0..pool_size {
|
||||
tasks.push(client.get_connection_manager());
|
||||
let client_partially_cloned = client.clone();
|
||||
tasks.push(tokio::spawn(async move {
|
||||
client_partially_cloned.get_connection_manager().await
|
||||
}));
|
||||
}
|
||||
|
||||
let mut outputs = Vec::new();
|
||||
for task in tasks {
|
||||
outputs.push(task.await??);
|
||||
}
|
||||
|
||||
let redis_cache = RedisCache {
|
||||
connection_pool: try_join_all(tasks).await?,
|
||||
connection_pool: outputs,
|
||||
pool_size,
|
||||
current_connection: Default::default(),
|
||||
cache_ttl,
|
||||
pipeline: redis::Pipeline::with_capacity(REDIS_PIPELINE_SIZE),
|
||||
};
|
||||
|
||||
Ok(redis_cache)
|
||||
}
|
||||
|
||||
@ -122,13 +137,14 @@ impl RedisCache {
|
||||
keys: impl Iterator<Item = String>,
|
||||
) -> Result<(), Report<CacheError>> {
|
||||
self.current_connection = Default::default();
|
||||
let mut pipeline = redis::Pipeline::with_capacity(3);
|
||||
|
||||
for (key, json_result) in keys.zip(json_results) {
|
||||
pipeline.set_ex(key, json_result, self.cache_ttl.into());
|
||||
self.pipeline
|
||||
.set_ex(key, json_result, self.cache_ttl.into());
|
||||
}
|
||||
|
||||
let mut result: Result<(), RedisError> = pipeline
|
||||
let mut result: Result<(), RedisError> = self
|
||||
.pipeline
|
||||
.query_async(&mut self.connection_pool[self.current_connection as usize])
|
||||
.await;
|
||||
|
||||
@ -149,7 +165,8 @@ impl RedisCache {
|
||||
CacheError::PoolExhaustionWithConnectionDropError,
|
||||
));
|
||||
}
|
||||
result = pipeline
|
||||
result = self
|
||||
.pipeline
|
||||
.query_async(
|
||||
&mut self.connection_pool[self.current_connection as usize],
|
||||
)
|
||||
|
@ -9,7 +9,6 @@ use mlua::Lua;
|
||||
use std::{collections::HashMap, fs, thread::available_parallelism};
|
||||
|
||||
/// A named struct which stores the parsed config file options.
|
||||
#[derive(Clone)]
|
||||
pub struct Config {
|
||||
/// It stores the parsed port number option on which the server should launch.
|
||||
pub port: u16,
|
||||
|
@ -48,7 +48,7 @@ impl SearchEngine for Bing {
|
||||
user_agent: &str,
|
||||
client: &Client,
|
||||
_safe_search: u8,
|
||||
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
||||
) -> Result<Vec<(String, SearchResult)>, EngineError> {
|
||||
// Bing uses `start results from this number` convention
|
||||
// So, for 10 results per page, page 0 starts at 1, page 1
|
||||
// starts at 11, and so on.
|
||||
|
@ -44,7 +44,7 @@ impl SearchEngine for Brave {
|
||||
user_agent: &str,
|
||||
client: &Client,
|
||||
safe_search: u8,
|
||||
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
||||
) -> Result<Vec<(String, SearchResult)>, EngineError> {
|
||||
let url = format!("https://search.brave.com/search?q={query}&offset={page}");
|
||||
|
||||
let safe_search_level = match safe_search {
|
||||
|
@ -47,7 +47,7 @@ impl SearchEngine for DuckDuckGo {
|
||||
user_agent: &str,
|
||||
client: &Client,
|
||||
_safe_search: u8,
|
||||
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
||||
) -> Result<Vec<(String, SearchResult)>, EngineError> {
|
||||
// Page number can be missing or empty string and so appropriate handling is required
|
||||
// so that upstream server recieves valid page number.
|
||||
let url: String = match page {
|
||||
|
@ -62,7 +62,7 @@ impl SearchEngine for LibreX {
|
||||
user_agent: &str,
|
||||
client: &Client,
|
||||
_safe_search: u8,
|
||||
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
||||
) -> Result<Vec<(String, SearchResult)>, EngineError> {
|
||||
// Page number can be missing or empty string and so appropriate handling is required
|
||||
// so that upstream server recieves valid page number.
|
||||
let url: String = format!(
|
||||
|
@ -47,7 +47,7 @@ impl SearchEngine for Mojeek {
|
||||
user_agent: &str,
|
||||
client: &Client,
|
||||
safe_search: u8,
|
||||
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
||||
) -> Result<Vec<(String, SearchResult)>, EngineError> {
|
||||
// Mojeek uses `start results from this number` convention
|
||||
// So, for 10 results per page, page 0 starts at 1, page 1
|
||||
// starts at 11, and so on.
|
||||
@ -72,8 +72,23 @@ impl SearchEngine for Mojeek {
|
||||
"Yep",
|
||||
"You",
|
||||
];
|
||||
|
||||
let qss = search_engines.join("%2C");
|
||||
let safe = if safe_search == 0 { "0" } else { "1" };
|
||||
|
||||
// A branchless condition to check whether the `safe_search` parameter has the
|
||||
// value 0 or not. If it is zero then it sets the value 0 otherwise it sets
|
||||
// the value to 1 for all other values of `safe_search`
|
||||
//
|
||||
// Moreover, the below branchless code is equivalent to the following code below:
|
||||
//
|
||||
// ```rust
|
||||
// let safe = if safe_search == 0 { 0 } else { 1 }.to_string();
|
||||
// ```
|
||||
//
|
||||
// For more information on branchless programming. See:
|
||||
//
|
||||
// * https://piped.video/watch?v=bVJ-mWWL7cE
|
||||
let safe = u8::from(safe_search != 0).to_string();
|
||||
|
||||
// Mojeek detects automated requests, these are preferences that are
|
||||
// able to circumvent the countermeasure. Some of these are
|
||||
@ -89,7 +104,7 @@ impl SearchEngine for Mojeek {
|
||||
("hp", "minimal"),
|
||||
("lb", "en"),
|
||||
("qss", &qss),
|
||||
("safe", safe),
|
||||
("safe", &safe),
|
||||
];
|
||||
|
||||
let mut query_params_string = String::new();
|
||||
|
@ -1,5 +1,4 @@
|
||||
//! This modules provides helper functionalities for parsing a html document into internal SearchResult.
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::models::{aggregation_models::SearchResult, engine_models::EngineError};
|
||||
use error_stack::{Report, Result};
|
||||
@ -47,7 +46,7 @@ impl SearchResultParser {
|
||||
&self,
|
||||
document: &Html,
|
||||
builder: impl Fn(&ElementRef<'_>, &ElementRef<'_>, &ElementRef<'_>) -> Option<SearchResult>,
|
||||
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
||||
) -> Result<Vec<(String, SearchResult)>, EngineError> {
|
||||
let res = document
|
||||
.select(&self.results)
|
||||
.filter_map(|result| {
|
||||
|
@ -43,12 +43,21 @@ impl SearchEngine for Searx {
|
||||
user_agent: &str,
|
||||
client: &Client,
|
||||
mut safe_search: u8,
|
||||
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
||||
// Page number can be missing or empty string and so appropriate handling is required
|
||||
// so that upstream server recieves valid page number.
|
||||
if safe_search == 3 {
|
||||
safe_search = 2;
|
||||
};
|
||||
) -> Result<Vec<(String, SearchResult)>, EngineError> {
|
||||
// A branchless condition to check whether the `safe_search` parameter has the
|
||||
// value greater than equal to three or not. If it is, then it modifies the
|
||||
// `safesearch` parameters value to 2.
|
||||
//
|
||||
// Moreover, the below branchless code is equivalent to the following code below:
|
||||
//
|
||||
// ```rust
|
||||
// safe_search = u8::from(safe_search == 3) * 2;
|
||||
// ```
|
||||
//
|
||||
// For more information on branchless programming. See:
|
||||
//
|
||||
// * https://piped.video/watch?v=bVJ-mWWL7cE
|
||||
safe_search = u8::from(safe_search >= 3) * 2;
|
||||
|
||||
let url: String = format!(
|
||||
"https://searx.be/search?q={query}&pageno={}&safesearch={safe_search}",
|
||||
|
@ -47,7 +47,7 @@ impl SearchEngine for Startpage {
|
||||
user_agent: &str,
|
||||
client: &Client,
|
||||
_safe_search: u8,
|
||||
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
||||
) -> Result<Vec<(String, SearchResult)>, EngineError> {
|
||||
// Page number can be missing or empty string and so appropriate handling is required
|
||||
// so that upstream server recieves valid page number.
|
||||
let url: String = format!(
|
||||
|
29
src/lib.rs
29
src/lib.rs
@ -14,7 +14,7 @@ pub mod results;
|
||||
pub mod server;
|
||||
pub mod templates;
|
||||
|
||||
use std::net::TcpListener;
|
||||
use std::{net::TcpListener, sync::OnceLock};
|
||||
|
||||
use crate::server::router;
|
||||
|
||||
@ -31,6 +31,9 @@ use cache::cacher::{Cacher, SharedCache};
|
||||
use config::parser::Config;
|
||||
use handler::{file_path, FileType};
|
||||
|
||||
/// A static constant for holding the cache struct.
|
||||
static SHARED_CACHE: OnceLock<SharedCache> = OnceLock::new();
|
||||
|
||||
/// Runs the web server on the provided TCP listener and returns a `Server` instance.
|
||||
///
|
||||
/// # Arguments
|
||||
@ -44,27 +47,29 @@ use handler::{file_path, FileType};
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
/// use std::net::TcpListener;
|
||||
/// use std::{net::TcpListener, sync::OnceLock};
|
||||
/// use websurfx::{config::parser::Config, run, cache::cacher::create_cache};
|
||||
///
|
||||
/// /// A static constant for holding the parsed config.
|
||||
/// static CONFIG: OnceLock<Config> = OnceLock::new();
|
||||
///
|
||||
/// #[tokio::main]
|
||||
/// async fn main(){
|
||||
/// let config = Config::parse(true).unwrap();
|
||||
/// // Initialize the parsed config globally.
|
||||
/// let config = CONFIG.get_or_init(|| Config::parse(true).unwrap());
|
||||
/// let listener = TcpListener::bind("127.0.0.1:8080").expect("Failed to bind address");
|
||||
/// let cache = create_cache(&config).await;
|
||||
/// let server = run(listener,config,cache).expect("Failed to start server");
|
||||
/// let cache = create_cache(config).await;
|
||||
/// let server = run(listener,&config,cache).expect("Failed to start server");
|
||||
/// }
|
||||
/// ```
|
||||
pub fn run(
|
||||
listener: TcpListener,
|
||||
config: Config,
|
||||
config: &'static Config,
|
||||
cache: impl Cacher + 'static,
|
||||
) -> std::io::Result<Server> {
|
||||
let public_folder_path: &str = file_path(FileType::Theme)?;
|
||||
|
||||
let cloned_config_threads_opt: u8 = config.threads;
|
||||
|
||||
let cache = web::Data::new(SharedCache::new(cache));
|
||||
let cache = SHARED_CACHE.get_or_init(|| SharedCache::new(cache));
|
||||
|
||||
let server = HttpServer::new(move || {
|
||||
let cors: Cors = Cors::default()
|
||||
@ -81,8 +86,8 @@ pub fn run(
|
||||
// Compress the responses provided by the server for the client requests.
|
||||
.wrap(Compress::default())
|
||||
.wrap(Logger::default()) // added logging middleware for logging.
|
||||
.app_data(web::Data::new(config.clone()))
|
||||
.app_data(cache.clone())
|
||||
.app_data(web::Data::new(config))
|
||||
.app_data(web::Data::new(cache))
|
||||
.wrap(cors)
|
||||
.wrap(Governor::new(
|
||||
&GovernorConfigBuilder::default()
|
||||
@ -107,7 +112,7 @@ pub fn run(
|
||||
.service(router::settings) // settings page
|
||||
.default_service(web::route().to(router::not_found)) // error page
|
||||
})
|
||||
.workers(cloned_config_threads_opt as usize)
|
||||
.workers(config.threads as usize)
|
||||
// Start server on 127.0.0.1 with the user provided port number. for example 127.0.0.1:8080.
|
||||
.listen(listener)?
|
||||
.run();
|
||||
|
@ -154,8 +154,8 @@ impl SearchResults {
|
||||
}
|
||||
|
||||
/// A setter function that sets the filtered to true.
|
||||
pub fn set_filtered(&mut self) {
|
||||
self.filtered = true;
|
||||
pub fn set_filtered(&mut self, filtered: bool) {
|
||||
self.filtered = filtered;
|
||||
}
|
||||
|
||||
/// A getter function that gets the value of `engine_errors_info`.
|
||||
|
@ -4,7 +4,7 @@
|
||||
use super::aggregation_models::SearchResult;
|
||||
use error_stack::{Report, Result, ResultExt};
|
||||
use reqwest::Client;
|
||||
use std::{collections::HashMap, fmt};
|
||||
use std::fmt;
|
||||
|
||||
/// A custom error type used for handle engine associated errors.
|
||||
#[derive(Debug)]
|
||||
@ -147,7 +147,7 @@ pub trait SearchEngine: Sync + Send {
|
||||
user_agent: &str,
|
||||
client: &Client,
|
||||
safe_search: u8,
|
||||
) -> Result<HashMap<String, SearchResult>, EngineError>;
|
||||
) -> Result<Vec<(String, SearchResult)>, EngineError>;
|
||||
}
|
||||
|
||||
/// A named struct which stores the engine struct with the name of the associated engine.
|
||||
|
@ -10,7 +10,7 @@
|
||||
/// order to allow the deserializing the json back to struct in aggregate function in
|
||||
/// aggregator.rs and create a new struct out of it and then serialize it back to json and pass
|
||||
/// it to the template files.
|
||||
#[derive(Clone, Default)]
|
||||
#[derive(Default)]
|
||||
pub struct Style {
|
||||
/// It stores the parsed theme option used to set a theme for the website.
|
||||
pub theme: String,
|
||||
@ -40,7 +40,6 @@ impl Style {
|
||||
}
|
||||
|
||||
/// Configuration options for the aggregator.
|
||||
#[derive(Clone)]
|
||||
pub struct AggregatorConfig {
|
||||
/// It stores the option to whether enable or disable random delays between
|
||||
/// requests.
|
||||
@ -48,7 +47,6 @@ pub struct AggregatorConfig {
|
||||
}
|
||||
|
||||
/// Configuration options for the rate limiter middleware.
|
||||
#[derive(Clone)]
|
||||
pub struct RateLimiter {
|
||||
/// The number of request that are allowed within a provided time limit.
|
||||
pub number_of_requests: u8,
|
||||
|
@ -9,22 +9,25 @@ use crate::models::{
|
||||
engine_models::{EngineError, EngineHandler},
|
||||
};
|
||||
use error_stack::Report;
|
||||
use futures::stream::FuturesUnordered;
|
||||
use regex::Regex;
|
||||
use reqwest::{Client, ClientBuilder};
|
||||
use std::sync::Arc;
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
io::{BufReader, Read},
|
||||
use tokio::{
|
||||
fs::File,
|
||||
io::{AsyncBufReadExt, BufReader},
|
||||
task::JoinHandle,
|
||||
time::Duration,
|
||||
};
|
||||
use std::{fs::File, io::BufRead};
|
||||
use tokio::task::JoinHandle;
|
||||
|
||||
/// A constant for holding the prebuilt Client globally in the app.
|
||||
static CLIENT: std::sync::OnceLock<Client> = std::sync::OnceLock::new();
|
||||
|
||||
/// Aliases for long type annotations
|
||||
type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<EngineError>>>>;
|
||||
|
||||
type FutureVec =
|
||||
FuturesUnordered<JoinHandle<Result<Vec<(String, SearchResult)>, Report<EngineError>>>>;
|
||||
|
||||
/// The function aggregates the scraped results from the user-selected upstream search engines.
|
||||
/// These engines can be chosen either from the user interface (UI) or from the configuration file.
|
||||
@ -37,7 +40,7 @@ type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<Eng
|
||||
///
|
||||
/// Additionally, the function eliminates duplicate results. If two results are identified as coming from
|
||||
/// multiple engines, their names are combined to indicate that the results were fetched from these upstream
|
||||
/// engines. After this, all the data in the `HashMap` is removed and placed into a struct that contains all
|
||||
/// engines. After this, all the data in the `Vec` is removed and placed into a struct that contains all
|
||||
/// the aggregated results in a vector. Furthermore, the query used is also added to the struct. This step is
|
||||
/// necessary to ensure that the search bar in the search remains populated even when searched from the query URL.
|
||||
///
|
||||
@ -94,15 +97,22 @@ pub async fn aggregate(
|
||||
let mut names: Vec<&str> = Vec::with_capacity(0);
|
||||
|
||||
// create tasks for upstream result fetching
|
||||
let mut tasks: FutureVec = FutureVec::new();
|
||||
let tasks: FutureVec = FutureVec::new();
|
||||
|
||||
let query: Arc<String> = Arc::new(query.to_string());
|
||||
for engine_handler in upstream_search_engines {
|
||||
let (name, search_engine) = engine_handler.to_owned().into_name_engine();
|
||||
let (name, search_engine) = engine_handler.clone().into_name_engine();
|
||||
names.push(name);
|
||||
let query: String = query.to_owned();
|
||||
let query_partially_cloned = query.clone();
|
||||
tasks.push(tokio::spawn(async move {
|
||||
search_engine
|
||||
.results(&query, page, user_agent, client, safe_search)
|
||||
.results(
|
||||
&query_partially_cloned,
|
||||
page,
|
||||
user_agent,
|
||||
client,
|
||||
safe_search,
|
||||
)
|
||||
.await
|
||||
}));
|
||||
}
|
||||
@ -117,7 +127,7 @@ pub async fn aggregate(
|
||||
}
|
||||
|
||||
// aggregate search results, removing duplicates and handling errors the upstream engines returned
|
||||
let mut result_map: HashMap<String, SearchResult> = HashMap::new();
|
||||
let mut result_map: Vec<(String, SearchResult)> = Vec::new();
|
||||
let mut engine_errors_info: Vec<EngineErrorInfo> = Vec::new();
|
||||
|
||||
let mut handle_error = |error: &Report<EngineError>, engine_name: &'static str| {
|
||||
@ -134,51 +144,45 @@ pub async fn aggregate(
|
||||
|
||||
if result_map.is_empty() {
|
||||
match response {
|
||||
Ok(results) => {
|
||||
result_map = results.clone();
|
||||
}
|
||||
Err(error) => {
|
||||
handle_error(&error, engine);
|
||||
}
|
||||
}
|
||||
Ok(results) => result_map = results,
|
||||
Err(error) => handle_error(&error, engine),
|
||||
};
|
||||
continue;
|
||||
}
|
||||
|
||||
match response {
|
||||
Ok(result) => {
|
||||
result.into_iter().for_each(|(key, value)| {
|
||||
result_map
|
||||
.entry(key)
|
||||
.and_modify(|result| {
|
||||
result.add_engines(engine);
|
||||
})
|
||||
.or_insert_with(|| -> SearchResult { value });
|
||||
match result_map.iter().find(|(key_s, _)| key_s == &key) {
|
||||
Some(value) => value.1.to_owned().add_engines(engine),
|
||||
None => result_map.push((key, value)),
|
||||
};
|
||||
});
|
||||
}
|
||||
Err(error) => {
|
||||
handle_error(&error, engine);
|
||||
}
|
||||
}
|
||||
Err(error) => handle_error(&error, engine),
|
||||
};
|
||||
}
|
||||
|
||||
if safe_search >= 3 {
|
||||
let mut blacklist_map: HashMap<String, SearchResult> = HashMap::new();
|
||||
let mut blacklist_map: Vec<(String, SearchResult)> = Vec::new();
|
||||
filter_with_lists(
|
||||
&mut result_map,
|
||||
&mut blacklist_map,
|
||||
file_path(FileType::BlockList)?,
|
||||
)?;
|
||||
)
|
||||
.await?;
|
||||
|
||||
filter_with_lists(
|
||||
&mut blacklist_map,
|
||||
&mut result_map,
|
||||
file_path(FileType::AllowList)?,
|
||||
)?;
|
||||
)
|
||||
.await?;
|
||||
|
||||
drop(blacklist_map);
|
||||
}
|
||||
|
||||
let results: Vec<SearchResult> = result_map.into_values().collect();
|
||||
let results: Vec<SearchResult> = result_map.iter().map(|(_, value)| value.clone()).collect();
|
||||
|
||||
Ok(SearchResults::new(results, &engine_errors_info))
|
||||
}
|
||||
@ -187,35 +191,41 @@ pub async fn aggregate(
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `map_to_be_filtered` - A mutable reference to a `HashMap` of search results to filter, where the filtered results will be removed from.
|
||||
/// * `resultant_map` - A mutable reference to a `HashMap` to hold the filtered results.
|
||||
/// * `map_to_be_filtered` - A mutable reference to a `Vec` of search results to filter, where the filtered results will be removed from.
|
||||
/// * `resultant_map` - A mutable reference to a `Vec` to hold the filtered results.
|
||||
/// * `file_path` - A `&str` representing the path to a file containing regex patterns to use for filtering.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns an error if the file at `file_path` cannot be opened or read, or if a regex pattern is invalid.
|
||||
pub fn filter_with_lists(
|
||||
map_to_be_filtered: &mut HashMap<String, SearchResult>,
|
||||
resultant_map: &mut HashMap<String, SearchResult>,
|
||||
pub async fn filter_with_lists(
|
||||
map_to_be_filtered: &mut Vec<(String, SearchResult)>,
|
||||
resultant_map: &mut Vec<(String, SearchResult)>,
|
||||
file_path: &str,
|
||||
) -> Result<(), Box<dyn std::error::Error>> {
|
||||
let mut reader = BufReader::new(File::open(file_path)?);
|
||||
let reader = BufReader::new(File::open(file_path).await?);
|
||||
let mut lines = reader.lines();
|
||||
|
||||
for line in reader.by_ref().lines() {
|
||||
let re = Regex::new(line?.trim())?;
|
||||
while let Some(line) = lines.next_line().await? {
|
||||
let re = Regex::new(line.trim())?;
|
||||
|
||||
let mut length = map_to_be_filtered.len();
|
||||
let mut idx: usize = Default::default();
|
||||
// Iterate over each search result in the map and check if it matches the regex pattern
|
||||
for (url, search_result) in map_to_be_filtered.clone().into_iter() {
|
||||
if re.is_match(&url.to_lowercase())
|
||||
|| re.is_match(&search_result.title.to_lowercase())
|
||||
|| re.is_match(&search_result.description.to_lowercase())
|
||||
while idx < length {
|
||||
let ele = &map_to_be_filtered[idx];
|
||||
let ele_inner = &ele.1;
|
||||
match re.is_match(&ele.0.to_lowercase())
|
||||
|| re.is_match(&ele_inner.title.to_lowercase())
|
||||
|| re.is_match(&ele_inner.description.to_lowercase())
|
||||
{
|
||||
true => {
|
||||
// If the search result matches the regex pattern, move it from the original map to the resultant map
|
||||
resultant_map.insert(
|
||||
url.to_owned(),
|
||||
map_to_be_filtered.remove(&url.to_owned()).unwrap(),
|
||||
);
|
||||
resultant_map.push(map_to_be_filtered.swap_remove(idx));
|
||||
length -= 1;
|
||||
}
|
||||
false => idx += 1,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
@ -226,15 +236,14 @@ pub fn filter_with_lists(
|
||||
mod tests {
|
||||
use super::*;
|
||||
use smallvec::smallvec;
|
||||
use std::collections::HashMap;
|
||||
use std::io::Write;
|
||||
use tempfile::NamedTempFile;
|
||||
|
||||
#[test]
|
||||
fn test_filter_with_lists() -> Result<(), Box<dyn std::error::Error>> {
|
||||
#[tokio::test]
|
||||
async fn test_filter_with_lists() -> Result<(), Box<dyn std::error::Error>> {
|
||||
// Create a map of search results to filter
|
||||
let mut map_to_be_filtered = HashMap::new();
|
||||
map_to_be_filtered.insert(
|
||||
let mut map_to_be_filtered = Vec::new();
|
||||
map_to_be_filtered.push((
|
||||
"https://www.example.com".to_owned(),
|
||||
SearchResult {
|
||||
title: "Example Domain".to_owned(),
|
||||
@ -243,15 +252,15 @@ mod tests {
|
||||
.to_owned(),
|
||||
engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
|
||||
},
|
||||
);
|
||||
map_to_be_filtered.insert(
|
||||
));
|
||||
map_to_be_filtered.push((
|
||||
"https://www.rust-lang.org/".to_owned(),
|
||||
SearchResult {
|
||||
title: "Rust Programming Language".to_owned(),
|
||||
url: "https://www.rust-lang.org/".to_owned(),
|
||||
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
|
||||
engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
|
||||
},
|
||||
},)
|
||||
);
|
||||
|
||||
// Create a temporary file with regex patterns
|
||||
@ -260,25 +269,30 @@ mod tests {
|
||||
writeln!(file, "rust")?;
|
||||
file.flush()?;
|
||||
|
||||
let mut resultant_map = HashMap::new();
|
||||
let mut resultant_map = Vec::new();
|
||||
filter_with_lists(
|
||||
&mut map_to_be_filtered,
|
||||
&mut resultant_map,
|
||||
file.path().to_str().unwrap(),
|
||||
)?;
|
||||
)
|
||||
.await?;
|
||||
|
||||
assert_eq!(resultant_map.len(), 2);
|
||||
assert!(resultant_map.contains_key("https://www.example.com"));
|
||||
assert!(resultant_map.contains_key("https://www.rust-lang.org/"));
|
||||
assert!(resultant_map
|
||||
.iter()
|
||||
.any(|(key, _)| key == "https://www.example.com"));
|
||||
assert!(resultant_map
|
||||
.iter()
|
||||
.any(|(key, _)| key == "https://www.rust-lang.org/"));
|
||||
assert_eq!(map_to_be_filtered.len(), 0);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_filter_with_lists_wildcard() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let mut map_to_be_filtered = HashMap::new();
|
||||
map_to_be_filtered.insert(
|
||||
#[tokio::test]
|
||||
async fn test_filter_with_lists_wildcard() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let mut map_to_be_filtered = Vec::new();
|
||||
map_to_be_filtered.push((
|
||||
"https://www.example.com".to_owned(),
|
||||
SearchResult {
|
||||
title: "Example Domain".to_owned(),
|
||||
@ -287,8 +301,8 @@ mod tests {
|
||||
.to_owned(),
|
||||
engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
|
||||
},
|
||||
);
|
||||
map_to_be_filtered.insert(
|
||||
));
|
||||
map_to_be_filtered.push((
|
||||
"https://www.rust-lang.org/".to_owned(),
|
||||
SearchResult {
|
||||
title: "Rust Programming Language".to_owned(),
|
||||
@ -296,34 +310,39 @@ mod tests {
|
||||
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
|
||||
engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
|
||||
},
|
||||
);
|
||||
));
|
||||
|
||||
// Create a temporary file with a regex pattern containing a wildcard
|
||||
let mut file = NamedTempFile::new()?;
|
||||
writeln!(file, "ex.*le")?;
|
||||
file.flush()?;
|
||||
|
||||
let mut resultant_map = HashMap::new();
|
||||
let mut resultant_map = Vec::new();
|
||||
|
||||
filter_with_lists(
|
||||
&mut map_to_be_filtered,
|
||||
&mut resultant_map,
|
||||
file.path().to_str().unwrap(),
|
||||
)?;
|
||||
)
|
||||
.await?;
|
||||
|
||||
assert_eq!(resultant_map.len(), 1);
|
||||
assert!(resultant_map.contains_key("https://www.example.com"));
|
||||
assert!(resultant_map
|
||||
.iter()
|
||||
.any(|(key, _)| key == "https://www.example.com"));
|
||||
assert_eq!(map_to_be_filtered.len(), 1);
|
||||
assert!(map_to_be_filtered.contains_key("https://www.rust-lang.org/"));
|
||||
assert!(map_to_be_filtered
|
||||
.iter()
|
||||
.any(|(key, _)| key == "https://www.rust-lang.org/"));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_filter_with_lists_file_not_found() {
|
||||
let mut map_to_be_filtered = HashMap::new();
|
||||
#[tokio::test]
|
||||
async fn test_filter_with_lists_file_not_found() {
|
||||
let mut map_to_be_filtered = Vec::new();
|
||||
|
||||
let mut resultant_map = HashMap::new();
|
||||
let mut resultant_map = Vec::new();
|
||||
|
||||
// Call the `filter_with_lists` function with a non-existent file path
|
||||
let result = filter_with_lists(
|
||||
@ -332,13 +351,13 @@ mod tests {
|
||||
"non-existent-file.txt",
|
||||
);
|
||||
|
||||
assert!(result.is_err());
|
||||
assert!(result.await.is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_filter_with_lists_invalid_regex() {
|
||||
let mut map_to_be_filtered = HashMap::new();
|
||||
map_to_be_filtered.insert(
|
||||
#[tokio::test]
|
||||
async fn test_filter_with_lists_invalid_regex() {
|
||||
let mut map_to_be_filtered = Vec::new();
|
||||
map_to_be_filtered.push((
|
||||
"https://www.example.com".to_owned(),
|
||||
SearchResult {
|
||||
title: "Example Domain".to_owned(),
|
||||
@ -347,9 +366,9 @@ mod tests {
|
||||
.to_owned(),
|
||||
engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
|
||||
},
|
||||
);
|
||||
));
|
||||
|
||||
let mut resultant_map = HashMap::new();
|
||||
let mut resultant_map = Vec::new();
|
||||
|
||||
// Create a temporary file with an invalid regex pattern
|
||||
let mut file = NamedTempFile::new().unwrap();
|
||||
@ -362,6 +381,6 @@ mod tests {
|
||||
file.path().to_str().unwrap(),
|
||||
);
|
||||
|
||||
assert!(result.is_err());
|
||||
assert!(result.await.is_err());
|
||||
}
|
||||
}
|
||||
|
@ -7,11 +7,13 @@ use crate::{
|
||||
handler::{file_path, FileType},
|
||||
};
|
||||
use actix_web::{get, http::header::ContentType, web, HttpRequest, HttpResponse};
|
||||
use std::fs::read_to_string;
|
||||
use tokio::fs::read_to_string;
|
||||
|
||||
/// Handles the route of index page or main page of the `websurfx` meta search engine website.
|
||||
#[get("/")]
|
||||
pub async fn index(config: web::Data<Config>) -> Result<HttpResponse, Box<dyn std::error::Error>> {
|
||||
pub async fn index(
|
||||
config: web::Data<&'static Config>,
|
||||
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
|
||||
Ok(HttpResponse::Ok().content_type(ContentType::html()).body(
|
||||
crate::templates::views::index::index(
|
||||
&config.style.colorscheme,
|
||||
@ -25,7 +27,7 @@ pub async fn index(config: web::Data<Config>) -> Result<HttpResponse, Box<dyn st
|
||||
/// Handles the route of any other accessed route/page which is not provided by the
|
||||
/// website essentially the 404 error page.
|
||||
pub async fn not_found(
|
||||
config: web::Data<Config>,
|
||||
config: web::Data<&'static Config>,
|
||||
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
|
||||
Ok(HttpResponse::Ok().content_type(ContentType::html()).body(
|
||||
crate::templates::views::not_found::not_found(
|
||||
@ -41,7 +43,7 @@ pub async fn not_found(
|
||||
#[get("/robots.txt")]
|
||||
pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
|
||||
let page_content: String =
|
||||
read_to_string(format!("{}/robots.txt", file_path(FileType::Theme)?))?;
|
||||
read_to_string(format!("{}/robots.txt", file_path(FileType::Theme)?)).await?;
|
||||
Ok(HttpResponse::Ok()
|
||||
.content_type(ContentType::plaintext())
|
||||
.body(page_content))
|
||||
@ -49,7 +51,9 @@ pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std:
|
||||
|
||||
/// Handles the route of about page of the `websurfx` meta search engine website.
|
||||
#[get("/about")]
|
||||
pub async fn about(config: web::Data<Config>) -> Result<HttpResponse, Box<dyn std::error::Error>> {
|
||||
pub async fn about(
|
||||
config: web::Data<&'static Config>,
|
||||
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
|
||||
Ok(HttpResponse::Ok().content_type(ContentType::html()).body(
|
||||
crate::templates::views::about::about(
|
||||
&config.style.colorscheme,
|
||||
@ -63,7 +67,7 @@ pub async fn about(config: web::Data<Config>) -> Result<HttpResponse, Box<dyn st
|
||||
/// Handles the route of settings page of the `websurfx` meta search engine website.
|
||||
#[get("/settings")]
|
||||
pub async fn settings(
|
||||
config: web::Data<Config>,
|
||||
config: web::Data<&'static Config>,
|
||||
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
|
||||
Ok(HttpResponse::Ok().content_type(ContentType::html()).body(
|
||||
crate::templates::views::settings::settings(
|
||||
|
@ -13,12 +13,12 @@ use crate::{
|
||||
};
|
||||
use actix_web::{get, http::header::ContentType, web, HttpRequest, HttpResponse};
|
||||
use regex::Regex;
|
||||
use std::{
|
||||
borrow::Cow,
|
||||
use std::borrow::Cow;
|
||||
use tokio::{
|
||||
fs::File,
|
||||
io::{BufRead, BufReader, Read},
|
||||
io::{AsyncBufReadExt, BufReader},
|
||||
join,
|
||||
};
|
||||
use tokio::join;
|
||||
|
||||
/// Handles the route of search page of the `websurfx` meta search engine website and it takes
|
||||
/// two search url parameters `q` and `page` where `page` parameter is optional.
|
||||
@ -37,8 +37,8 @@ use tokio::join;
|
||||
#[get("/search")]
|
||||
pub async fn search(
|
||||
req: HttpRequest,
|
||||
config: web::Data<Config>,
|
||||
cache: web::Data<SharedCache>,
|
||||
config: web::Data<&'static Config>,
|
||||
cache: web::Data<&'static SharedCache>,
|
||||
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
|
||||
use std::sync::Arc;
|
||||
let params = web::Query::<SearchParams>::from_query(req.query_string())?;
|
||||
@ -70,8 +70,8 @@ pub async fn search(
|
||||
});
|
||||
|
||||
search_settings.safe_search_level = get_safesearch_level(
|
||||
&Some(search_settings.safe_search_level),
|
||||
¶ms.safesearch,
|
||||
params.safesearch,
|
||||
search_settings.safe_search_level,
|
||||
config.safe_search,
|
||||
);
|
||||
|
||||
@ -158,8 +158,8 @@ pub async fn search(
|
||||
/// It returns the `SearchResults` struct if the search results could be successfully fetched from
|
||||
/// the cache or from the upstream search engines otherwise it returns an appropriate error.
|
||||
async fn results(
|
||||
config: &Config,
|
||||
cache: &web::Data<SharedCache>,
|
||||
config: &'static Config,
|
||||
cache: &'static SharedCache,
|
||||
query: &str,
|
||||
page: u32,
|
||||
search_settings: &server_models::Cookie<'_>,
|
||||
@ -188,7 +188,7 @@ async fn results(
|
||||
let mut results: SearchResults = SearchResults::default();
|
||||
|
||||
let flag: bool =
|
||||
!is_match_from_filter_list(file_path(FileType::BlockList)?, query)?;
|
||||
!is_match_from_filter_list(file_path(FileType::BlockList)?, query).await?;
|
||||
// Return early when query contains disallowed words,
|
||||
if flag {
|
||||
results.set_disallowed();
|
||||
@ -225,12 +225,12 @@ async fn results(
|
||||
search_results
|
||||
}
|
||||
};
|
||||
if results.engine_errors_info().is_empty()
|
||||
&& results.results().is_empty()
|
||||
&& !results.no_engines_selected()
|
||||
{
|
||||
results.set_filtered();
|
||||
}
|
||||
let (engine_errors_info, results_empty_check, no_engines_selected) = (
|
||||
results.engine_errors_info().is_empty(),
|
||||
results.results().is_empty(),
|
||||
results.no_engines_selected(),
|
||||
);
|
||||
results.set_filtered(engine_errors_info & results_empty_check & !no_engines_selected);
|
||||
cache
|
||||
.cache_results(&[results.clone()], &[cache_key.clone()])
|
||||
.await?;
|
||||
@ -252,13 +252,14 @@ async fn results(
|
||||
///
|
||||
/// Returns a bool indicating whether the results were found in the list or not on success
|
||||
/// otherwise returns a standard error type on a failure.
|
||||
fn is_match_from_filter_list(
|
||||
async fn is_match_from_filter_list(
|
||||
file_path: &str,
|
||||
query: &str,
|
||||
) -> Result<bool, Box<dyn std::error::Error>> {
|
||||
let mut reader = BufReader::new(File::open(file_path)?);
|
||||
for line in reader.by_ref().lines() {
|
||||
let re = Regex::new(&line?)?;
|
||||
let reader = BufReader::new(File::open(file_path).await?);
|
||||
let mut lines = reader.lines();
|
||||
while let Some(line) = lines.next_line().await? {
|
||||
let re = Regex::new(&line)?;
|
||||
if re.is_match(query) {
|
||||
return Ok(true);
|
||||
}
|
||||
@ -267,24 +268,95 @@ fn is_match_from_filter_list(
|
||||
Ok(false)
|
||||
}
|
||||
|
||||
/// A helper function to modify the safe search level based on the url params.
|
||||
/// The `safe_search` is the one in the user's cookie or
|
||||
/// the default set by the server config if the cookie was missing.
|
||||
/// A helper function to choose the safe search level value based on the URL parameters,
|
||||
/// cookie value and config value.
|
||||
///
|
||||
/// # Argurments
|
||||
///
|
||||
/// * `url_level` - Safe search level from the url.
|
||||
/// * `safe_search` - User's cookie, or the safe search level set by the server
|
||||
/// * `config_level` - Safe search level to fall back to
|
||||
fn get_safesearch_level(cookie_level: &Option<u8>, url_level: &Option<u8>, config_level: u8) -> u8 {
|
||||
match url_level {
|
||||
Some(url_level) => {
|
||||
if *url_level >= 3 {
|
||||
config_level
|
||||
/// * `safe_search_level_from_url` - Safe search level from the URL parameters.
|
||||
/// * `cookie_safe_search_level` - Safe search level value from the cookie.
|
||||
/// * `config_safe_search_level` - Safe search level value from the config file.
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// Returns an appropriate safe search level value based on the safe search level values
|
||||
/// from the URL parameters, cookie and the config file.
|
||||
fn get_safesearch_level(
|
||||
safe_search_level_from_url: Option<u8>,
|
||||
cookie_safe_search_level: u8,
|
||||
config_safe_search_level: u8,
|
||||
) -> u8 {
|
||||
(u8::from(safe_search_level_from_url.is_some())
|
||||
* ((u8::from(config_safe_search_level >= 3) * config_safe_search_level)
|
||||
+ (u8::from(config_safe_search_level < 3) * safe_search_level_from_url.unwrap_or(0))))
|
||||
+ (u8::from(safe_search_level_from_url.is_none())
|
||||
* ((u8::from(config_safe_search_level >= 3) * config_safe_search_level)
|
||||
+ (u8::from(config_safe_search_level < 3) * cookie_safe_search_level)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
|
||||
/// A helper function which creates a random mock safe search level value.
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// Returns an optional u8 value.
|
||||
fn mock_safe_search_level_value() -> Option<u8> {
|
||||
let nanos = SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.unwrap_or_default()
|
||||
.subsec_nanos() as f32;
|
||||
let delay = ((nanos / 1_0000_0000 as f32).floor() as i8) - 1;
|
||||
|
||||
match delay {
|
||||
-1 => None,
|
||||
some_num => Some(if some_num > 4 { some_num - 4 } else { some_num } as u8),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
/// A test function to test whether the output of the branchless and branched code
|
||||
/// for the code to choose the appropriate safe search level is same or not.
|
||||
fn get_safesearch_level_branched_branchless_code_test() {
|
||||
// Get mock values for the safe search level values for URL parameters, cookie
|
||||
// and config.
|
||||
let safe_search_level_from_url = mock_safe_search_level_value();
|
||||
let cookie_safe_search_level = mock_safe_search_level_value().unwrap_or(0);
|
||||
let config_safe_search_level = mock_safe_search_level_value().unwrap_or(0);
|
||||
|
||||
// Branched code
|
||||
let safe_search_level_value_from_branched_code = match safe_search_level_from_url {
|
||||
Some(safe_search_level_from_url_parsed) => {
|
||||
if config_safe_search_level >= 3 {
|
||||
config_safe_search_level
|
||||
} else {
|
||||
*url_level
|
||||
safe_search_level_from_url_parsed
|
||||
}
|
||||
}
|
||||
None => cookie_level.unwrap_or(config_level),
|
||||
None => {
|
||||
if config_safe_search_level >= 3 {
|
||||
config_safe_search_level
|
||||
} else {
|
||||
cookie_safe_search_level
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// branchless code
|
||||
let safe_search_level_value_from_branchless_code =
|
||||
(u8::from(safe_search_level_from_url.is_some())
|
||||
* ((u8::from(config_safe_search_level >= 3) * config_safe_search_level)
|
||||
+ (u8::from(config_safe_search_level < 3)
|
||||
* safe_search_level_from_url.unwrap_or(0))))
|
||||
+ (u8::from(safe_search_level_from_url.is_none())
|
||||
* ((u8::from(config_safe_search_level >= 3) * config_safe_search_level)
|
||||
+ (u8::from(config_safe_search_level < 3) * cookie_safe_search_level)));
|
||||
|
||||
assert_eq!(
|
||||
safe_search_level_value_from_branched_code,
|
||||
safe_search_level_value_from_branchless_code
|
||||
);
|
||||
}
|
||||
}
|
||||
|
@ -55,7 +55,7 @@ pub fn engines(engine_names: &HashMap<String, bool>) -> Markup {
|
||||
input type="checkbox" class="engine" checked;
|
||||
span class="slider round"{}
|
||||
}
|
||||
(format!("{}{}",engine_name[..1].to_uppercase().to_owned(), engine_name[1..].to_owned()))
|
||||
(format!("{}{}",&engine_name[..1].to_uppercase(), &engine_name[1..]))
|
||||
}
|
||||
}
|
||||
@else {
|
||||
@ -64,7 +64,7 @@ pub fn engines(engine_names: &HashMap<String, bool>) -> Markup {
|
||||
input type="checkbox" class="engine";
|
||||
span class="slider round"{}
|
||||
}
|
||||
(format!("{}{}",engine_name[..1].to_uppercase().to_owned(), engine_name[1..].to_owned()))
|
||||
(format!("{}{}",&engine_name[..1], &engine_name[1..]))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -36,7 +36,7 @@ fn style_option_list(
|
||||
}
|
||||
|
||||
if style_type == "animations" {
|
||||
style_option_names.push(("".to_owned(), "none".to_owned()))
|
||||
style_option_names.push((String::default(), "none".to_owned()))
|
||||
}
|
||||
|
||||
Ok(style_option_names)
|
||||
@ -83,9 +83,11 @@ pub fn user_interface(
|
||||
"Select the animation for your theme to be used in user interface"
|
||||
}
|
||||
select name="animations"{
|
||||
@let default_animation = &String::default();
|
||||
@let animation = animation.as_ref().unwrap_or(default_animation);
|
||||
// Sets the user selected animation name from the config file as the first option in the selection list.
|
||||
option value=(animation.as_ref().unwrap_or(&"".to_owned())){(animation.as_ref().unwrap_or(&"".to_owned()).replace('-'," "))}
|
||||
@for (k,v) in style_option_list("animations", animation.as_ref().unwrap_or(&"".to_owned()))?{
|
||||
option value=(animation){(animation.replace('-'," "))}
|
||||
@for (k,v) in style_option_list("animations", animation)?{
|
||||
option value=(k){(v)}
|
||||
}
|
||||
}
|
||||
|
@ -38,7 +38,7 @@ pub fn search(
|
||||
small{(result.url)}
|
||||
p{(PreEscaped(&result.description))}
|
||||
.upstream_engines{
|
||||
@for name in result.clone().engine{
|
||||
@for name in &result.engine {
|
||||
span{(name)}
|
||||
}
|
||||
}
|
||||
|
@ -1,14 +1,17 @@
|
||||
use std::net::TcpListener;
|
||||
use std::{net::TcpListener, sync::OnceLock};
|
||||
|
||||
use websurfx::{config::parser::Config, run, templates::views};
|
||||
|
||||
/// A static constant for holding the parsed config.
|
||||
static CONFIG: OnceLock<Config> = OnceLock::new();
|
||||
|
||||
// Starts a new instance of the HTTP server, bound to a random available port
|
||||
async fn spawn_app() -> String {
|
||||
// Binding to port 0 will trigger the OS to assign a port for us.
|
||||
let listener = TcpListener::bind("127.0.0.1:0").expect("Failed to bind random port");
|
||||
let port = listener.local_addr().unwrap().port();
|
||||
let config = Config::parse(false).unwrap();
|
||||
let cache = websurfx::cache::cacher::create_cache(&config).await;
|
||||
let config = CONFIG.get_or_init(|| Config::parse(false).unwrap());
|
||||
let cache = websurfx::cache::cacher::create_cache(config).await;
|
||||
let server = run(listener, config, cache).expect("Failed to bind address");
|
||||
|
||||
tokio::spawn(server);
|
||||
|
Loading…
Reference in New Issue
Block a user