0
0
mirror of https://github.com/neon-mmd/websurfx.git synced 2024-11-26 15:58:21 -05:00

Compare commits

..

2 Commits

Author SHA1 Message Date
neon_arch
c3da0b3303
Merge branch 'rolling' into FEAT/528_new-experimental-feature-to-improve-performance-using-io-uring-on-linux-distributions 2024-03-11 22:26:26 +03:00
neon_arch
991f3f59de
perf: several optimizations for improving the performance of the engine (#540)
* ♻️ refactor: initialize & store the config & cache structs as a constant (#486)
- initializes & stores the config & cache structs as a static constant.
- Pass the config & cache structs as a static reference to all the
  functions handling their respective route.

*  perf: replace hashmaps with vectors for fetching & aggregating results (#486)
- replace hashmaps with vectors for fetching, collecting & aggregating results as it tends to be contigous & cache efficient data structure.
- refactor & redesign algorithms for fetching & aggregating results
  centered around vectors in aggregate function.

*  build: add the future crate (#486)

*  perf: use `futureunordered` for collecting results fetched from the tokio spawn tasks (#486)
- using the `futureunordered` instead of vector for collecting results
  reduces the time it takes to fetch the results as the results do not
  need to come in specific order so any result that gets fetched first
  gets collected in the `futureunordered` type.

Co-authored-by: Spencerjibz <spencernajib2@gmail.com>

*  perf: initialize new async connections parallely using tokio spawn tasks (#486)

*  perf: initialize redis pipeline struct once with the default size of 3 (#486)

*  perf: reduce branch predictions by reducing conditional code branches (#486)

*  test(unit): provide unit test for the `get_safesearch_level` function (#486)

*  perf: reduce clones & use index based loop to improve search results filtering performance (#486)

* 🚨 fix(clippy): make clippy/format checks happy (#486)

* 🚨 fix(build): make the cargo build check happy (#486)

*  perf: reduce the amount of clones, to_owneds & to_strings (#486)

*  perf: use async crates & methods & make functions async (#486)

* 🔖 chore(release): bump the app version (#486)

---------

Co-authored-by: Spencerjibz <spencernajib2@gmail.com>
2024-03-11 12:01:30 +03:00
25 changed files with 379 additions and 222 deletions

2
Cargo.lock generated
View File

@ -4205,11 +4205,11 @@ dependencies = [
"actix-files", "actix-files",
"actix-governor", "actix-governor",
"actix-web", "actix-web",
"async-compression",
"async-once-cell", "async-once-cell",
"async-trait", "async-trait",
"base64 0.21.7", "base64 0.21.7",
"blake3", "blake3",
"brotli",
"cfg-if 1.0.0", "cfg-if 1.0.0",
"chacha20", "chacha20",
"chacha20poly1305", "chacha20poly1305",

View File

@ -14,7 +14,7 @@ path = "src/bin/websurfx.rs"
[dependencies] [dependencies]
reqwest = {version="0.11.24", default-features=false, features=["rustls-tls","brotli", "gzip"]} reqwest = {version="0.11.24", default-features=false, features=["rustls-tls","brotli", "gzip"]}
tokio = {version="1.32.0",features=["rt-multi-thread","macros"], default-features = false} tokio = {version="1.32.0",features=["rt-multi-thread","macros", "fs", "io-util"], default-features = false}
serde = {version="1.0.196", default-features=false, features=["derive"]} serde = {version="1.0.196", default-features=false, features=["derive"]}
serde_json = {version="1.0.109", default-features=false} serde_json = {version="1.0.109", default-features=false}
maud = {version="0.25.0", default-features=false, features=["actix-web"]} maud = {version="0.25.0", default-features=false, features=["actix-web"]}
@ -32,13 +32,13 @@ error-stack = {version="0.4.0", default-features=false, features=["std"]}
async-trait = {version="0.1.76", default-features=false} async-trait = {version="0.1.76", default-features=false}
regex = {version="1.9.4", features=["perf"], default-features = false} regex = {version="1.9.4", features=["perf"], default-features = false}
smallvec = {version="1.13.1", features=["union", "serde"], default-features=false} smallvec = {version="1.13.1", features=["union", "serde"], default-features=false}
futures = {version="0.3.28", default-features=false} futures = {version="0.3.30", default-features=false, features=["alloc"]}
dhat = {version="0.3.3", optional = true, default-features=false} dhat = {version="0.3.2", optional = true, default-features=false}
mimalloc = { version = "0.1.38", default-features = false } mimalloc = { version = "0.1.38", default-features = false }
async-once-cell = {version="0.5.3", default-features=false} async-once-cell = {version="0.5.3", default-features=false}
actix-governor = {version="0.5.0", default-features=false} actix-governor = {version="0.5.0", default-features=false}
mini-moka = { version="0.10", optional = true, default-features=false, features=["sync"]} mini-moka = { version="0.10", optional = true, default-features=false, features=["sync"]}
brotli = { version = "3.4.0", default-features = false, features=["std"], optional=true} async-compression = { version = "0.4.6", default-features = false, features=["brotli","tokio"], optional=true}
chacha20poly1305={version="0.10.1", default-features=false, features=["alloc","getrandom"], optional=true} chacha20poly1305={version="0.10.1", default-features=false, features=["alloc","getrandom"], optional=true}
chacha20 = {version="0.9.1", default-features=false, optional=true} chacha20 = {version="0.9.1", default-features=false, optional=true}
base64 = {version="0.21.5", default-features=false, features=["std"], optional=true} base64 = {version="0.21.5", default-features=false, features=["std"], optional=true}
@ -84,7 +84,7 @@ default = ["memory-cache"]
dhat-heap = ["dep:dhat"] dhat-heap = ["dep:dhat"]
memory-cache = ["dep:mini-moka"] memory-cache = ["dep:mini-moka"]
redis-cache = ["dep:redis","dep:base64"] redis-cache = ["dep:redis","dep:base64"]
compress-cache-results = ["dep:brotli","dep:cfg-if"] compress-cache-results = ["dep:async-compression","dep:cfg-if"]
encrypt-cache-results = ["dep:chacha20poly1305","dep:chacha20"] encrypt-cache-results = ["dep:chacha20poly1305","dep:chacha20"]
cec-cache-results = ["compress-cache-results","encrypt-cache-results"] cec-cache-results = ["compress-cache-results","encrypt-cache-results"]
experimental-io-uring = ["actix-web/experimental-io-uring"] experimental-io-uring = ["actix-web/experimental-io-uring"]

View File

@ -5,7 +5,7 @@
#[cfg(not(feature = "dhat-heap"))] #[cfg(not(feature = "dhat-heap"))]
use mimalloc::MiMalloc; use mimalloc::MiMalloc;
use std::net::TcpListener; use std::{net::TcpListener, sync::OnceLock};
use websurfx::{cache::cacher::create_cache, config::parser::Config, run}; use websurfx::{cache::cacher::create_cache, config::parser::Config, run};
/// A dhat heap memory profiler /// A dhat heap memory profiler
@ -17,6 +17,9 @@ static ALLOC: dhat::Alloc = dhat::Alloc;
#[global_allocator] #[global_allocator]
static GLOBAL: MiMalloc = MiMalloc; static GLOBAL: MiMalloc = MiMalloc;
/// A static constant for holding the parsed config.
static CONFIG: OnceLock<Config> = OnceLock::new();
/// The function that launches the main server and registers all the routes of the website. /// The function that launches the main server and registers all the routes of the website.
/// ///
/// # Error /// # Error
@ -29,10 +32,10 @@ async fn main() -> std::io::Result<()> {
#[cfg(feature = "dhat-heap")] #[cfg(feature = "dhat-heap")]
let _profiler = dhat::Profiler::new_heap(); let _profiler = dhat::Profiler::new_heap();
// Initialize the parsed config file. // Initialize the parsed config globally.
let config = Config::parse(false).unwrap(); let config = CONFIG.get_or_init(|| Config::parse(false).unwrap());
let cache = create_cache(&config).await; let cache = create_cache(config).await;
log::info!( log::info!(
"started server on port {} and IP {}", "started server on port {} and IP {}",
@ -45,7 +48,7 @@ async fn main() -> std::io::Result<()> {
config.port, config.port,
); );
let listener = TcpListener::bind((config.binding_ip.clone(), config.port))?; let listener = TcpListener::bind((config.binding_ip.as_str(), config.port))?;
run(listener, config, cache)?.await run(listener, config, cache)?.await
} }

68
src/cache/cacher.rs vendored
View File

@ -93,7 +93,7 @@ pub trait Cacher: Send + Sync {
feature = "encrypt-cache-results", feature = "encrypt-cache-results",
feature = "cec-cache-results" feature = "cec-cache-results"
))] ))]
fn encrypt_or_decrypt_results( async fn encrypt_or_decrypt_results(
&mut self, &mut self,
mut bytes: Vec<u8>, mut bytes: Vec<u8>,
encrypt: bool, encrypt: bool,
@ -137,11 +137,19 @@ pub trait Cacher: Send + Sync {
/// Returns the compressed bytes on success otherwise it returns a CacheError /// Returns the compressed bytes on success otherwise it returns a CacheError
/// on failure. /// on failure.
#[cfg(any(feature = "compress-cache-results", feature = "cec-cache-results"))] #[cfg(any(feature = "compress-cache-results", feature = "cec-cache-results"))]
fn compress_results(&mut self, mut bytes: Vec<u8>) -> Result<Vec<u8>, Report<CacheError>> { async fn compress_results(
use std::io::Write; &mut self,
let mut writer = brotli::CompressorWriter::new(Vec::new(), 4096, 11, 22); mut bytes: Vec<u8>,
) -> Result<Vec<u8>, Report<CacheError>> {
use tokio::io::AsyncWriteExt;
let mut writer = async_compression::tokio::write::BrotliEncoder::new(Vec::new());
writer writer
.write_all(&bytes) .write_all(&bytes)
.await
.map_err(|_| CacheError::CompressionError)?;
writer
.shutdown()
.await
.map_err(|_| CacheError::CompressionError)?; .map_err(|_| CacheError::CompressionError)?;
bytes = writer.into_inner(); bytes = writer.into_inner();
Ok(bytes) Ok(bytes)
@ -159,17 +167,17 @@ pub trait Cacher: Send + Sync {
/// Returns the compressed and encrypted bytes on success otherwise it returns a CacheError /// Returns the compressed and encrypted bytes on success otherwise it returns a CacheError
/// on failure. /// on failure.
#[cfg(feature = "cec-cache-results")] #[cfg(feature = "cec-cache-results")]
fn compress_encrypt_compress_results( async fn compress_encrypt_compress_results(
&mut self, &mut self,
mut bytes: Vec<u8>, mut bytes: Vec<u8>,
) -> Result<Vec<u8>, Report<CacheError>> { ) -> Result<Vec<u8>, Report<CacheError>> {
// compress first // compress first
bytes = self.compress_results(bytes)?; bytes = self.compress_results(bytes).await?;
// encrypt // encrypt
bytes = self.encrypt_or_decrypt_results(bytes, true)?; bytes = self.encrypt_or_decrypt_results(bytes, true).await?;
// compress again; // compress again;
bytes = self.compress_results(bytes)?; bytes = self.compress_results(bytes).await?;
Ok(bytes) Ok(bytes)
} }
@ -187,11 +195,11 @@ pub trait Cacher: Send + Sync {
/// on failure. /// on failure.
#[cfg(any(feature = "compress-cache-results", feature = "cec-cache-results"))] #[cfg(any(feature = "compress-cache-results", feature = "cec-cache-results"))]
fn decompress_results(&mut self, bytes: &[u8]) -> Result<Vec<u8>, Report<CacheError>> { async fn decompress_results(&mut self, bytes: &[u8]) -> Result<Vec<u8>, Report<CacheError>> {
cfg_if::cfg_if! { cfg_if::cfg_if! {
if #[cfg(feature = "compress-cache-results")] if #[cfg(feature = "compress-cache-results")]
{ {
decompress_util(bytes) decompress_util(bytes).await
} }
else if #[cfg(feature = "cec-cache-results")] else if #[cfg(feature = "cec-cache-results")]
@ -199,7 +207,7 @@ pub trait Cacher: Send + Sync {
let decompressed = decompress_util(bytes)?; let decompressed = decompress_util(bytes)?;
let decrypted = self.encrypt_or_decrypt_results(decompressed, false)?; let decrypted = self.encrypt_or_decrypt_results(decompressed, false)?;
decompress_util(&decrypted) decompress_util(&decrypted).await
} }
} }
@ -216,7 +224,7 @@ pub trait Cacher: Send + Sync {
/// # Error /// # Error
/// Returns a Vec of compressed or encrypted bytes on success otherwise it returns a CacheError /// Returns a Vec of compressed or encrypted bytes on success otherwise it returns a CacheError
/// on failure. /// on failure.
fn pre_process_search_results( async fn pre_process_search_results(
&mut self, &mut self,
search_results: &SearchResults, search_results: &SearchResults,
) -> Result<Vec<u8>, Report<CacheError>> { ) -> Result<Vec<u8>, Report<CacheError>> {
@ -224,19 +232,20 @@ pub trait Cacher: Send + Sync {
let mut bytes: Vec<u8> = search_results.try_into()?; let mut bytes: Vec<u8> = search_results.try_into()?;
#[cfg(feature = "compress-cache-results")] #[cfg(feature = "compress-cache-results")]
{ {
let compressed = self.compress_results(bytes)?; let compressed = self.compress_results(bytes).await?;
bytes = compressed; bytes = compressed;
} }
#[cfg(feature = "encrypt-cache-results")] #[cfg(feature = "encrypt-cache-results")]
{ {
let encrypted = self.encrypt_or_decrypt_results(bytes, true)?; let encrypted = self.encrypt_or_decrypt_results(bytes, true).await?;
bytes = encrypted; bytes = encrypted;
} }
#[cfg(feature = "cec-cache-results")] #[cfg(feature = "cec-cache-results")]
{ {
let compressed_encrypted_compressed = self.compress_encrypt_compress_results(bytes)?; let compressed_encrypted_compressed =
self.compress_encrypt_compress_results(bytes).await?;
bytes = compressed_encrypted_compressed; bytes = compressed_encrypted_compressed;
} }
@ -256,25 +265,25 @@ pub trait Cacher: Send + Sync {
/// on failure. /// on failure.
#[allow(unused_mut)] // needs to be mutable when any of the features is enabled #[allow(unused_mut)] // needs to be mutable when any of the features is enabled
fn post_process_search_results( async fn post_process_search_results(
&mut self, &mut self,
mut bytes: Vec<u8>, mut bytes: Vec<u8>,
) -> Result<SearchResults, Report<CacheError>> { ) -> Result<SearchResults, Report<CacheError>> {
#[cfg(feature = "compress-cache-results")] #[cfg(feature = "compress-cache-results")]
{ {
let decompressed = self.decompress_results(&bytes)?; let decompressed = self.decompress_results(&bytes).await?;
bytes = decompressed bytes = decompressed
} }
#[cfg(feature = "encrypt-cache-results")] #[cfg(feature = "encrypt-cache-results")]
{ {
let decrypted = self.encrypt_or_decrypt_results(bytes, false)?; let decrypted = self.encrypt_or_decrypt_results(bytes, false).await?;
bytes = decrypted bytes = decrypted
} }
#[cfg(feature = "cec-cache-results")] #[cfg(feature = "cec-cache-results")]
{ {
let decompressed_decrypted = self.decompress_results(&bytes)?; let decompressed_decrypted = self.decompress_results(&bytes).await?;
bytes = decompressed_decrypted; bytes = decompressed_decrypted;
} }
@ -295,16 +304,19 @@ pub trait Cacher: Send + Sync {
/// on failure. /// on failure.
#[cfg(any(feature = "compress-cache-results", feature = "cec-cache-results"))] #[cfg(any(feature = "compress-cache-results", feature = "cec-cache-results"))]
fn decompress_util(input: &[u8]) -> Result<Vec<u8>, Report<CacheError>> { async fn decompress_util(input: &[u8]) -> Result<Vec<u8>, Report<CacheError>> {
use std::io::Write; use tokio::io::AsyncWriteExt;
let mut writer = brotli::DecompressorWriter::new(Vec::new(), 4096); let mut writer = async_compression::tokio::write::BrotliDecoder::new(Vec::new());
writer writer
.write_all(input) .write_all(input)
.await
.map_err(|_| CacheError::CompressionError)?; .map_err(|_| CacheError::CompressionError)?;
let bytes = writer writer
.into_inner() .shutdown()
.await
.map_err(|_| CacheError::CompressionError)?; .map_err(|_| CacheError::CompressionError)?;
let bytes = writer.into_inner();
Ok(bytes) Ok(bytes)
} }
@ -329,7 +341,7 @@ impl Cacher for RedisCache {
let bytes = base64::engine::general_purpose::STANDARD_NO_PAD let bytes = base64::engine::general_purpose::STANDARD_NO_PAD
.decode(base64_string) .decode(base64_string)
.map_err(|_| CacheError::Base64DecodingOrEncodingError)?; .map_err(|_| CacheError::Base64DecodingOrEncodingError)?;
self.post_process_search_results(bytes) self.post_process_search_results(bytes).await
} }
async fn cache_results( async fn cache_results(
@ -345,7 +357,7 @@ impl Cacher for RedisCache {
let mut bytes = Vec::with_capacity(search_results_len); let mut bytes = Vec::with_capacity(search_results_len);
for result in search_results { for result in search_results {
let processed = self.pre_process_search_results(result)?; let processed = self.pre_process_search_results(result).await?;
bytes.push(processed); bytes.push(processed);
} }
@ -405,7 +417,7 @@ impl Cacher for InMemoryCache {
async fn cached_results(&mut self, url: &str) -> Result<SearchResults, Report<CacheError>> { async fn cached_results(&mut self, url: &str) -> Result<SearchResults, Report<CacheError>> {
let hashed_url_string = self.hash_url(url); let hashed_url_string = self.hash_url(url);
match self.cache.get(&hashed_url_string) { match self.cache.get(&hashed_url_string) {
Some(res) => self.post_process_search_results(res), Some(res) => self.post_process_search_results(res).await,
None => Err(Report::new(CacheError::MissingValue)), None => Err(Report::new(CacheError::MissingValue)),
} }
} }
@ -417,7 +429,7 @@ impl Cacher for InMemoryCache {
) -> Result<(), Report<CacheError>> { ) -> Result<(), Report<CacheError>> {
for (url, search_result) in urls.iter().zip(search_results.iter()) { for (url, search_result) in urls.iter().zip(search_results.iter()) {
let hashed_url_string = self.hash_url(url); let hashed_url_string = self.hash_url(url);
let bytes = self.pre_process_search_results(search_result)?; let bytes = self.pre_process_search_results(search_result).await?;
self.cache.insert(hashed_url_string, bytes); self.cache.insert(hashed_url_string, bytes);
} }

View File

@ -1,15 +1,16 @@
//! This module provides the functionality to cache the aggregated results fetched and aggregated //! This module provides the functionality to cache the aggregated results fetched and aggregated
//! from the upstream search engines in a json format. //! from the upstream search engines in a json format.
use super::error::CacheError;
use error_stack::Report; use error_stack::Report;
use futures::future::try_join_all; use futures::stream::FuturesUnordered;
use redis::{aio::ConnectionManager, AsyncCommands, Client, RedisError}; use redis::{aio::ConnectionManager, AsyncCommands, Client, RedisError};
use super::error::CacheError; /// A constant holding the redis pipeline size.
const REDIS_PIPELINE_SIZE: usize = 3;
/// A named struct which stores the redis Connection url address to which the client will /// A named struct which stores the redis Connection url address to which the client will
/// connect to. /// connect to.
#[derive(Clone)]
pub struct RedisCache { pub struct RedisCache {
/// It stores a pool of connections ready to be used. /// It stores a pool of connections ready to be used.
connection_pool: Vec<ConnectionManager>, connection_pool: Vec<ConnectionManager>,
@ -20,6 +21,8 @@ pub struct RedisCache {
current_connection: u8, current_connection: u8,
/// It stores the max TTL for keys. /// It stores the max TTL for keys.
cache_ttl: u16, cache_ttl: u16,
/// It stores the redis pipeline struct of size 3.
pipeline: redis::Pipeline,
} }
impl RedisCache { impl RedisCache {
@ -30,6 +33,8 @@ impl RedisCache {
/// * `redis_connection_url` - It takes the redis Connection url address. /// * `redis_connection_url` - It takes the redis Connection url address.
/// * `pool_size` - It takes the size of the connection pool (in other words the number of /// * `pool_size` - It takes the size of the connection pool (in other words the number of
/// connections that should be stored in the pool). /// connections that should be stored in the pool).
/// * `cache_ttl` - It takes the the time to live for cached results to live in the redis
/// server.
/// ///
/// # Error /// # Error
/// ///
@ -41,18 +46,28 @@ impl RedisCache {
cache_ttl: u16, cache_ttl: u16,
) -> Result<Self, Box<dyn std::error::Error>> { ) -> Result<Self, Box<dyn std::error::Error>> {
let client = Client::open(redis_connection_url)?; let client = Client::open(redis_connection_url)?;
let mut tasks: Vec<_> = Vec::new(); let tasks: FuturesUnordered<_> = FuturesUnordered::new();
for _ in 0..pool_size { for _ in 0..pool_size {
tasks.push(client.get_connection_manager()); let client_partially_cloned = client.clone();
tasks.push(tokio::spawn(async move {
client_partially_cloned.get_connection_manager().await
}));
}
let mut outputs = Vec::new();
for task in tasks {
outputs.push(task.await??);
} }
let redis_cache = RedisCache { let redis_cache = RedisCache {
connection_pool: try_join_all(tasks).await?, connection_pool: outputs,
pool_size, pool_size,
current_connection: Default::default(), current_connection: Default::default(),
cache_ttl, cache_ttl,
pipeline: redis::Pipeline::with_capacity(REDIS_PIPELINE_SIZE),
}; };
Ok(redis_cache) Ok(redis_cache)
} }
@ -122,13 +137,14 @@ impl RedisCache {
keys: impl Iterator<Item = String>, keys: impl Iterator<Item = String>,
) -> Result<(), Report<CacheError>> { ) -> Result<(), Report<CacheError>> {
self.current_connection = Default::default(); self.current_connection = Default::default();
let mut pipeline = redis::Pipeline::with_capacity(3);
for (key, json_result) in keys.zip(json_results) { for (key, json_result) in keys.zip(json_results) {
pipeline.set_ex(key, json_result, self.cache_ttl.into()); self.pipeline
.set_ex(key, json_result, self.cache_ttl.into());
} }
let mut result: Result<(), RedisError> = pipeline let mut result: Result<(), RedisError> = self
.pipeline
.query_async(&mut self.connection_pool[self.current_connection as usize]) .query_async(&mut self.connection_pool[self.current_connection as usize])
.await; .await;
@ -149,7 +165,8 @@ impl RedisCache {
CacheError::PoolExhaustionWithConnectionDropError, CacheError::PoolExhaustionWithConnectionDropError,
)); ));
} }
result = pipeline result = self
.pipeline
.query_async( .query_async(
&mut self.connection_pool[self.current_connection as usize], &mut self.connection_pool[self.current_connection as usize],
) )

View File

@ -9,7 +9,6 @@ use mlua::Lua;
use std::{collections::HashMap, fs, thread::available_parallelism}; use std::{collections::HashMap, fs, thread::available_parallelism};
/// A named struct which stores the parsed config file options. /// A named struct which stores the parsed config file options.
#[derive(Clone)]
pub struct Config { pub struct Config {
/// It stores the parsed port number option on which the server should launch. /// It stores the parsed port number option on which the server should launch.
pub port: u16, pub port: u16,

View File

@ -48,7 +48,7 @@ impl SearchEngine for Bing {
user_agent: &str, user_agent: &str,
client: &Client, client: &Client,
_safe_search: u8, _safe_search: u8,
) -> Result<HashMap<String, SearchResult>, EngineError> { ) -> Result<Vec<(String, SearchResult)>, EngineError> {
// Bing uses `start results from this number` convention // Bing uses `start results from this number` convention
// So, for 10 results per page, page 0 starts at 1, page 1 // So, for 10 results per page, page 0 starts at 1, page 1
// starts at 11, and so on. // starts at 11, and so on.

View File

@ -44,7 +44,7 @@ impl SearchEngine for Brave {
user_agent: &str, user_agent: &str,
client: &Client, client: &Client,
safe_search: u8, safe_search: u8,
) -> Result<HashMap<String, SearchResult>, EngineError> { ) -> Result<Vec<(String, SearchResult)>, EngineError> {
let url = format!("https://search.brave.com/search?q={query}&offset={page}"); let url = format!("https://search.brave.com/search?q={query}&offset={page}");
let safe_search_level = match safe_search { let safe_search_level = match safe_search {

View File

@ -47,7 +47,7 @@ impl SearchEngine for DuckDuckGo {
user_agent: &str, user_agent: &str,
client: &Client, client: &Client,
_safe_search: u8, _safe_search: u8,
) -> Result<HashMap<String, SearchResult>, EngineError> { ) -> Result<Vec<(String, SearchResult)>, EngineError> {
// Page number can be missing or empty string and so appropriate handling is required // Page number can be missing or empty string and so appropriate handling is required
// so that upstream server recieves valid page number. // so that upstream server recieves valid page number.
let url: String = match page { let url: String = match page {

View File

@ -62,7 +62,7 @@ impl SearchEngine for LibreX {
user_agent: &str, user_agent: &str,
client: &Client, client: &Client,
_safe_search: u8, _safe_search: u8,
) -> Result<HashMap<String, SearchResult>, EngineError> { ) -> Result<Vec<(String, SearchResult)>, EngineError> {
// Page number can be missing or empty string and so appropriate handling is required // Page number can be missing or empty string and so appropriate handling is required
// so that upstream server recieves valid page number. // so that upstream server recieves valid page number.
let url: String = format!( let url: String = format!(

View File

@ -47,7 +47,7 @@ impl SearchEngine for Mojeek {
user_agent: &str, user_agent: &str,
client: &Client, client: &Client,
safe_search: u8, safe_search: u8,
) -> Result<HashMap<String, SearchResult>, EngineError> { ) -> Result<Vec<(String, SearchResult)>, EngineError> {
// Mojeek uses `start results from this number` convention // Mojeek uses `start results from this number` convention
// So, for 10 results per page, page 0 starts at 1, page 1 // So, for 10 results per page, page 0 starts at 1, page 1
// starts at 11, and so on. // starts at 11, and so on.
@ -72,8 +72,23 @@ impl SearchEngine for Mojeek {
"Yep", "Yep",
"You", "You",
]; ];
let qss = search_engines.join("%2C"); let qss = search_engines.join("%2C");
let safe = if safe_search == 0 { "0" } else { "1" };
// A branchless condition to check whether the `safe_search` parameter has the
// value 0 or not. If it is zero then it sets the value 0 otherwise it sets
// the value to 1 for all other values of `safe_search`
//
// Moreover, the below branchless code is equivalent to the following code below:
//
// ```rust
// let safe = if safe_search == 0 { 0 } else { 1 }.to_string();
// ```
//
// For more information on branchless programming. See:
//
// * https://piped.video/watch?v=bVJ-mWWL7cE
let safe = u8::from(safe_search != 0).to_string();
// Mojeek detects automated requests, these are preferences that are // Mojeek detects automated requests, these are preferences that are
// able to circumvent the countermeasure. Some of these are // able to circumvent the countermeasure. Some of these are
@ -89,7 +104,7 @@ impl SearchEngine for Mojeek {
("hp", "minimal"), ("hp", "minimal"),
("lb", "en"), ("lb", "en"),
("qss", &qss), ("qss", &qss),
("safe", safe), ("safe", &safe),
]; ];
let mut query_params_string = String::new(); let mut query_params_string = String::new();

View File

@ -1,5 +1,4 @@
//! This modules provides helper functionalities for parsing a html document into internal SearchResult. //! This modules provides helper functionalities for parsing a html document into internal SearchResult.
use std::collections::HashMap;
use crate::models::{aggregation_models::SearchResult, engine_models::EngineError}; use crate::models::{aggregation_models::SearchResult, engine_models::EngineError};
use error_stack::{Report, Result}; use error_stack::{Report, Result};
@ -47,7 +46,7 @@ impl SearchResultParser {
&self, &self,
document: &Html, document: &Html,
builder: impl Fn(&ElementRef<'_>, &ElementRef<'_>, &ElementRef<'_>) -> Option<SearchResult>, builder: impl Fn(&ElementRef<'_>, &ElementRef<'_>, &ElementRef<'_>) -> Option<SearchResult>,
) -> Result<HashMap<String, SearchResult>, EngineError> { ) -> Result<Vec<(String, SearchResult)>, EngineError> {
let res = document let res = document
.select(&self.results) .select(&self.results)
.filter_map(|result| { .filter_map(|result| {

View File

@ -43,12 +43,21 @@ impl SearchEngine for Searx {
user_agent: &str, user_agent: &str,
client: &Client, client: &Client,
mut safe_search: u8, mut safe_search: u8,
) -> Result<HashMap<String, SearchResult>, EngineError> { ) -> Result<Vec<(String, SearchResult)>, EngineError> {
// Page number can be missing or empty string and so appropriate handling is required // A branchless condition to check whether the `safe_search` parameter has the
// so that upstream server recieves valid page number. // value greater than equal to three or not. If it is, then it modifies the
if safe_search == 3 { // `safesearch` parameters value to 2.
safe_search = 2; //
}; // Moreover, the below branchless code is equivalent to the following code below:
//
// ```rust
// safe_search = u8::from(safe_search == 3) * 2;
// ```
//
// For more information on branchless programming. See:
//
// * https://piped.video/watch?v=bVJ-mWWL7cE
safe_search = u8::from(safe_search >= 3) * 2;
let url: String = format!( let url: String = format!(
"https://searx.be/search?q={query}&pageno={}&safesearch={safe_search}", "https://searx.be/search?q={query}&pageno={}&safesearch={safe_search}",

View File

@ -47,7 +47,7 @@ impl SearchEngine for Startpage {
user_agent: &str, user_agent: &str,
client: &Client, client: &Client,
_safe_search: u8, _safe_search: u8,
) -> Result<HashMap<String, SearchResult>, EngineError> { ) -> Result<Vec<(String, SearchResult)>, EngineError> {
// Page number can be missing or empty string and so appropriate handling is required // Page number can be missing or empty string and so appropriate handling is required
// so that upstream server recieves valid page number. // so that upstream server recieves valid page number.
let url: String = format!( let url: String = format!(

View File

@ -14,7 +14,7 @@ pub mod results;
pub mod server; pub mod server;
pub mod templates; pub mod templates;
use std::net::TcpListener; use std::{net::TcpListener, sync::OnceLock};
use crate::server::router; use crate::server::router;
@ -31,6 +31,9 @@ use cache::cacher::{Cacher, SharedCache};
use config::parser::Config; use config::parser::Config;
use handler::{file_path, FileType}; use handler::{file_path, FileType};
/// A static constant for holding the cache struct.
static SHARED_CACHE: OnceLock<SharedCache> = OnceLock::new();
/// Runs the web server on the provided TCP listener and returns a `Server` instance. /// Runs the web server on the provided TCP listener and returns a `Server` instance.
/// ///
/// # Arguments /// # Arguments
@ -44,27 +47,29 @@ use handler::{file_path, FileType};
/// # Example /// # Example
/// ///
/// ```rust /// ```rust
/// use std::net::TcpListener; /// use std::{net::TcpListener, sync::OnceLock};
/// use websurfx::{config::parser::Config, run, cache::cacher::create_cache}; /// use websurfx::{config::parser::Config, run, cache::cacher::create_cache};
/// ///
/// /// A static constant for holding the parsed config.
/// static CONFIG: OnceLock<Config> = OnceLock::new();
///
/// #[tokio::main] /// #[tokio::main]
/// async fn main(){ /// async fn main(){
/// let config = Config::parse(true).unwrap(); /// // Initialize the parsed config globally.
/// let config = CONFIG.get_or_init(|| Config::parse(true).unwrap());
/// let listener = TcpListener::bind("127.0.0.1:8080").expect("Failed to bind address"); /// let listener = TcpListener::bind("127.0.0.1:8080").expect("Failed to bind address");
/// let cache = create_cache(&config).await; /// let cache = create_cache(config).await;
/// let server = run(listener,config,cache).expect("Failed to start server"); /// let server = run(listener,&config,cache).expect("Failed to start server");
/// } /// }
/// ``` /// ```
pub fn run( pub fn run(
listener: TcpListener, listener: TcpListener,
config: Config, config: &'static Config,
cache: impl Cacher + 'static, cache: impl Cacher + 'static,
) -> std::io::Result<Server> { ) -> std::io::Result<Server> {
let public_folder_path: &str = file_path(FileType::Theme)?; let public_folder_path: &str = file_path(FileType::Theme)?;
let cloned_config_threads_opt: u8 = config.threads; let cache = SHARED_CACHE.get_or_init(|| SharedCache::new(cache));
let cache = web::Data::new(SharedCache::new(cache));
let server = HttpServer::new(move || { let server = HttpServer::new(move || {
let cors: Cors = Cors::default() let cors: Cors = Cors::default()
@ -81,8 +86,8 @@ pub fn run(
// Compress the responses provided by the server for the client requests. // Compress the responses provided by the server for the client requests.
.wrap(Compress::default()) .wrap(Compress::default())
.wrap(Logger::default()) // added logging middleware for logging. .wrap(Logger::default()) // added logging middleware for logging.
.app_data(web::Data::new(config.clone())) .app_data(web::Data::new(config))
.app_data(cache.clone()) .app_data(web::Data::new(cache))
.wrap(cors) .wrap(cors)
.wrap(Governor::new( .wrap(Governor::new(
&GovernorConfigBuilder::default() &GovernorConfigBuilder::default()
@ -107,7 +112,7 @@ pub fn run(
.service(router::settings) // settings page .service(router::settings) // settings page
.default_service(web::route().to(router::not_found)) // error page .default_service(web::route().to(router::not_found)) // error page
}) })
.workers(cloned_config_threads_opt as usize) .workers(config.threads as usize)
// Start server on 127.0.0.1 with the user provided port number. for example 127.0.0.1:8080. // Start server on 127.0.0.1 with the user provided port number. for example 127.0.0.1:8080.
.listen(listener)? .listen(listener)?
.run(); .run();

View File

@ -154,8 +154,8 @@ impl SearchResults {
} }
/// A setter function that sets the filtered to true. /// A setter function that sets the filtered to true.
pub fn set_filtered(&mut self) { pub fn set_filtered(&mut self, filtered: bool) {
self.filtered = true; self.filtered = filtered;
} }
/// A getter function that gets the value of `engine_errors_info`. /// A getter function that gets the value of `engine_errors_info`.

View File

@ -4,7 +4,7 @@
use super::aggregation_models::SearchResult; use super::aggregation_models::SearchResult;
use error_stack::{Report, Result, ResultExt}; use error_stack::{Report, Result, ResultExt};
use reqwest::Client; use reqwest::Client;
use std::{collections::HashMap, fmt}; use std::fmt;
/// A custom error type used for handle engine associated errors. /// A custom error type used for handle engine associated errors.
#[derive(Debug)] #[derive(Debug)]
@ -147,7 +147,7 @@ pub trait SearchEngine: Sync + Send {
user_agent: &str, user_agent: &str,
client: &Client, client: &Client,
safe_search: u8, safe_search: u8,
) -> Result<HashMap<String, SearchResult>, EngineError>; ) -> Result<Vec<(String, SearchResult)>, EngineError>;
} }
/// A named struct which stores the engine struct with the name of the associated engine. /// A named struct which stores the engine struct with the name of the associated engine.

View File

@ -10,7 +10,7 @@
/// order to allow the deserializing the json back to struct in aggregate function in /// order to allow the deserializing the json back to struct in aggregate function in
/// aggregator.rs and create a new struct out of it and then serialize it back to json and pass /// aggregator.rs and create a new struct out of it and then serialize it back to json and pass
/// it to the template files. /// it to the template files.
#[derive(Clone, Default)] #[derive(Default)]
pub struct Style { pub struct Style {
/// It stores the parsed theme option used to set a theme for the website. /// It stores the parsed theme option used to set a theme for the website.
pub theme: String, pub theme: String,
@ -40,7 +40,6 @@ impl Style {
} }
/// Configuration options for the aggregator. /// Configuration options for the aggregator.
#[derive(Clone)]
pub struct AggregatorConfig { pub struct AggregatorConfig {
/// It stores the option to whether enable or disable random delays between /// It stores the option to whether enable or disable random delays between
/// requests. /// requests.
@ -48,7 +47,6 @@ pub struct AggregatorConfig {
} }
/// Configuration options for the rate limiter middleware. /// Configuration options for the rate limiter middleware.
#[derive(Clone)]
pub struct RateLimiter { pub struct RateLimiter {
/// The number of request that are allowed within a provided time limit. /// The number of request that are allowed within a provided time limit.
pub number_of_requests: u8, pub number_of_requests: u8,

View File

@ -9,22 +9,25 @@ use crate::models::{
engine_models::{EngineError, EngineHandler}, engine_models::{EngineError, EngineHandler},
}; };
use error_stack::Report; use error_stack::Report;
use futures::stream::FuturesUnordered;
use regex::Regex; use regex::Regex;
use reqwest::{Client, ClientBuilder}; use reqwest::{Client, ClientBuilder};
use std::sync::Arc;
use std::time::{SystemTime, UNIX_EPOCH}; use std::time::{SystemTime, UNIX_EPOCH};
use std::{ use tokio::{
collections::HashMap, fs::File,
io::{BufReader, Read}, io::{AsyncBufReadExt, BufReader},
task::JoinHandle,
time::Duration, time::Duration,
}; };
use std::{fs::File, io::BufRead};
use tokio::task::JoinHandle;
/// A constant for holding the prebuilt Client globally in the app. /// A constant for holding the prebuilt Client globally in the app.
static CLIENT: std::sync::OnceLock<Client> = std::sync::OnceLock::new(); static CLIENT: std::sync::OnceLock<Client> = std::sync::OnceLock::new();
/// Aliases for long type annotations /// Aliases for long type annotations
type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<EngineError>>>>;
type FutureVec =
FuturesUnordered<JoinHandle<Result<Vec<(String, SearchResult)>, Report<EngineError>>>>;
/// The function aggregates the scraped results from the user-selected upstream search engines. /// The function aggregates the scraped results from the user-selected upstream search engines.
/// These engines can be chosen either from the user interface (UI) or from the configuration file. /// These engines can be chosen either from the user interface (UI) or from the configuration file.
@ -37,7 +40,7 @@ type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<Eng
/// ///
/// Additionally, the function eliminates duplicate results. If two results are identified as coming from /// Additionally, the function eliminates duplicate results. If two results are identified as coming from
/// multiple engines, their names are combined to indicate that the results were fetched from these upstream /// multiple engines, their names are combined to indicate that the results were fetched from these upstream
/// engines. After this, all the data in the `HashMap` is removed and placed into a struct that contains all /// engines. After this, all the data in the `Vec` is removed and placed into a struct that contains all
/// the aggregated results in a vector. Furthermore, the query used is also added to the struct. This step is /// the aggregated results in a vector. Furthermore, the query used is also added to the struct. This step is
/// necessary to ensure that the search bar in the search remains populated even when searched from the query URL. /// necessary to ensure that the search bar in the search remains populated even when searched from the query URL.
/// ///
@ -94,15 +97,22 @@ pub async fn aggregate(
let mut names: Vec<&str> = Vec::with_capacity(0); let mut names: Vec<&str> = Vec::with_capacity(0);
// create tasks for upstream result fetching // create tasks for upstream result fetching
let mut tasks: FutureVec = FutureVec::new(); let tasks: FutureVec = FutureVec::new();
let query: Arc<String> = Arc::new(query.to_string());
for engine_handler in upstream_search_engines { for engine_handler in upstream_search_engines {
let (name, search_engine) = engine_handler.to_owned().into_name_engine(); let (name, search_engine) = engine_handler.clone().into_name_engine();
names.push(name); names.push(name);
let query: String = query.to_owned(); let query_partially_cloned = query.clone();
tasks.push(tokio::spawn(async move { tasks.push(tokio::spawn(async move {
search_engine search_engine
.results(&query, page, user_agent, client, safe_search) .results(
&query_partially_cloned,
page,
user_agent,
client,
safe_search,
)
.await .await
})); }));
} }
@ -117,7 +127,7 @@ pub async fn aggregate(
} }
// aggregate search results, removing duplicates and handling errors the upstream engines returned // aggregate search results, removing duplicates and handling errors the upstream engines returned
let mut result_map: HashMap<String, SearchResult> = HashMap::new(); let mut result_map: Vec<(String, SearchResult)> = Vec::new();
let mut engine_errors_info: Vec<EngineErrorInfo> = Vec::new(); let mut engine_errors_info: Vec<EngineErrorInfo> = Vec::new();
let mut handle_error = |error: &Report<EngineError>, engine_name: &'static str| { let mut handle_error = |error: &Report<EngineError>, engine_name: &'static str| {
@ -134,51 +144,45 @@ pub async fn aggregate(
if result_map.is_empty() { if result_map.is_empty() {
match response { match response {
Ok(results) => { Ok(results) => result_map = results,
result_map = results.clone(); Err(error) => handle_error(&error, engine),
} };
Err(error) => {
handle_error(&error, engine);
}
}
continue; continue;
} }
match response { match response {
Ok(result) => { Ok(result) => {
result.into_iter().for_each(|(key, value)| { result.into_iter().for_each(|(key, value)| {
result_map match result_map.iter().find(|(key_s, _)| key_s == &key) {
.entry(key) Some(value) => value.1.to_owned().add_engines(engine),
.and_modify(|result| { None => result_map.push((key, value)),
result.add_engines(engine); };
})
.or_insert_with(|| -> SearchResult { value });
}); });
} }
Err(error) => { Err(error) => handle_error(&error, engine),
handle_error(&error, engine); };
}
}
} }
if safe_search >= 3 { if safe_search >= 3 {
let mut blacklist_map: HashMap<String, SearchResult> = HashMap::new(); let mut blacklist_map: Vec<(String, SearchResult)> = Vec::new();
filter_with_lists( filter_with_lists(
&mut result_map, &mut result_map,
&mut blacklist_map, &mut blacklist_map,
file_path(FileType::BlockList)?, file_path(FileType::BlockList)?,
)?; )
.await?;
filter_with_lists( filter_with_lists(
&mut blacklist_map, &mut blacklist_map,
&mut result_map, &mut result_map,
file_path(FileType::AllowList)?, file_path(FileType::AllowList)?,
)?; )
.await?;
drop(blacklist_map); drop(blacklist_map);
} }
let results: Vec<SearchResult> = result_map.into_values().collect(); let results: Vec<SearchResult> = result_map.iter().map(|(_, value)| value.clone()).collect();
Ok(SearchResults::new(results, &engine_errors_info)) Ok(SearchResults::new(results, &engine_errors_info))
} }
@ -187,35 +191,41 @@ pub async fn aggregate(
/// ///
/// # Arguments /// # Arguments
/// ///
/// * `map_to_be_filtered` - A mutable reference to a `HashMap` of search results to filter, where the filtered results will be removed from. /// * `map_to_be_filtered` - A mutable reference to a `Vec` of search results to filter, where the filtered results will be removed from.
/// * `resultant_map` - A mutable reference to a `HashMap` to hold the filtered results. /// * `resultant_map` - A mutable reference to a `Vec` to hold the filtered results.
/// * `file_path` - A `&str` representing the path to a file containing regex patterns to use for filtering. /// * `file_path` - A `&str` representing the path to a file containing regex patterns to use for filtering.
/// ///
/// # Errors /// # Errors
/// ///
/// Returns an error if the file at `file_path` cannot be opened or read, or if a regex pattern is invalid. /// Returns an error if the file at `file_path` cannot be opened or read, or if a regex pattern is invalid.
pub fn filter_with_lists( pub async fn filter_with_lists(
map_to_be_filtered: &mut HashMap<String, SearchResult>, map_to_be_filtered: &mut Vec<(String, SearchResult)>,
resultant_map: &mut HashMap<String, SearchResult>, resultant_map: &mut Vec<(String, SearchResult)>,
file_path: &str, file_path: &str,
) -> Result<(), Box<dyn std::error::Error>> { ) -> Result<(), Box<dyn std::error::Error>> {
let mut reader = BufReader::new(File::open(file_path)?); let reader = BufReader::new(File::open(file_path).await?);
let mut lines = reader.lines();
for line in reader.by_ref().lines() { while let Some(line) = lines.next_line().await? {
let re = Regex::new(line?.trim())?; let re = Regex::new(line.trim())?;
let mut length = map_to_be_filtered.len();
let mut idx: usize = Default::default();
// Iterate over each search result in the map and check if it matches the regex pattern // Iterate over each search result in the map and check if it matches the regex pattern
for (url, search_result) in map_to_be_filtered.clone().into_iter() { while idx < length {
if re.is_match(&url.to_lowercase()) let ele = &map_to_be_filtered[idx];
|| re.is_match(&search_result.title.to_lowercase()) let ele_inner = &ele.1;
|| re.is_match(&search_result.description.to_lowercase()) match re.is_match(&ele.0.to_lowercase())
|| re.is_match(&ele_inner.title.to_lowercase())
|| re.is_match(&ele_inner.description.to_lowercase())
{ {
true => {
// If the search result matches the regex pattern, move it from the original map to the resultant map // If the search result matches the regex pattern, move it from the original map to the resultant map
resultant_map.insert( resultant_map.push(map_to_be_filtered.swap_remove(idx));
url.to_owned(), length -= 1;
map_to_be_filtered.remove(&url.to_owned()).unwrap(),
);
} }
false => idx += 1,
};
} }
} }
@ -226,15 +236,14 @@ pub fn filter_with_lists(
mod tests { mod tests {
use super::*; use super::*;
use smallvec::smallvec; use smallvec::smallvec;
use std::collections::HashMap;
use std::io::Write; use std::io::Write;
use tempfile::NamedTempFile; use tempfile::NamedTempFile;
#[test] #[tokio::test]
fn test_filter_with_lists() -> Result<(), Box<dyn std::error::Error>> { async fn test_filter_with_lists() -> Result<(), Box<dyn std::error::Error>> {
// Create a map of search results to filter // Create a map of search results to filter
let mut map_to_be_filtered = HashMap::new(); let mut map_to_be_filtered = Vec::new();
map_to_be_filtered.insert( map_to_be_filtered.push((
"https://www.example.com".to_owned(), "https://www.example.com".to_owned(),
SearchResult { SearchResult {
title: "Example Domain".to_owned(), title: "Example Domain".to_owned(),
@ -243,15 +252,15 @@ mod tests {
.to_owned(), .to_owned(),
engine: smallvec!["Google".to_owned(), "Bing".to_owned()], engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
}, },
); ));
map_to_be_filtered.insert( map_to_be_filtered.push((
"https://www.rust-lang.org/".to_owned(), "https://www.rust-lang.org/".to_owned(),
SearchResult { SearchResult {
title: "Rust Programming Language".to_owned(), title: "Rust Programming Language".to_owned(),
url: "https://www.rust-lang.org/".to_owned(), url: "https://www.rust-lang.org/".to_owned(),
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(), description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()], engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
}, },)
); );
// Create a temporary file with regex patterns // Create a temporary file with regex patterns
@ -260,25 +269,30 @@ mod tests {
writeln!(file, "rust")?; writeln!(file, "rust")?;
file.flush()?; file.flush()?;
let mut resultant_map = HashMap::new(); let mut resultant_map = Vec::new();
filter_with_lists( filter_with_lists(
&mut map_to_be_filtered, &mut map_to_be_filtered,
&mut resultant_map, &mut resultant_map,
file.path().to_str().unwrap(), file.path().to_str().unwrap(),
)?; )
.await?;
assert_eq!(resultant_map.len(), 2); assert_eq!(resultant_map.len(), 2);
assert!(resultant_map.contains_key("https://www.example.com")); assert!(resultant_map
assert!(resultant_map.contains_key("https://www.rust-lang.org/")); .iter()
.any(|(key, _)| key == "https://www.example.com"));
assert!(resultant_map
.iter()
.any(|(key, _)| key == "https://www.rust-lang.org/"));
assert_eq!(map_to_be_filtered.len(), 0); assert_eq!(map_to_be_filtered.len(), 0);
Ok(()) Ok(())
} }
#[test] #[tokio::test]
fn test_filter_with_lists_wildcard() -> Result<(), Box<dyn std::error::Error>> { async fn test_filter_with_lists_wildcard() -> Result<(), Box<dyn std::error::Error>> {
let mut map_to_be_filtered = HashMap::new(); let mut map_to_be_filtered = Vec::new();
map_to_be_filtered.insert( map_to_be_filtered.push((
"https://www.example.com".to_owned(), "https://www.example.com".to_owned(),
SearchResult { SearchResult {
title: "Example Domain".to_owned(), title: "Example Domain".to_owned(),
@ -287,8 +301,8 @@ mod tests {
.to_owned(), .to_owned(),
engine: smallvec!["Google".to_owned(), "Bing".to_owned()], engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
}, },
); ));
map_to_be_filtered.insert( map_to_be_filtered.push((
"https://www.rust-lang.org/".to_owned(), "https://www.rust-lang.org/".to_owned(),
SearchResult { SearchResult {
title: "Rust Programming Language".to_owned(), title: "Rust Programming Language".to_owned(),
@ -296,34 +310,39 @@ mod tests {
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(), description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()], engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
}, },
); ));
// Create a temporary file with a regex pattern containing a wildcard // Create a temporary file with a regex pattern containing a wildcard
let mut file = NamedTempFile::new()?; let mut file = NamedTempFile::new()?;
writeln!(file, "ex.*le")?; writeln!(file, "ex.*le")?;
file.flush()?; file.flush()?;
let mut resultant_map = HashMap::new(); let mut resultant_map = Vec::new();
filter_with_lists( filter_with_lists(
&mut map_to_be_filtered, &mut map_to_be_filtered,
&mut resultant_map, &mut resultant_map,
file.path().to_str().unwrap(), file.path().to_str().unwrap(),
)?; )
.await?;
assert_eq!(resultant_map.len(), 1); assert_eq!(resultant_map.len(), 1);
assert!(resultant_map.contains_key("https://www.example.com")); assert!(resultant_map
.iter()
.any(|(key, _)| key == "https://www.example.com"));
assert_eq!(map_to_be_filtered.len(), 1); assert_eq!(map_to_be_filtered.len(), 1);
assert!(map_to_be_filtered.contains_key("https://www.rust-lang.org/")); assert!(map_to_be_filtered
.iter()
.any(|(key, _)| key == "https://www.rust-lang.org/"));
Ok(()) Ok(())
} }
#[test] #[tokio::test]
fn test_filter_with_lists_file_not_found() { async fn test_filter_with_lists_file_not_found() {
let mut map_to_be_filtered = HashMap::new(); let mut map_to_be_filtered = Vec::new();
let mut resultant_map = HashMap::new(); let mut resultant_map = Vec::new();
// Call the `filter_with_lists` function with a non-existent file path // Call the `filter_with_lists` function with a non-existent file path
let result = filter_with_lists( let result = filter_with_lists(
@ -332,13 +351,13 @@ mod tests {
"non-existent-file.txt", "non-existent-file.txt",
); );
assert!(result.is_err()); assert!(result.await.is_err());
} }
#[test] #[tokio::test]
fn test_filter_with_lists_invalid_regex() { async fn test_filter_with_lists_invalid_regex() {
let mut map_to_be_filtered = HashMap::new(); let mut map_to_be_filtered = Vec::new();
map_to_be_filtered.insert( map_to_be_filtered.push((
"https://www.example.com".to_owned(), "https://www.example.com".to_owned(),
SearchResult { SearchResult {
title: "Example Domain".to_owned(), title: "Example Domain".to_owned(),
@ -347,9 +366,9 @@ mod tests {
.to_owned(), .to_owned(),
engine: smallvec!["Google".to_owned(), "Bing".to_owned()], engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
}, },
); ));
let mut resultant_map = HashMap::new(); let mut resultant_map = Vec::new();
// Create a temporary file with an invalid regex pattern // Create a temporary file with an invalid regex pattern
let mut file = NamedTempFile::new().unwrap(); let mut file = NamedTempFile::new().unwrap();
@ -362,6 +381,6 @@ mod tests {
file.path().to_str().unwrap(), file.path().to_str().unwrap(),
); );
assert!(result.is_err()); assert!(result.await.is_err());
} }
} }

View File

@ -7,11 +7,13 @@ use crate::{
handler::{file_path, FileType}, handler::{file_path, FileType},
}; };
use actix_web::{get, http::header::ContentType, web, HttpRequest, HttpResponse}; use actix_web::{get, http::header::ContentType, web, HttpRequest, HttpResponse};
use std::fs::read_to_string; use tokio::fs::read_to_string;
/// Handles the route of index page or main page of the `websurfx` meta search engine website. /// Handles the route of index page or main page of the `websurfx` meta search engine website.
#[get("/")] #[get("/")]
pub async fn index(config: web::Data<Config>) -> Result<HttpResponse, Box<dyn std::error::Error>> { pub async fn index(
config: web::Data<&'static Config>,
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
Ok(HttpResponse::Ok().content_type(ContentType::html()).body( Ok(HttpResponse::Ok().content_type(ContentType::html()).body(
crate::templates::views::index::index( crate::templates::views::index::index(
&config.style.colorscheme, &config.style.colorscheme,
@ -25,7 +27,7 @@ pub async fn index(config: web::Data<Config>) -> Result<HttpResponse, Box<dyn st
/// Handles the route of any other accessed route/page which is not provided by the /// Handles the route of any other accessed route/page which is not provided by the
/// website essentially the 404 error page. /// website essentially the 404 error page.
pub async fn not_found( pub async fn not_found(
config: web::Data<Config>, config: web::Data<&'static Config>,
) -> Result<HttpResponse, Box<dyn std::error::Error>> { ) -> Result<HttpResponse, Box<dyn std::error::Error>> {
Ok(HttpResponse::Ok().content_type(ContentType::html()).body( Ok(HttpResponse::Ok().content_type(ContentType::html()).body(
crate::templates::views::not_found::not_found( crate::templates::views::not_found::not_found(
@ -41,7 +43,7 @@ pub async fn not_found(
#[get("/robots.txt")] #[get("/robots.txt")]
pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> { pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
let page_content: String = let page_content: String =
read_to_string(format!("{}/robots.txt", file_path(FileType::Theme)?))?; read_to_string(format!("{}/robots.txt", file_path(FileType::Theme)?)).await?;
Ok(HttpResponse::Ok() Ok(HttpResponse::Ok()
.content_type(ContentType::plaintext()) .content_type(ContentType::plaintext())
.body(page_content)) .body(page_content))
@ -49,7 +51,9 @@ pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std:
/// Handles the route of about page of the `websurfx` meta search engine website. /// Handles the route of about page of the `websurfx` meta search engine website.
#[get("/about")] #[get("/about")]
pub async fn about(config: web::Data<Config>) -> Result<HttpResponse, Box<dyn std::error::Error>> { pub async fn about(
config: web::Data<&'static Config>,
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
Ok(HttpResponse::Ok().content_type(ContentType::html()).body( Ok(HttpResponse::Ok().content_type(ContentType::html()).body(
crate::templates::views::about::about( crate::templates::views::about::about(
&config.style.colorscheme, &config.style.colorscheme,
@ -63,7 +67,7 @@ pub async fn about(config: web::Data<Config>) -> Result<HttpResponse, Box<dyn st
/// Handles the route of settings page of the `websurfx` meta search engine website. /// Handles the route of settings page of the `websurfx` meta search engine website.
#[get("/settings")] #[get("/settings")]
pub async fn settings( pub async fn settings(
config: web::Data<Config>, config: web::Data<&'static Config>,
) -> Result<HttpResponse, Box<dyn std::error::Error>> { ) -> Result<HttpResponse, Box<dyn std::error::Error>> {
Ok(HttpResponse::Ok().content_type(ContentType::html()).body( Ok(HttpResponse::Ok().content_type(ContentType::html()).body(
crate::templates::views::settings::settings( crate::templates::views::settings::settings(

View File

@ -13,12 +13,12 @@ use crate::{
}; };
use actix_web::{get, http::header::ContentType, web, HttpRequest, HttpResponse}; use actix_web::{get, http::header::ContentType, web, HttpRequest, HttpResponse};
use regex::Regex; use regex::Regex;
use std::{ use std::borrow::Cow;
borrow::Cow, use tokio::{
fs::File, fs::File,
io::{BufRead, BufReader, Read}, io::{AsyncBufReadExt, BufReader},
join,
}; };
use tokio::join;
/// Handles the route of search page of the `websurfx` meta search engine website and it takes /// Handles the route of search page of the `websurfx` meta search engine website and it takes
/// two search url parameters `q` and `page` where `page` parameter is optional. /// two search url parameters `q` and `page` where `page` parameter is optional.
@ -37,8 +37,8 @@ use tokio::join;
#[get("/search")] #[get("/search")]
pub async fn search( pub async fn search(
req: HttpRequest, req: HttpRequest,
config: web::Data<Config>, config: web::Data<&'static Config>,
cache: web::Data<SharedCache>, cache: web::Data<&'static SharedCache>,
) -> Result<HttpResponse, Box<dyn std::error::Error>> { ) -> Result<HttpResponse, Box<dyn std::error::Error>> {
use std::sync::Arc; use std::sync::Arc;
let params = web::Query::<SearchParams>::from_query(req.query_string())?; let params = web::Query::<SearchParams>::from_query(req.query_string())?;
@ -70,8 +70,8 @@ pub async fn search(
}); });
search_settings.safe_search_level = get_safesearch_level( search_settings.safe_search_level = get_safesearch_level(
&Some(search_settings.safe_search_level), params.safesearch,
&params.safesearch, search_settings.safe_search_level,
config.safe_search, config.safe_search,
); );
@ -158,8 +158,8 @@ pub async fn search(
/// It returns the `SearchResults` struct if the search results could be successfully fetched from /// It returns the `SearchResults` struct if the search results could be successfully fetched from
/// the cache or from the upstream search engines otherwise it returns an appropriate error. /// the cache or from the upstream search engines otherwise it returns an appropriate error.
async fn results( async fn results(
config: &Config, config: &'static Config,
cache: &web::Data<SharedCache>, cache: &'static SharedCache,
query: &str, query: &str,
page: u32, page: u32,
search_settings: &server_models::Cookie<'_>, search_settings: &server_models::Cookie<'_>,
@ -188,7 +188,7 @@ async fn results(
let mut results: SearchResults = SearchResults::default(); let mut results: SearchResults = SearchResults::default();
let flag: bool = let flag: bool =
!is_match_from_filter_list(file_path(FileType::BlockList)?, query)?; !is_match_from_filter_list(file_path(FileType::BlockList)?, query).await?;
// Return early when query contains disallowed words, // Return early when query contains disallowed words,
if flag { if flag {
results.set_disallowed(); results.set_disallowed();
@ -225,12 +225,12 @@ async fn results(
search_results search_results
} }
}; };
if results.engine_errors_info().is_empty() let (engine_errors_info, results_empty_check, no_engines_selected) = (
&& results.results().is_empty() results.engine_errors_info().is_empty(),
&& !results.no_engines_selected() results.results().is_empty(),
{ results.no_engines_selected(),
results.set_filtered(); );
} results.set_filtered(engine_errors_info & results_empty_check & !no_engines_selected);
cache cache
.cache_results(&[results.clone()], &[cache_key.clone()]) .cache_results(&[results.clone()], &[cache_key.clone()])
.await?; .await?;
@ -252,13 +252,14 @@ async fn results(
/// ///
/// Returns a bool indicating whether the results were found in the list or not on success /// Returns a bool indicating whether the results were found in the list or not on success
/// otherwise returns a standard error type on a failure. /// otherwise returns a standard error type on a failure.
fn is_match_from_filter_list( async fn is_match_from_filter_list(
file_path: &str, file_path: &str,
query: &str, query: &str,
) -> Result<bool, Box<dyn std::error::Error>> { ) -> Result<bool, Box<dyn std::error::Error>> {
let mut reader = BufReader::new(File::open(file_path)?); let reader = BufReader::new(File::open(file_path).await?);
for line in reader.by_ref().lines() { let mut lines = reader.lines();
let re = Regex::new(&line?)?; while let Some(line) = lines.next_line().await? {
let re = Regex::new(&line)?;
if re.is_match(query) { if re.is_match(query) {
return Ok(true); return Ok(true);
} }
@ -267,24 +268,95 @@ fn is_match_from_filter_list(
Ok(false) Ok(false)
} }
/// A helper function to modify the safe search level based on the url params. /// A helper function to choose the safe search level value based on the URL parameters,
/// The `safe_search` is the one in the user's cookie or /// cookie value and config value.
/// the default set by the server config if the cookie was missing.
/// ///
/// # Argurments /// # Argurments
/// ///
/// * `url_level` - Safe search level from the url. /// * `safe_search_level_from_url` - Safe search level from the URL parameters.
/// * `safe_search` - User's cookie, or the safe search level set by the server /// * `cookie_safe_search_level` - Safe search level value from the cookie.
/// * `config_level` - Safe search level to fall back to /// * `config_safe_search_level` - Safe search level value from the config file.
fn get_safesearch_level(cookie_level: &Option<u8>, url_level: &Option<u8>, config_level: u8) -> u8 { ///
match url_level { /// # Returns
Some(url_level) => { ///
if *url_level >= 3 { /// Returns an appropriate safe search level value based on the safe search level values
config_level /// from the URL parameters, cookie and the config file.
fn get_safesearch_level(
safe_search_level_from_url: Option<u8>,
cookie_safe_search_level: u8,
config_safe_search_level: u8,
) -> u8 {
(u8::from(safe_search_level_from_url.is_some())
* ((u8::from(config_safe_search_level >= 3) * config_safe_search_level)
+ (u8::from(config_safe_search_level < 3) * safe_search_level_from_url.unwrap_or(0))))
+ (u8::from(safe_search_level_from_url.is_none())
* ((u8::from(config_safe_search_level >= 3) * config_safe_search_level)
+ (u8::from(config_safe_search_level < 3) * cookie_safe_search_level)))
}
#[cfg(test)]
mod tests {
use std::time::{SystemTime, UNIX_EPOCH};
/// A helper function which creates a random mock safe search level value.
///
/// # Returns
///
/// Returns an optional u8 value.
fn mock_safe_search_level_value() -> Option<u8> {
let nanos = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap_or_default()
.subsec_nanos() as f32;
let delay = ((nanos / 1_0000_0000 as f32).floor() as i8) - 1;
match delay {
-1 => None,
some_num => Some(if some_num > 4 { some_num - 4 } else { some_num } as u8),
}
}
#[test]
/// A test function to test whether the output of the branchless and branched code
/// for the code to choose the appropriate safe search level is same or not.
fn get_safesearch_level_branched_branchless_code_test() {
// Get mock values for the safe search level values for URL parameters, cookie
// and config.
let safe_search_level_from_url = mock_safe_search_level_value();
let cookie_safe_search_level = mock_safe_search_level_value().unwrap_or(0);
let config_safe_search_level = mock_safe_search_level_value().unwrap_or(0);
// Branched code
let safe_search_level_value_from_branched_code = match safe_search_level_from_url {
Some(safe_search_level_from_url_parsed) => {
if config_safe_search_level >= 3 {
config_safe_search_level
} else { } else {
*url_level safe_search_level_from_url_parsed
} }
} }
None => cookie_level.unwrap_or(config_level), None => {
if config_safe_search_level >= 3 {
config_safe_search_level
} else {
cookie_safe_search_level
}
}
};
// branchless code
let safe_search_level_value_from_branchless_code =
(u8::from(safe_search_level_from_url.is_some())
* ((u8::from(config_safe_search_level >= 3) * config_safe_search_level)
+ (u8::from(config_safe_search_level < 3)
* safe_search_level_from_url.unwrap_or(0))))
+ (u8::from(safe_search_level_from_url.is_none())
* ((u8::from(config_safe_search_level >= 3) * config_safe_search_level)
+ (u8::from(config_safe_search_level < 3) * cookie_safe_search_level)));
assert_eq!(
safe_search_level_value_from_branched_code,
safe_search_level_value_from_branchless_code
);
} }
} }

View File

@ -55,7 +55,7 @@ pub fn engines(engine_names: &HashMap<String, bool>) -> Markup {
input type="checkbox" class="engine" checked; input type="checkbox" class="engine" checked;
span class="slider round"{} span class="slider round"{}
} }
(format!("{}{}",engine_name[..1].to_uppercase().to_owned(), engine_name[1..].to_owned())) (format!("{}{}",&engine_name[..1].to_uppercase(), &engine_name[1..]))
} }
} }
@else { @else {
@ -64,7 +64,7 @@ pub fn engines(engine_names: &HashMap<String, bool>) -> Markup {
input type="checkbox" class="engine"; input type="checkbox" class="engine";
span class="slider round"{} span class="slider round"{}
} }
(format!("{}{}",engine_name[..1].to_uppercase().to_owned(), engine_name[1..].to_owned())) (format!("{}{}",&engine_name[..1], &engine_name[1..]))
} }
} }
} }

View File

@ -36,7 +36,7 @@ fn style_option_list(
} }
if style_type == "animations" { if style_type == "animations" {
style_option_names.push(("".to_owned(), "none".to_owned())) style_option_names.push((String::default(), "none".to_owned()))
} }
Ok(style_option_names) Ok(style_option_names)
@ -83,9 +83,11 @@ pub fn user_interface(
"Select the animation for your theme to be used in user interface" "Select the animation for your theme to be used in user interface"
} }
select name="animations"{ select name="animations"{
@let default_animation = &String::default();
@let animation = animation.as_ref().unwrap_or(default_animation);
// Sets the user selected animation name from the config file as the first option in the selection list. // Sets the user selected animation name from the config file as the first option in the selection list.
option value=(animation.as_ref().unwrap_or(&"".to_owned())){(animation.as_ref().unwrap_or(&"".to_owned()).replace('-'," "))} option value=(animation){(animation.replace('-'," "))}
@for (k,v) in style_option_list("animations", animation.as_ref().unwrap_or(&"".to_owned()))?{ @for (k,v) in style_option_list("animations", animation)?{
option value=(k){(v)} option value=(k){(v)}
} }
} }

View File

@ -38,7 +38,7 @@ pub fn search(
small{(result.url)} small{(result.url)}
p{(PreEscaped(&result.description))} p{(PreEscaped(&result.description))}
.upstream_engines{ .upstream_engines{
@for name in result.clone().engine{ @for name in &result.engine {
span{(name)} span{(name)}
} }
} }

View File

@ -1,14 +1,17 @@
use std::net::TcpListener; use std::{net::TcpListener, sync::OnceLock};
use websurfx::{config::parser::Config, run, templates::views}; use websurfx::{config::parser::Config, run, templates::views};
/// A static constant for holding the parsed config.
static CONFIG: OnceLock<Config> = OnceLock::new();
// Starts a new instance of the HTTP server, bound to a random available port // Starts a new instance of the HTTP server, bound to a random available port
async fn spawn_app() -> String { async fn spawn_app() -> String {
// Binding to port 0 will trigger the OS to assign a port for us. // Binding to port 0 will trigger the OS to assign a port for us.
let listener = TcpListener::bind("127.0.0.1:0").expect("Failed to bind random port"); let listener = TcpListener::bind("127.0.0.1:0").expect("Failed to bind random port");
let port = listener.local_addr().unwrap().port(); let port = listener.local_addr().unwrap().port();
let config = Config::parse(false).unwrap(); let config = CONFIG.get_or_init(|| Config::parse(false).unwrap());
let cache = websurfx::cache::cacher::create_cache(&config).await; let cache = websurfx::cache::cacher::create_cache(config).await;
let server = run(listener, config, cache).expect("Failed to bind address"); let server = run(listener, config, cache).expect("Failed to bind address");
tokio::spawn(server); tokio::spawn(server);