0
0
mirror of https://github.com/neon-mmd/websurfx.git synced 2024-11-26 15:58:21 -05:00

Compare commits

..

No commits in common. "c3da0b330318cacbcfe2c79c2cfad6e1c471368a" and "4bac2a5872db918a66174039953def512496ac30" have entirely different histories.

25 changed files with 222 additions and 379 deletions

2
Cargo.lock generated
View File

@ -4205,11 +4205,11 @@ dependencies = [
"actix-files", "actix-files",
"actix-governor", "actix-governor",
"actix-web", "actix-web",
"async-compression",
"async-once-cell", "async-once-cell",
"async-trait", "async-trait",
"base64 0.21.7", "base64 0.21.7",
"blake3", "blake3",
"brotli",
"cfg-if 1.0.0", "cfg-if 1.0.0",
"chacha20", "chacha20",
"chacha20poly1305", "chacha20poly1305",

View File

@ -14,7 +14,7 @@ path = "src/bin/websurfx.rs"
[dependencies] [dependencies]
reqwest = {version="0.11.24", default-features=false, features=["rustls-tls","brotli", "gzip"]} reqwest = {version="0.11.24", default-features=false, features=["rustls-tls","brotli", "gzip"]}
tokio = {version="1.32.0",features=["rt-multi-thread","macros", "fs", "io-util"], default-features = false} tokio = {version="1.32.0",features=["rt-multi-thread","macros"], default-features = false}
serde = {version="1.0.196", default-features=false, features=["derive"]} serde = {version="1.0.196", default-features=false, features=["derive"]}
serde_json = {version="1.0.109", default-features=false} serde_json = {version="1.0.109", default-features=false}
maud = {version="0.25.0", default-features=false, features=["actix-web"]} maud = {version="0.25.0", default-features=false, features=["actix-web"]}
@ -32,13 +32,13 @@ error-stack = {version="0.4.0", default-features=false, features=["std"]}
async-trait = {version="0.1.76", default-features=false} async-trait = {version="0.1.76", default-features=false}
regex = {version="1.9.4", features=["perf"], default-features = false} regex = {version="1.9.4", features=["perf"], default-features = false}
smallvec = {version="1.13.1", features=["union", "serde"], default-features=false} smallvec = {version="1.13.1", features=["union", "serde"], default-features=false}
futures = {version="0.3.30", default-features=false, features=["alloc"]} futures = {version="0.3.28", default-features=false}
dhat = {version="0.3.2", optional = true, default-features=false} dhat = {version="0.3.3", optional = true, default-features=false}
mimalloc = { version = "0.1.38", default-features = false } mimalloc = { version = "0.1.38", default-features = false }
async-once-cell = {version="0.5.3", default-features=false} async-once-cell = {version="0.5.3", default-features=false}
actix-governor = {version="0.5.0", default-features=false} actix-governor = {version="0.5.0", default-features=false}
mini-moka = { version="0.10", optional = true, default-features=false, features=["sync"]} mini-moka = { version="0.10", optional = true, default-features=false, features=["sync"]}
async-compression = { version = "0.4.6", default-features = false, features=["brotli","tokio"], optional=true} brotli = { version = "3.4.0", default-features = false, features=["std"], optional=true}
chacha20poly1305={version="0.10.1", default-features=false, features=["alloc","getrandom"], optional=true} chacha20poly1305={version="0.10.1", default-features=false, features=["alloc","getrandom"], optional=true}
chacha20 = {version="0.9.1", default-features=false, optional=true} chacha20 = {version="0.9.1", default-features=false, optional=true}
base64 = {version="0.21.5", default-features=false, features=["std"], optional=true} base64 = {version="0.21.5", default-features=false, features=["std"], optional=true}
@ -84,7 +84,7 @@ default = ["memory-cache"]
dhat-heap = ["dep:dhat"] dhat-heap = ["dep:dhat"]
memory-cache = ["dep:mini-moka"] memory-cache = ["dep:mini-moka"]
redis-cache = ["dep:redis","dep:base64"] redis-cache = ["dep:redis","dep:base64"]
compress-cache-results = ["dep:async-compression","dep:cfg-if"] compress-cache-results = ["dep:brotli","dep:cfg-if"]
encrypt-cache-results = ["dep:chacha20poly1305","dep:chacha20"] encrypt-cache-results = ["dep:chacha20poly1305","dep:chacha20"]
cec-cache-results = ["compress-cache-results","encrypt-cache-results"] cec-cache-results = ["compress-cache-results","encrypt-cache-results"]
experimental-io-uring = ["actix-web/experimental-io-uring"] experimental-io-uring = ["actix-web/experimental-io-uring"]

View File

@ -5,7 +5,7 @@
#[cfg(not(feature = "dhat-heap"))] #[cfg(not(feature = "dhat-heap"))]
use mimalloc::MiMalloc; use mimalloc::MiMalloc;
use std::{net::TcpListener, sync::OnceLock}; use std::net::TcpListener;
use websurfx::{cache::cacher::create_cache, config::parser::Config, run}; use websurfx::{cache::cacher::create_cache, config::parser::Config, run};
/// A dhat heap memory profiler /// A dhat heap memory profiler
@ -17,9 +17,6 @@ static ALLOC: dhat::Alloc = dhat::Alloc;
#[global_allocator] #[global_allocator]
static GLOBAL: MiMalloc = MiMalloc; static GLOBAL: MiMalloc = MiMalloc;
/// A static constant for holding the parsed config.
static CONFIG: OnceLock<Config> = OnceLock::new();
/// The function that launches the main server and registers all the routes of the website. /// The function that launches the main server and registers all the routes of the website.
/// ///
/// # Error /// # Error
@ -32,10 +29,10 @@ async fn main() -> std::io::Result<()> {
#[cfg(feature = "dhat-heap")] #[cfg(feature = "dhat-heap")]
let _profiler = dhat::Profiler::new_heap(); let _profiler = dhat::Profiler::new_heap();
// Initialize the parsed config globally. // Initialize the parsed config file.
let config = CONFIG.get_or_init(|| Config::parse(false).unwrap()); let config = Config::parse(false).unwrap();
let cache = create_cache(config).await; let cache = create_cache(&config).await;
log::info!( log::info!(
"started server on port {} and IP {}", "started server on port {} and IP {}",
@ -48,7 +45,7 @@ async fn main() -> std::io::Result<()> {
config.port, config.port,
); );
let listener = TcpListener::bind((config.binding_ip.as_str(), config.port))?; let listener = TcpListener::bind((config.binding_ip.clone(), config.port))?;
run(listener, config, cache)?.await run(listener, config, cache)?.await
} }

68
src/cache/cacher.rs vendored
View File

@ -93,7 +93,7 @@ pub trait Cacher: Send + Sync {
feature = "encrypt-cache-results", feature = "encrypt-cache-results",
feature = "cec-cache-results" feature = "cec-cache-results"
))] ))]
async fn encrypt_or_decrypt_results( fn encrypt_or_decrypt_results(
&mut self, &mut self,
mut bytes: Vec<u8>, mut bytes: Vec<u8>,
encrypt: bool, encrypt: bool,
@ -137,19 +137,11 @@ pub trait Cacher: Send + Sync {
/// Returns the compressed bytes on success otherwise it returns a CacheError /// Returns the compressed bytes on success otherwise it returns a CacheError
/// on failure. /// on failure.
#[cfg(any(feature = "compress-cache-results", feature = "cec-cache-results"))] #[cfg(any(feature = "compress-cache-results", feature = "cec-cache-results"))]
async fn compress_results( fn compress_results(&mut self, mut bytes: Vec<u8>) -> Result<Vec<u8>, Report<CacheError>> {
&mut self, use std::io::Write;
mut bytes: Vec<u8>, let mut writer = brotli::CompressorWriter::new(Vec::new(), 4096, 11, 22);
) -> Result<Vec<u8>, Report<CacheError>> {
use tokio::io::AsyncWriteExt;
let mut writer = async_compression::tokio::write::BrotliEncoder::new(Vec::new());
writer writer
.write_all(&bytes) .write_all(&bytes)
.await
.map_err(|_| CacheError::CompressionError)?;
writer
.shutdown()
.await
.map_err(|_| CacheError::CompressionError)?; .map_err(|_| CacheError::CompressionError)?;
bytes = writer.into_inner(); bytes = writer.into_inner();
Ok(bytes) Ok(bytes)
@ -167,17 +159,17 @@ pub trait Cacher: Send + Sync {
/// Returns the compressed and encrypted bytes on success otherwise it returns a CacheError /// Returns the compressed and encrypted bytes on success otherwise it returns a CacheError
/// on failure. /// on failure.
#[cfg(feature = "cec-cache-results")] #[cfg(feature = "cec-cache-results")]
async fn compress_encrypt_compress_results( fn compress_encrypt_compress_results(
&mut self, &mut self,
mut bytes: Vec<u8>, mut bytes: Vec<u8>,
) -> Result<Vec<u8>, Report<CacheError>> { ) -> Result<Vec<u8>, Report<CacheError>> {
// compress first // compress first
bytes = self.compress_results(bytes).await?; bytes = self.compress_results(bytes)?;
// encrypt // encrypt
bytes = self.encrypt_or_decrypt_results(bytes, true).await?; bytes = self.encrypt_or_decrypt_results(bytes, true)?;
// compress again; // compress again;
bytes = self.compress_results(bytes).await?; bytes = self.compress_results(bytes)?;
Ok(bytes) Ok(bytes)
} }
@ -195,11 +187,11 @@ pub trait Cacher: Send + Sync {
/// on failure. /// on failure.
#[cfg(any(feature = "compress-cache-results", feature = "cec-cache-results"))] #[cfg(any(feature = "compress-cache-results", feature = "cec-cache-results"))]
async fn decompress_results(&mut self, bytes: &[u8]) -> Result<Vec<u8>, Report<CacheError>> { fn decompress_results(&mut self, bytes: &[u8]) -> Result<Vec<u8>, Report<CacheError>> {
cfg_if::cfg_if! { cfg_if::cfg_if! {
if #[cfg(feature = "compress-cache-results")] if #[cfg(feature = "compress-cache-results")]
{ {
decompress_util(bytes).await decompress_util(bytes)
} }
else if #[cfg(feature = "cec-cache-results")] else if #[cfg(feature = "cec-cache-results")]
@ -207,7 +199,7 @@ pub trait Cacher: Send + Sync {
let decompressed = decompress_util(bytes)?; let decompressed = decompress_util(bytes)?;
let decrypted = self.encrypt_or_decrypt_results(decompressed, false)?; let decrypted = self.encrypt_or_decrypt_results(decompressed, false)?;
decompress_util(&decrypted).await decompress_util(&decrypted)
} }
} }
@ -224,7 +216,7 @@ pub trait Cacher: Send + Sync {
/// # Error /// # Error
/// Returns a Vec of compressed or encrypted bytes on success otherwise it returns a CacheError /// Returns a Vec of compressed or encrypted bytes on success otherwise it returns a CacheError
/// on failure. /// on failure.
async fn pre_process_search_results( fn pre_process_search_results(
&mut self, &mut self,
search_results: &SearchResults, search_results: &SearchResults,
) -> Result<Vec<u8>, Report<CacheError>> { ) -> Result<Vec<u8>, Report<CacheError>> {
@ -232,20 +224,19 @@ pub trait Cacher: Send + Sync {
let mut bytes: Vec<u8> = search_results.try_into()?; let mut bytes: Vec<u8> = search_results.try_into()?;
#[cfg(feature = "compress-cache-results")] #[cfg(feature = "compress-cache-results")]
{ {
let compressed = self.compress_results(bytes).await?; let compressed = self.compress_results(bytes)?;
bytes = compressed; bytes = compressed;
} }
#[cfg(feature = "encrypt-cache-results")] #[cfg(feature = "encrypt-cache-results")]
{ {
let encrypted = self.encrypt_or_decrypt_results(bytes, true).await?; let encrypted = self.encrypt_or_decrypt_results(bytes, true)?;
bytes = encrypted; bytes = encrypted;
} }
#[cfg(feature = "cec-cache-results")] #[cfg(feature = "cec-cache-results")]
{ {
let compressed_encrypted_compressed = let compressed_encrypted_compressed = self.compress_encrypt_compress_results(bytes)?;
self.compress_encrypt_compress_results(bytes).await?;
bytes = compressed_encrypted_compressed; bytes = compressed_encrypted_compressed;
} }
@ -265,25 +256,25 @@ pub trait Cacher: Send + Sync {
/// on failure. /// on failure.
#[allow(unused_mut)] // needs to be mutable when any of the features is enabled #[allow(unused_mut)] // needs to be mutable when any of the features is enabled
async fn post_process_search_results( fn post_process_search_results(
&mut self, &mut self,
mut bytes: Vec<u8>, mut bytes: Vec<u8>,
) -> Result<SearchResults, Report<CacheError>> { ) -> Result<SearchResults, Report<CacheError>> {
#[cfg(feature = "compress-cache-results")] #[cfg(feature = "compress-cache-results")]
{ {
let decompressed = self.decompress_results(&bytes).await?; let decompressed = self.decompress_results(&bytes)?;
bytes = decompressed bytes = decompressed
} }
#[cfg(feature = "encrypt-cache-results")] #[cfg(feature = "encrypt-cache-results")]
{ {
let decrypted = self.encrypt_or_decrypt_results(bytes, false).await?; let decrypted = self.encrypt_or_decrypt_results(bytes, false)?;
bytes = decrypted bytes = decrypted
} }
#[cfg(feature = "cec-cache-results")] #[cfg(feature = "cec-cache-results")]
{ {
let decompressed_decrypted = self.decompress_results(&bytes).await?; let decompressed_decrypted = self.decompress_results(&bytes)?;
bytes = decompressed_decrypted; bytes = decompressed_decrypted;
} }
@ -304,19 +295,16 @@ pub trait Cacher: Send + Sync {
/// on failure. /// on failure.
#[cfg(any(feature = "compress-cache-results", feature = "cec-cache-results"))] #[cfg(any(feature = "compress-cache-results", feature = "cec-cache-results"))]
async fn decompress_util(input: &[u8]) -> Result<Vec<u8>, Report<CacheError>> { fn decompress_util(input: &[u8]) -> Result<Vec<u8>, Report<CacheError>> {
use tokio::io::AsyncWriteExt; use std::io::Write;
let mut writer = async_compression::tokio::write::BrotliDecoder::new(Vec::new()); let mut writer = brotli::DecompressorWriter::new(Vec::new(), 4096);
writer writer
.write_all(input) .write_all(input)
.await
.map_err(|_| CacheError::CompressionError)?; .map_err(|_| CacheError::CompressionError)?;
writer let bytes = writer
.shutdown() .into_inner()
.await
.map_err(|_| CacheError::CompressionError)?; .map_err(|_| CacheError::CompressionError)?;
let bytes = writer.into_inner();
Ok(bytes) Ok(bytes)
} }
@ -341,7 +329,7 @@ impl Cacher for RedisCache {
let bytes = base64::engine::general_purpose::STANDARD_NO_PAD let bytes = base64::engine::general_purpose::STANDARD_NO_PAD
.decode(base64_string) .decode(base64_string)
.map_err(|_| CacheError::Base64DecodingOrEncodingError)?; .map_err(|_| CacheError::Base64DecodingOrEncodingError)?;
self.post_process_search_results(bytes).await self.post_process_search_results(bytes)
} }
async fn cache_results( async fn cache_results(
@ -357,7 +345,7 @@ impl Cacher for RedisCache {
let mut bytes = Vec::with_capacity(search_results_len); let mut bytes = Vec::with_capacity(search_results_len);
for result in search_results { for result in search_results {
let processed = self.pre_process_search_results(result).await?; let processed = self.pre_process_search_results(result)?;
bytes.push(processed); bytes.push(processed);
} }
@ -417,7 +405,7 @@ impl Cacher for InMemoryCache {
async fn cached_results(&mut self, url: &str) -> Result<SearchResults, Report<CacheError>> { async fn cached_results(&mut self, url: &str) -> Result<SearchResults, Report<CacheError>> {
let hashed_url_string = self.hash_url(url); let hashed_url_string = self.hash_url(url);
match self.cache.get(&hashed_url_string) { match self.cache.get(&hashed_url_string) {
Some(res) => self.post_process_search_results(res).await, Some(res) => self.post_process_search_results(res),
None => Err(Report::new(CacheError::MissingValue)), None => Err(Report::new(CacheError::MissingValue)),
} }
} }
@ -429,7 +417,7 @@ impl Cacher for InMemoryCache {
) -> Result<(), Report<CacheError>> { ) -> Result<(), Report<CacheError>> {
for (url, search_result) in urls.iter().zip(search_results.iter()) { for (url, search_result) in urls.iter().zip(search_results.iter()) {
let hashed_url_string = self.hash_url(url); let hashed_url_string = self.hash_url(url);
let bytes = self.pre_process_search_results(search_result).await?; let bytes = self.pre_process_search_results(search_result)?;
self.cache.insert(hashed_url_string, bytes); self.cache.insert(hashed_url_string, bytes);
} }

View File

@ -1,16 +1,15 @@
//! This module provides the functionality to cache the aggregated results fetched and aggregated //! This module provides the functionality to cache the aggregated results fetched and aggregated
//! from the upstream search engines in a json format. //! from the upstream search engines in a json format.
use super::error::CacheError;
use error_stack::Report; use error_stack::Report;
use futures::stream::FuturesUnordered; use futures::future::try_join_all;
use redis::{aio::ConnectionManager, AsyncCommands, Client, RedisError}; use redis::{aio::ConnectionManager, AsyncCommands, Client, RedisError};
/// A constant holding the redis pipeline size. use super::error::CacheError;
const REDIS_PIPELINE_SIZE: usize = 3;
/// A named struct which stores the redis Connection url address to which the client will /// A named struct which stores the redis Connection url address to which the client will
/// connect to. /// connect to.
#[derive(Clone)]
pub struct RedisCache { pub struct RedisCache {
/// It stores a pool of connections ready to be used. /// It stores a pool of connections ready to be used.
connection_pool: Vec<ConnectionManager>, connection_pool: Vec<ConnectionManager>,
@ -21,8 +20,6 @@ pub struct RedisCache {
current_connection: u8, current_connection: u8,
/// It stores the max TTL for keys. /// It stores the max TTL for keys.
cache_ttl: u16, cache_ttl: u16,
/// It stores the redis pipeline struct of size 3.
pipeline: redis::Pipeline,
} }
impl RedisCache { impl RedisCache {
@ -33,8 +30,6 @@ impl RedisCache {
/// * `redis_connection_url` - It takes the redis Connection url address. /// * `redis_connection_url` - It takes the redis Connection url address.
/// * `pool_size` - It takes the size of the connection pool (in other words the number of /// * `pool_size` - It takes the size of the connection pool (in other words the number of
/// connections that should be stored in the pool). /// connections that should be stored in the pool).
/// * `cache_ttl` - It takes the the time to live for cached results to live in the redis
/// server.
/// ///
/// # Error /// # Error
/// ///
@ -46,28 +41,18 @@ impl RedisCache {
cache_ttl: u16, cache_ttl: u16,
) -> Result<Self, Box<dyn std::error::Error>> { ) -> Result<Self, Box<dyn std::error::Error>> {
let client = Client::open(redis_connection_url)?; let client = Client::open(redis_connection_url)?;
let tasks: FuturesUnordered<_> = FuturesUnordered::new(); let mut tasks: Vec<_> = Vec::new();
for _ in 0..pool_size { for _ in 0..pool_size {
let client_partially_cloned = client.clone(); tasks.push(client.get_connection_manager());
tasks.push(tokio::spawn(async move {
client_partially_cloned.get_connection_manager().await
}));
}
let mut outputs = Vec::new();
for task in tasks {
outputs.push(task.await??);
} }
let redis_cache = RedisCache { let redis_cache = RedisCache {
connection_pool: outputs, connection_pool: try_join_all(tasks).await?,
pool_size, pool_size,
current_connection: Default::default(), current_connection: Default::default(),
cache_ttl, cache_ttl,
pipeline: redis::Pipeline::with_capacity(REDIS_PIPELINE_SIZE),
}; };
Ok(redis_cache) Ok(redis_cache)
} }
@ -137,14 +122,13 @@ impl RedisCache {
keys: impl Iterator<Item = String>, keys: impl Iterator<Item = String>,
) -> Result<(), Report<CacheError>> { ) -> Result<(), Report<CacheError>> {
self.current_connection = Default::default(); self.current_connection = Default::default();
let mut pipeline = redis::Pipeline::with_capacity(3);
for (key, json_result) in keys.zip(json_results) { for (key, json_result) in keys.zip(json_results) {
self.pipeline pipeline.set_ex(key, json_result, self.cache_ttl.into());
.set_ex(key, json_result, self.cache_ttl.into());
} }
let mut result: Result<(), RedisError> = self let mut result: Result<(), RedisError> = pipeline
.pipeline
.query_async(&mut self.connection_pool[self.current_connection as usize]) .query_async(&mut self.connection_pool[self.current_connection as usize])
.await; .await;
@ -165,8 +149,7 @@ impl RedisCache {
CacheError::PoolExhaustionWithConnectionDropError, CacheError::PoolExhaustionWithConnectionDropError,
)); ));
} }
result = self result = pipeline
.pipeline
.query_async( .query_async(
&mut self.connection_pool[self.current_connection as usize], &mut self.connection_pool[self.current_connection as usize],
) )

View File

@ -9,6 +9,7 @@ use mlua::Lua;
use std::{collections::HashMap, fs, thread::available_parallelism}; use std::{collections::HashMap, fs, thread::available_parallelism};
/// A named struct which stores the parsed config file options. /// A named struct which stores the parsed config file options.
#[derive(Clone)]
pub struct Config { pub struct Config {
/// It stores the parsed port number option on which the server should launch. /// It stores the parsed port number option on which the server should launch.
pub port: u16, pub port: u16,

View File

@ -48,7 +48,7 @@ impl SearchEngine for Bing {
user_agent: &str, user_agent: &str,
client: &Client, client: &Client,
_safe_search: u8, _safe_search: u8,
) -> Result<Vec<(String, SearchResult)>, EngineError> { ) -> Result<HashMap<String, SearchResult>, EngineError> {
// Bing uses `start results from this number` convention // Bing uses `start results from this number` convention
// So, for 10 results per page, page 0 starts at 1, page 1 // So, for 10 results per page, page 0 starts at 1, page 1
// starts at 11, and so on. // starts at 11, and so on.

View File

@ -44,7 +44,7 @@ impl SearchEngine for Brave {
user_agent: &str, user_agent: &str,
client: &Client, client: &Client,
safe_search: u8, safe_search: u8,
) -> Result<Vec<(String, SearchResult)>, EngineError> { ) -> Result<HashMap<String, SearchResult>, EngineError> {
let url = format!("https://search.brave.com/search?q={query}&offset={page}"); let url = format!("https://search.brave.com/search?q={query}&offset={page}");
let safe_search_level = match safe_search { let safe_search_level = match safe_search {

View File

@ -47,7 +47,7 @@ impl SearchEngine for DuckDuckGo {
user_agent: &str, user_agent: &str,
client: &Client, client: &Client,
_safe_search: u8, _safe_search: u8,
) -> Result<Vec<(String, SearchResult)>, EngineError> { ) -> Result<HashMap<String, SearchResult>, EngineError> {
// Page number can be missing or empty string and so appropriate handling is required // Page number can be missing or empty string and so appropriate handling is required
// so that upstream server recieves valid page number. // so that upstream server recieves valid page number.
let url: String = match page { let url: String = match page {

View File

@ -62,7 +62,7 @@ impl SearchEngine for LibreX {
user_agent: &str, user_agent: &str,
client: &Client, client: &Client,
_safe_search: u8, _safe_search: u8,
) -> Result<Vec<(String, SearchResult)>, EngineError> { ) -> Result<HashMap<String, SearchResult>, EngineError> {
// Page number can be missing or empty string and so appropriate handling is required // Page number can be missing or empty string and so appropriate handling is required
// so that upstream server recieves valid page number. // so that upstream server recieves valid page number.
let url: String = format!( let url: String = format!(

View File

@ -47,7 +47,7 @@ impl SearchEngine for Mojeek {
user_agent: &str, user_agent: &str,
client: &Client, client: &Client,
safe_search: u8, safe_search: u8,
) -> Result<Vec<(String, SearchResult)>, EngineError> { ) -> Result<HashMap<String, SearchResult>, EngineError> {
// Mojeek uses `start results from this number` convention // Mojeek uses `start results from this number` convention
// So, for 10 results per page, page 0 starts at 1, page 1 // So, for 10 results per page, page 0 starts at 1, page 1
// starts at 11, and so on. // starts at 11, and so on.
@ -72,23 +72,8 @@ impl SearchEngine for Mojeek {
"Yep", "Yep",
"You", "You",
]; ];
let qss = search_engines.join("%2C"); let qss = search_engines.join("%2C");
let safe = if safe_search == 0 { "0" } else { "1" };
// A branchless condition to check whether the `safe_search` parameter has the
// value 0 or not. If it is zero then it sets the value 0 otherwise it sets
// the value to 1 for all other values of `safe_search`
//
// Moreover, the below branchless code is equivalent to the following code below:
//
// ```rust
// let safe = if safe_search == 0 { 0 } else { 1 }.to_string();
// ```
//
// For more information on branchless programming. See:
//
// * https://piped.video/watch?v=bVJ-mWWL7cE
let safe = u8::from(safe_search != 0).to_string();
// Mojeek detects automated requests, these are preferences that are // Mojeek detects automated requests, these are preferences that are
// able to circumvent the countermeasure. Some of these are // able to circumvent the countermeasure. Some of these are
@ -104,7 +89,7 @@ impl SearchEngine for Mojeek {
("hp", "minimal"), ("hp", "minimal"),
("lb", "en"), ("lb", "en"),
("qss", &qss), ("qss", &qss),
("safe", &safe), ("safe", safe),
]; ];
let mut query_params_string = String::new(); let mut query_params_string = String::new();

View File

@ -1,4 +1,5 @@
//! This modules provides helper functionalities for parsing a html document into internal SearchResult. //! This modules provides helper functionalities for parsing a html document into internal SearchResult.
use std::collections::HashMap;
use crate::models::{aggregation_models::SearchResult, engine_models::EngineError}; use crate::models::{aggregation_models::SearchResult, engine_models::EngineError};
use error_stack::{Report, Result}; use error_stack::{Report, Result};
@ -46,7 +47,7 @@ impl SearchResultParser {
&self, &self,
document: &Html, document: &Html,
builder: impl Fn(&ElementRef<'_>, &ElementRef<'_>, &ElementRef<'_>) -> Option<SearchResult>, builder: impl Fn(&ElementRef<'_>, &ElementRef<'_>, &ElementRef<'_>) -> Option<SearchResult>,
) -> Result<Vec<(String, SearchResult)>, EngineError> { ) -> Result<HashMap<String, SearchResult>, EngineError> {
let res = document let res = document
.select(&self.results) .select(&self.results)
.filter_map(|result| { .filter_map(|result| {

View File

@ -43,21 +43,12 @@ impl SearchEngine for Searx {
user_agent: &str, user_agent: &str,
client: &Client, client: &Client,
mut safe_search: u8, mut safe_search: u8,
) -> Result<Vec<(String, SearchResult)>, EngineError> { ) -> Result<HashMap<String, SearchResult>, EngineError> {
// A branchless condition to check whether the `safe_search` parameter has the // Page number can be missing or empty string and so appropriate handling is required
// value greater than equal to three or not. If it is, then it modifies the // so that upstream server recieves valid page number.
// `safesearch` parameters value to 2. if safe_search == 3 {
// safe_search = 2;
// Moreover, the below branchless code is equivalent to the following code below: };
//
// ```rust
// safe_search = u8::from(safe_search == 3) * 2;
// ```
//
// For more information on branchless programming. See:
//
// * https://piped.video/watch?v=bVJ-mWWL7cE
safe_search = u8::from(safe_search >= 3) * 2;
let url: String = format!( let url: String = format!(
"https://searx.be/search?q={query}&pageno={}&safesearch={safe_search}", "https://searx.be/search?q={query}&pageno={}&safesearch={safe_search}",

View File

@ -47,7 +47,7 @@ impl SearchEngine for Startpage {
user_agent: &str, user_agent: &str,
client: &Client, client: &Client,
_safe_search: u8, _safe_search: u8,
) -> Result<Vec<(String, SearchResult)>, EngineError> { ) -> Result<HashMap<String, SearchResult>, EngineError> {
// Page number can be missing or empty string and so appropriate handling is required // Page number can be missing or empty string and so appropriate handling is required
// so that upstream server recieves valid page number. // so that upstream server recieves valid page number.
let url: String = format!( let url: String = format!(

View File

@ -14,7 +14,7 @@ pub mod results;
pub mod server; pub mod server;
pub mod templates; pub mod templates;
use std::{net::TcpListener, sync::OnceLock}; use std::net::TcpListener;
use crate::server::router; use crate::server::router;
@ -31,9 +31,6 @@ use cache::cacher::{Cacher, SharedCache};
use config::parser::Config; use config::parser::Config;
use handler::{file_path, FileType}; use handler::{file_path, FileType};
/// A static constant for holding the cache struct.
static SHARED_CACHE: OnceLock<SharedCache> = OnceLock::new();
/// Runs the web server on the provided TCP listener and returns a `Server` instance. /// Runs the web server on the provided TCP listener and returns a `Server` instance.
/// ///
/// # Arguments /// # Arguments
@ -47,29 +44,27 @@ static SHARED_CACHE: OnceLock<SharedCache> = OnceLock::new();
/// # Example /// # Example
/// ///
/// ```rust /// ```rust
/// use std::{net::TcpListener, sync::OnceLock}; /// use std::net::TcpListener;
/// use websurfx::{config::parser::Config, run, cache::cacher::create_cache}; /// use websurfx::{config::parser::Config, run, cache::cacher::create_cache};
/// ///
/// /// A static constant for holding the parsed config.
/// static CONFIG: OnceLock<Config> = OnceLock::new();
///
/// #[tokio::main] /// #[tokio::main]
/// async fn main(){ /// async fn main(){
/// // Initialize the parsed config globally. /// let config = Config::parse(true).unwrap();
/// let config = CONFIG.get_or_init(|| Config::parse(true).unwrap());
/// let listener = TcpListener::bind("127.0.0.1:8080").expect("Failed to bind address"); /// let listener = TcpListener::bind("127.0.0.1:8080").expect("Failed to bind address");
/// let cache = create_cache(config).await; /// let cache = create_cache(&config).await;
/// let server = run(listener,&config,cache).expect("Failed to start server"); /// let server = run(listener,config,cache).expect("Failed to start server");
/// } /// }
/// ``` /// ```
pub fn run( pub fn run(
listener: TcpListener, listener: TcpListener,
config: &'static Config, config: Config,
cache: impl Cacher + 'static, cache: impl Cacher + 'static,
) -> std::io::Result<Server> { ) -> std::io::Result<Server> {
let public_folder_path: &str = file_path(FileType::Theme)?; let public_folder_path: &str = file_path(FileType::Theme)?;
let cache = SHARED_CACHE.get_or_init(|| SharedCache::new(cache)); let cloned_config_threads_opt: u8 = config.threads;
let cache = web::Data::new(SharedCache::new(cache));
let server = HttpServer::new(move || { let server = HttpServer::new(move || {
let cors: Cors = Cors::default() let cors: Cors = Cors::default()
@ -86,8 +81,8 @@ pub fn run(
// Compress the responses provided by the server for the client requests. // Compress the responses provided by the server for the client requests.
.wrap(Compress::default()) .wrap(Compress::default())
.wrap(Logger::default()) // added logging middleware for logging. .wrap(Logger::default()) // added logging middleware for logging.
.app_data(web::Data::new(config)) .app_data(web::Data::new(config.clone()))
.app_data(web::Data::new(cache)) .app_data(cache.clone())
.wrap(cors) .wrap(cors)
.wrap(Governor::new( .wrap(Governor::new(
&GovernorConfigBuilder::default() &GovernorConfigBuilder::default()
@ -112,7 +107,7 @@ pub fn run(
.service(router::settings) // settings page .service(router::settings) // settings page
.default_service(web::route().to(router::not_found)) // error page .default_service(web::route().to(router::not_found)) // error page
}) })
.workers(config.threads as usize) .workers(cloned_config_threads_opt as usize)
// Start server on 127.0.0.1 with the user provided port number. for example 127.0.0.1:8080. // Start server on 127.0.0.1 with the user provided port number. for example 127.0.0.1:8080.
.listen(listener)? .listen(listener)?
.run(); .run();

View File

@ -154,8 +154,8 @@ impl SearchResults {
} }
/// A setter function that sets the filtered to true. /// A setter function that sets the filtered to true.
pub fn set_filtered(&mut self, filtered: bool) { pub fn set_filtered(&mut self) {
self.filtered = filtered; self.filtered = true;
} }
/// A getter function that gets the value of `engine_errors_info`. /// A getter function that gets the value of `engine_errors_info`.

View File

@ -4,7 +4,7 @@
use super::aggregation_models::SearchResult; use super::aggregation_models::SearchResult;
use error_stack::{Report, Result, ResultExt}; use error_stack::{Report, Result, ResultExt};
use reqwest::Client; use reqwest::Client;
use std::fmt; use std::{collections::HashMap, fmt};
/// A custom error type used for handle engine associated errors. /// A custom error type used for handle engine associated errors.
#[derive(Debug)] #[derive(Debug)]
@ -147,7 +147,7 @@ pub trait SearchEngine: Sync + Send {
user_agent: &str, user_agent: &str,
client: &Client, client: &Client,
safe_search: u8, safe_search: u8,
) -> Result<Vec<(String, SearchResult)>, EngineError>; ) -> Result<HashMap<String, SearchResult>, EngineError>;
} }
/// A named struct which stores the engine struct with the name of the associated engine. /// A named struct which stores the engine struct with the name of the associated engine.

View File

@ -10,7 +10,7 @@
/// order to allow the deserializing the json back to struct in aggregate function in /// order to allow the deserializing the json back to struct in aggregate function in
/// aggregator.rs and create a new struct out of it and then serialize it back to json and pass /// aggregator.rs and create a new struct out of it and then serialize it back to json and pass
/// it to the template files. /// it to the template files.
#[derive(Default)] #[derive(Clone, Default)]
pub struct Style { pub struct Style {
/// It stores the parsed theme option used to set a theme for the website. /// It stores the parsed theme option used to set a theme for the website.
pub theme: String, pub theme: String,
@ -40,6 +40,7 @@ impl Style {
} }
/// Configuration options for the aggregator. /// Configuration options for the aggregator.
#[derive(Clone)]
pub struct AggregatorConfig { pub struct AggregatorConfig {
/// It stores the option to whether enable or disable random delays between /// It stores the option to whether enable or disable random delays between
/// requests. /// requests.
@ -47,6 +48,7 @@ pub struct AggregatorConfig {
} }
/// Configuration options for the rate limiter middleware. /// Configuration options for the rate limiter middleware.
#[derive(Clone)]
pub struct RateLimiter { pub struct RateLimiter {
/// The number of request that are allowed within a provided time limit. /// The number of request that are allowed within a provided time limit.
pub number_of_requests: u8, pub number_of_requests: u8,

View File

@ -9,25 +9,22 @@ use crate::models::{
engine_models::{EngineError, EngineHandler}, engine_models::{EngineError, EngineHandler},
}; };
use error_stack::Report; use error_stack::Report;
use futures::stream::FuturesUnordered;
use regex::Regex; use regex::Regex;
use reqwest::{Client, ClientBuilder}; use reqwest::{Client, ClientBuilder};
use std::sync::Arc;
use std::time::{SystemTime, UNIX_EPOCH}; use std::time::{SystemTime, UNIX_EPOCH};
use tokio::{ use std::{
fs::File, collections::HashMap,
io::{AsyncBufReadExt, BufReader}, io::{BufReader, Read},
task::JoinHandle,
time::Duration, time::Duration,
}; };
use std::{fs::File, io::BufRead};
use tokio::task::JoinHandle;
/// A constant for holding the prebuilt Client globally in the app. /// A constant for holding the prebuilt Client globally in the app.
static CLIENT: std::sync::OnceLock<Client> = std::sync::OnceLock::new(); static CLIENT: std::sync::OnceLock<Client> = std::sync::OnceLock::new();
/// Aliases for long type annotations /// Aliases for long type annotations
type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<EngineError>>>>;
type FutureVec =
FuturesUnordered<JoinHandle<Result<Vec<(String, SearchResult)>, Report<EngineError>>>>;
/// The function aggregates the scraped results from the user-selected upstream search engines. /// The function aggregates the scraped results from the user-selected upstream search engines.
/// These engines can be chosen either from the user interface (UI) or from the configuration file. /// These engines can be chosen either from the user interface (UI) or from the configuration file.
@ -40,7 +37,7 @@ type FutureVec =
/// ///
/// Additionally, the function eliminates duplicate results. If two results are identified as coming from /// Additionally, the function eliminates duplicate results. If two results are identified as coming from
/// multiple engines, their names are combined to indicate that the results were fetched from these upstream /// multiple engines, their names are combined to indicate that the results were fetched from these upstream
/// engines. After this, all the data in the `Vec` is removed and placed into a struct that contains all /// engines. After this, all the data in the `HashMap` is removed and placed into a struct that contains all
/// the aggregated results in a vector. Furthermore, the query used is also added to the struct. This step is /// the aggregated results in a vector. Furthermore, the query used is also added to the struct. This step is
/// necessary to ensure that the search bar in the search remains populated even when searched from the query URL. /// necessary to ensure that the search bar in the search remains populated even when searched from the query URL.
/// ///
@ -97,22 +94,15 @@ pub async fn aggregate(
let mut names: Vec<&str> = Vec::with_capacity(0); let mut names: Vec<&str> = Vec::with_capacity(0);
// create tasks for upstream result fetching // create tasks for upstream result fetching
let tasks: FutureVec = FutureVec::new(); let mut tasks: FutureVec = FutureVec::new();
let query: Arc<String> = Arc::new(query.to_string());
for engine_handler in upstream_search_engines { for engine_handler in upstream_search_engines {
let (name, search_engine) = engine_handler.clone().into_name_engine(); let (name, search_engine) = engine_handler.to_owned().into_name_engine();
names.push(name); names.push(name);
let query_partially_cloned = query.clone(); let query: String = query.to_owned();
tasks.push(tokio::spawn(async move { tasks.push(tokio::spawn(async move {
search_engine search_engine
.results( .results(&query, page, user_agent, client, safe_search)
&query_partially_cloned,
page,
user_agent,
client,
safe_search,
)
.await .await
})); }));
} }
@ -127,7 +117,7 @@ pub async fn aggregate(
} }
// aggregate search results, removing duplicates and handling errors the upstream engines returned // aggregate search results, removing duplicates and handling errors the upstream engines returned
let mut result_map: Vec<(String, SearchResult)> = Vec::new(); let mut result_map: HashMap<String, SearchResult> = HashMap::new();
let mut engine_errors_info: Vec<EngineErrorInfo> = Vec::new(); let mut engine_errors_info: Vec<EngineErrorInfo> = Vec::new();
let mut handle_error = |error: &Report<EngineError>, engine_name: &'static str| { let mut handle_error = |error: &Report<EngineError>, engine_name: &'static str| {
@ -144,45 +134,51 @@ pub async fn aggregate(
if result_map.is_empty() { if result_map.is_empty() {
match response { match response {
Ok(results) => result_map = results, Ok(results) => {
Err(error) => handle_error(&error, engine), result_map = results.clone();
}; }
Err(error) => {
handle_error(&error, engine);
}
}
continue; continue;
} }
match response { match response {
Ok(result) => { Ok(result) => {
result.into_iter().for_each(|(key, value)| { result.into_iter().for_each(|(key, value)| {
match result_map.iter().find(|(key_s, _)| key_s == &key) { result_map
Some(value) => value.1.to_owned().add_engines(engine), .entry(key)
None => result_map.push((key, value)), .and_modify(|result| {
}; result.add_engines(engine);
})
.or_insert_with(|| -> SearchResult { value });
}); });
} }
Err(error) => handle_error(&error, engine), Err(error) => {
}; handle_error(&error, engine);
}
}
} }
if safe_search >= 3 { if safe_search >= 3 {
let mut blacklist_map: Vec<(String, SearchResult)> = Vec::new(); let mut blacklist_map: HashMap<String, SearchResult> = HashMap::new();
filter_with_lists( filter_with_lists(
&mut result_map, &mut result_map,
&mut blacklist_map, &mut blacklist_map,
file_path(FileType::BlockList)?, file_path(FileType::BlockList)?,
) )?;
.await?;
filter_with_lists( filter_with_lists(
&mut blacklist_map, &mut blacklist_map,
&mut result_map, &mut result_map,
file_path(FileType::AllowList)?, file_path(FileType::AllowList)?,
) )?;
.await?;
drop(blacklist_map); drop(blacklist_map);
} }
let results: Vec<SearchResult> = result_map.iter().map(|(_, value)| value.clone()).collect(); let results: Vec<SearchResult> = result_map.into_values().collect();
Ok(SearchResults::new(results, &engine_errors_info)) Ok(SearchResults::new(results, &engine_errors_info))
} }
@ -191,41 +187,35 @@ pub async fn aggregate(
/// ///
/// # Arguments /// # Arguments
/// ///
/// * `map_to_be_filtered` - A mutable reference to a `Vec` of search results to filter, where the filtered results will be removed from. /// * `map_to_be_filtered` - A mutable reference to a `HashMap` of search results to filter, where the filtered results will be removed from.
/// * `resultant_map` - A mutable reference to a `Vec` to hold the filtered results. /// * `resultant_map` - A mutable reference to a `HashMap` to hold the filtered results.
/// * `file_path` - A `&str` representing the path to a file containing regex patterns to use for filtering. /// * `file_path` - A `&str` representing the path to a file containing regex patterns to use for filtering.
/// ///
/// # Errors /// # Errors
/// ///
/// Returns an error if the file at `file_path` cannot be opened or read, or if a regex pattern is invalid. /// Returns an error if the file at `file_path` cannot be opened or read, or if a regex pattern is invalid.
pub async fn filter_with_lists( pub fn filter_with_lists(
map_to_be_filtered: &mut Vec<(String, SearchResult)>, map_to_be_filtered: &mut HashMap<String, SearchResult>,
resultant_map: &mut Vec<(String, SearchResult)>, resultant_map: &mut HashMap<String, SearchResult>,
file_path: &str, file_path: &str,
) -> Result<(), Box<dyn std::error::Error>> { ) -> Result<(), Box<dyn std::error::Error>> {
let reader = BufReader::new(File::open(file_path).await?); let mut reader = BufReader::new(File::open(file_path)?);
let mut lines = reader.lines();
while let Some(line) = lines.next_line().await? { for line in reader.by_ref().lines() {
let re = Regex::new(line.trim())?; let re = Regex::new(line?.trim())?;
let mut length = map_to_be_filtered.len();
let mut idx: usize = Default::default();
// Iterate over each search result in the map and check if it matches the regex pattern // Iterate over each search result in the map and check if it matches the regex pattern
while idx < length { for (url, search_result) in map_to_be_filtered.clone().into_iter() {
let ele = &map_to_be_filtered[idx]; if re.is_match(&url.to_lowercase())
let ele_inner = &ele.1; || re.is_match(&search_result.title.to_lowercase())
match re.is_match(&ele.0.to_lowercase()) || re.is_match(&search_result.description.to_lowercase())
|| re.is_match(&ele_inner.title.to_lowercase())
|| re.is_match(&ele_inner.description.to_lowercase())
{ {
true => {
// If the search result matches the regex pattern, move it from the original map to the resultant map // If the search result matches the regex pattern, move it from the original map to the resultant map
resultant_map.push(map_to_be_filtered.swap_remove(idx)); resultant_map.insert(
length -= 1; url.to_owned(),
map_to_be_filtered.remove(&url.to_owned()).unwrap(),
);
} }
false => idx += 1,
};
} }
} }
@ -236,14 +226,15 @@ pub async fn filter_with_lists(
mod tests { mod tests {
use super::*; use super::*;
use smallvec::smallvec; use smallvec::smallvec;
use std::collections::HashMap;
use std::io::Write; use std::io::Write;
use tempfile::NamedTempFile; use tempfile::NamedTempFile;
#[tokio::test] #[test]
async fn test_filter_with_lists() -> Result<(), Box<dyn std::error::Error>> { fn test_filter_with_lists() -> Result<(), Box<dyn std::error::Error>> {
// Create a map of search results to filter // Create a map of search results to filter
let mut map_to_be_filtered = Vec::new(); let mut map_to_be_filtered = HashMap::new();
map_to_be_filtered.push(( map_to_be_filtered.insert(
"https://www.example.com".to_owned(), "https://www.example.com".to_owned(),
SearchResult { SearchResult {
title: "Example Domain".to_owned(), title: "Example Domain".to_owned(),
@ -252,15 +243,15 @@ mod tests {
.to_owned(), .to_owned(),
engine: smallvec!["Google".to_owned(), "Bing".to_owned()], engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
}, },
)); );
map_to_be_filtered.push(( map_to_be_filtered.insert(
"https://www.rust-lang.org/".to_owned(), "https://www.rust-lang.org/".to_owned(),
SearchResult { SearchResult {
title: "Rust Programming Language".to_owned(), title: "Rust Programming Language".to_owned(),
url: "https://www.rust-lang.org/".to_owned(), url: "https://www.rust-lang.org/".to_owned(),
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(), description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()], engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
},) },
); );
// Create a temporary file with regex patterns // Create a temporary file with regex patterns
@ -269,30 +260,25 @@ mod tests {
writeln!(file, "rust")?; writeln!(file, "rust")?;
file.flush()?; file.flush()?;
let mut resultant_map = Vec::new(); let mut resultant_map = HashMap::new();
filter_with_lists( filter_with_lists(
&mut map_to_be_filtered, &mut map_to_be_filtered,
&mut resultant_map, &mut resultant_map,
file.path().to_str().unwrap(), file.path().to_str().unwrap(),
) )?;
.await?;
assert_eq!(resultant_map.len(), 2); assert_eq!(resultant_map.len(), 2);
assert!(resultant_map assert!(resultant_map.contains_key("https://www.example.com"));
.iter() assert!(resultant_map.contains_key("https://www.rust-lang.org/"));
.any(|(key, _)| key == "https://www.example.com"));
assert!(resultant_map
.iter()
.any(|(key, _)| key == "https://www.rust-lang.org/"));
assert_eq!(map_to_be_filtered.len(), 0); assert_eq!(map_to_be_filtered.len(), 0);
Ok(()) Ok(())
} }
#[tokio::test] #[test]
async fn test_filter_with_lists_wildcard() -> Result<(), Box<dyn std::error::Error>> { fn test_filter_with_lists_wildcard() -> Result<(), Box<dyn std::error::Error>> {
let mut map_to_be_filtered = Vec::new(); let mut map_to_be_filtered = HashMap::new();
map_to_be_filtered.push(( map_to_be_filtered.insert(
"https://www.example.com".to_owned(), "https://www.example.com".to_owned(),
SearchResult { SearchResult {
title: "Example Domain".to_owned(), title: "Example Domain".to_owned(),
@ -301,8 +287,8 @@ mod tests {
.to_owned(), .to_owned(),
engine: smallvec!["Google".to_owned(), "Bing".to_owned()], engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
}, },
)); );
map_to_be_filtered.push(( map_to_be_filtered.insert(
"https://www.rust-lang.org/".to_owned(), "https://www.rust-lang.org/".to_owned(),
SearchResult { SearchResult {
title: "Rust Programming Language".to_owned(), title: "Rust Programming Language".to_owned(),
@ -310,39 +296,34 @@ mod tests {
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(), description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()], engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
}, },
)); );
// Create a temporary file with a regex pattern containing a wildcard // Create a temporary file with a regex pattern containing a wildcard
let mut file = NamedTempFile::new()?; let mut file = NamedTempFile::new()?;
writeln!(file, "ex.*le")?; writeln!(file, "ex.*le")?;
file.flush()?; file.flush()?;
let mut resultant_map = Vec::new(); let mut resultant_map = HashMap::new();
filter_with_lists( filter_with_lists(
&mut map_to_be_filtered, &mut map_to_be_filtered,
&mut resultant_map, &mut resultant_map,
file.path().to_str().unwrap(), file.path().to_str().unwrap(),
) )?;
.await?;
assert_eq!(resultant_map.len(), 1); assert_eq!(resultant_map.len(), 1);
assert!(resultant_map assert!(resultant_map.contains_key("https://www.example.com"));
.iter()
.any(|(key, _)| key == "https://www.example.com"));
assert_eq!(map_to_be_filtered.len(), 1); assert_eq!(map_to_be_filtered.len(), 1);
assert!(map_to_be_filtered assert!(map_to_be_filtered.contains_key("https://www.rust-lang.org/"));
.iter()
.any(|(key, _)| key == "https://www.rust-lang.org/"));
Ok(()) Ok(())
} }
#[tokio::test] #[test]
async fn test_filter_with_lists_file_not_found() { fn test_filter_with_lists_file_not_found() {
let mut map_to_be_filtered = Vec::new(); let mut map_to_be_filtered = HashMap::new();
let mut resultant_map = Vec::new(); let mut resultant_map = HashMap::new();
// Call the `filter_with_lists` function with a non-existent file path // Call the `filter_with_lists` function with a non-existent file path
let result = filter_with_lists( let result = filter_with_lists(
@ -351,13 +332,13 @@ mod tests {
"non-existent-file.txt", "non-existent-file.txt",
); );
assert!(result.await.is_err()); assert!(result.is_err());
} }
#[tokio::test] #[test]
async fn test_filter_with_lists_invalid_regex() { fn test_filter_with_lists_invalid_regex() {
let mut map_to_be_filtered = Vec::new(); let mut map_to_be_filtered = HashMap::new();
map_to_be_filtered.push(( map_to_be_filtered.insert(
"https://www.example.com".to_owned(), "https://www.example.com".to_owned(),
SearchResult { SearchResult {
title: "Example Domain".to_owned(), title: "Example Domain".to_owned(),
@ -366,9 +347,9 @@ mod tests {
.to_owned(), .to_owned(),
engine: smallvec!["Google".to_owned(), "Bing".to_owned()], engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
}, },
)); );
let mut resultant_map = Vec::new(); let mut resultant_map = HashMap::new();
// Create a temporary file with an invalid regex pattern // Create a temporary file with an invalid regex pattern
let mut file = NamedTempFile::new().unwrap(); let mut file = NamedTempFile::new().unwrap();
@ -381,6 +362,6 @@ mod tests {
file.path().to_str().unwrap(), file.path().to_str().unwrap(),
); );
assert!(result.await.is_err()); assert!(result.is_err());
} }
} }

View File

@ -7,13 +7,11 @@ use crate::{
handler::{file_path, FileType}, handler::{file_path, FileType},
}; };
use actix_web::{get, http::header::ContentType, web, HttpRequest, HttpResponse}; use actix_web::{get, http::header::ContentType, web, HttpRequest, HttpResponse};
use tokio::fs::read_to_string; use std::fs::read_to_string;
/// Handles the route of index page or main page of the `websurfx` meta search engine website. /// Handles the route of index page or main page of the `websurfx` meta search engine website.
#[get("/")] #[get("/")]
pub async fn index( pub async fn index(config: web::Data<Config>) -> Result<HttpResponse, Box<dyn std::error::Error>> {
config: web::Data<&'static Config>,
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
Ok(HttpResponse::Ok().content_type(ContentType::html()).body( Ok(HttpResponse::Ok().content_type(ContentType::html()).body(
crate::templates::views::index::index( crate::templates::views::index::index(
&config.style.colorscheme, &config.style.colorscheme,
@ -27,7 +25,7 @@ pub async fn index(
/// Handles the route of any other accessed route/page which is not provided by the /// Handles the route of any other accessed route/page which is not provided by the
/// website essentially the 404 error page. /// website essentially the 404 error page.
pub async fn not_found( pub async fn not_found(
config: web::Data<&'static Config>, config: web::Data<Config>,
) -> Result<HttpResponse, Box<dyn std::error::Error>> { ) -> Result<HttpResponse, Box<dyn std::error::Error>> {
Ok(HttpResponse::Ok().content_type(ContentType::html()).body( Ok(HttpResponse::Ok().content_type(ContentType::html()).body(
crate::templates::views::not_found::not_found( crate::templates::views::not_found::not_found(
@ -43,7 +41,7 @@ pub async fn not_found(
#[get("/robots.txt")] #[get("/robots.txt")]
pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> { pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
let page_content: String = let page_content: String =
read_to_string(format!("{}/robots.txt", file_path(FileType::Theme)?)).await?; read_to_string(format!("{}/robots.txt", file_path(FileType::Theme)?))?;
Ok(HttpResponse::Ok() Ok(HttpResponse::Ok()
.content_type(ContentType::plaintext()) .content_type(ContentType::plaintext())
.body(page_content)) .body(page_content))
@ -51,9 +49,7 @@ pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std:
/// Handles the route of about page of the `websurfx` meta search engine website. /// Handles the route of about page of the `websurfx` meta search engine website.
#[get("/about")] #[get("/about")]
pub async fn about( pub async fn about(config: web::Data<Config>) -> Result<HttpResponse, Box<dyn std::error::Error>> {
config: web::Data<&'static Config>,
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
Ok(HttpResponse::Ok().content_type(ContentType::html()).body( Ok(HttpResponse::Ok().content_type(ContentType::html()).body(
crate::templates::views::about::about( crate::templates::views::about::about(
&config.style.colorscheme, &config.style.colorscheme,
@ -67,7 +63,7 @@ pub async fn about(
/// Handles the route of settings page of the `websurfx` meta search engine website. /// Handles the route of settings page of the `websurfx` meta search engine website.
#[get("/settings")] #[get("/settings")]
pub async fn settings( pub async fn settings(
config: web::Data<&'static Config>, config: web::Data<Config>,
) -> Result<HttpResponse, Box<dyn std::error::Error>> { ) -> Result<HttpResponse, Box<dyn std::error::Error>> {
Ok(HttpResponse::Ok().content_type(ContentType::html()).body( Ok(HttpResponse::Ok().content_type(ContentType::html()).body(
crate::templates::views::settings::settings( crate::templates::views::settings::settings(

View File

@ -13,12 +13,12 @@ use crate::{
}; };
use actix_web::{get, http::header::ContentType, web, HttpRequest, HttpResponse}; use actix_web::{get, http::header::ContentType, web, HttpRequest, HttpResponse};
use regex::Regex; use regex::Regex;
use std::borrow::Cow; use std::{
use tokio::{ borrow::Cow,
fs::File, fs::File,
io::{AsyncBufReadExt, BufReader}, io::{BufRead, BufReader, Read},
join,
}; };
use tokio::join;
/// Handles the route of search page of the `websurfx` meta search engine website and it takes /// Handles the route of search page of the `websurfx` meta search engine website and it takes
/// two search url parameters `q` and `page` where `page` parameter is optional. /// two search url parameters `q` and `page` where `page` parameter is optional.
@ -37,8 +37,8 @@ use tokio::{
#[get("/search")] #[get("/search")]
pub async fn search( pub async fn search(
req: HttpRequest, req: HttpRequest,
config: web::Data<&'static Config>, config: web::Data<Config>,
cache: web::Data<&'static SharedCache>, cache: web::Data<SharedCache>,
) -> Result<HttpResponse, Box<dyn std::error::Error>> { ) -> Result<HttpResponse, Box<dyn std::error::Error>> {
use std::sync::Arc; use std::sync::Arc;
let params = web::Query::<SearchParams>::from_query(req.query_string())?; let params = web::Query::<SearchParams>::from_query(req.query_string())?;
@ -70,8 +70,8 @@ pub async fn search(
}); });
search_settings.safe_search_level = get_safesearch_level( search_settings.safe_search_level = get_safesearch_level(
params.safesearch, &Some(search_settings.safe_search_level),
search_settings.safe_search_level, &params.safesearch,
config.safe_search, config.safe_search,
); );
@ -158,8 +158,8 @@ pub async fn search(
/// It returns the `SearchResults` struct if the search results could be successfully fetched from /// It returns the `SearchResults` struct if the search results could be successfully fetched from
/// the cache or from the upstream search engines otherwise it returns an appropriate error. /// the cache or from the upstream search engines otherwise it returns an appropriate error.
async fn results( async fn results(
config: &'static Config, config: &Config,
cache: &'static SharedCache, cache: &web::Data<SharedCache>,
query: &str, query: &str,
page: u32, page: u32,
search_settings: &server_models::Cookie<'_>, search_settings: &server_models::Cookie<'_>,
@ -188,7 +188,7 @@ async fn results(
let mut results: SearchResults = SearchResults::default(); let mut results: SearchResults = SearchResults::default();
let flag: bool = let flag: bool =
!is_match_from_filter_list(file_path(FileType::BlockList)?, query).await?; !is_match_from_filter_list(file_path(FileType::BlockList)?, query)?;
// Return early when query contains disallowed words, // Return early when query contains disallowed words,
if flag { if flag {
results.set_disallowed(); results.set_disallowed();
@ -225,12 +225,12 @@ async fn results(
search_results search_results
} }
}; };
let (engine_errors_info, results_empty_check, no_engines_selected) = ( if results.engine_errors_info().is_empty()
results.engine_errors_info().is_empty(), && results.results().is_empty()
results.results().is_empty(), && !results.no_engines_selected()
results.no_engines_selected(), {
); results.set_filtered();
results.set_filtered(engine_errors_info & results_empty_check & !no_engines_selected); }
cache cache
.cache_results(&[results.clone()], &[cache_key.clone()]) .cache_results(&[results.clone()], &[cache_key.clone()])
.await?; .await?;
@ -252,14 +252,13 @@ async fn results(
/// ///
/// Returns a bool indicating whether the results were found in the list or not on success /// Returns a bool indicating whether the results were found in the list or not on success
/// otherwise returns a standard error type on a failure. /// otherwise returns a standard error type on a failure.
async fn is_match_from_filter_list( fn is_match_from_filter_list(
file_path: &str, file_path: &str,
query: &str, query: &str,
) -> Result<bool, Box<dyn std::error::Error>> { ) -> Result<bool, Box<dyn std::error::Error>> {
let reader = BufReader::new(File::open(file_path).await?); let mut reader = BufReader::new(File::open(file_path)?);
let mut lines = reader.lines(); for line in reader.by_ref().lines() {
while let Some(line) = lines.next_line().await? { let re = Regex::new(&line?)?;
let re = Regex::new(&line)?;
if re.is_match(query) { if re.is_match(query) {
return Ok(true); return Ok(true);
} }
@ -268,95 +267,24 @@ async fn is_match_from_filter_list(
Ok(false) Ok(false)
} }
/// A helper function to choose the safe search level value based on the URL parameters, /// A helper function to modify the safe search level based on the url params.
/// cookie value and config value. /// The `safe_search` is the one in the user's cookie or
/// the default set by the server config if the cookie was missing.
/// ///
/// # Argurments /// # Argurments
/// ///
/// * `safe_search_level_from_url` - Safe search level from the URL parameters. /// * `url_level` - Safe search level from the url.
/// * `cookie_safe_search_level` - Safe search level value from the cookie. /// * `safe_search` - User's cookie, or the safe search level set by the server
/// * `config_safe_search_level` - Safe search level value from the config file. /// * `config_level` - Safe search level to fall back to
/// fn get_safesearch_level(cookie_level: &Option<u8>, url_level: &Option<u8>, config_level: u8) -> u8 {
/// # Returns match url_level {
/// Some(url_level) => {
/// Returns an appropriate safe search level value based on the safe search level values if *url_level >= 3 {
/// from the URL parameters, cookie and the config file. config_level
fn get_safesearch_level(
safe_search_level_from_url: Option<u8>,
cookie_safe_search_level: u8,
config_safe_search_level: u8,
) -> u8 {
(u8::from(safe_search_level_from_url.is_some())
* ((u8::from(config_safe_search_level >= 3) * config_safe_search_level)
+ (u8::from(config_safe_search_level < 3) * safe_search_level_from_url.unwrap_or(0))))
+ (u8::from(safe_search_level_from_url.is_none())
* ((u8::from(config_safe_search_level >= 3) * config_safe_search_level)
+ (u8::from(config_safe_search_level < 3) * cookie_safe_search_level)))
}
#[cfg(test)]
mod tests {
use std::time::{SystemTime, UNIX_EPOCH};
/// A helper function which creates a random mock safe search level value.
///
/// # Returns
///
/// Returns an optional u8 value.
fn mock_safe_search_level_value() -> Option<u8> {
let nanos = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap_or_default()
.subsec_nanos() as f32;
let delay = ((nanos / 1_0000_0000 as f32).floor() as i8) - 1;
match delay {
-1 => None,
some_num => Some(if some_num > 4 { some_num - 4 } else { some_num } as u8),
}
}
#[test]
/// A test function to test whether the output of the branchless and branched code
/// for the code to choose the appropriate safe search level is same or not.
fn get_safesearch_level_branched_branchless_code_test() {
// Get mock values for the safe search level values for URL parameters, cookie
// and config.
let safe_search_level_from_url = mock_safe_search_level_value();
let cookie_safe_search_level = mock_safe_search_level_value().unwrap_or(0);
let config_safe_search_level = mock_safe_search_level_value().unwrap_or(0);
// Branched code
let safe_search_level_value_from_branched_code = match safe_search_level_from_url {
Some(safe_search_level_from_url_parsed) => {
if config_safe_search_level >= 3 {
config_safe_search_level
} else { } else {
safe_search_level_from_url_parsed *url_level
} }
} }
None => { None => cookie_level.unwrap_or(config_level),
if config_safe_search_level >= 3 {
config_safe_search_level
} else {
cookie_safe_search_level
}
}
};
// branchless code
let safe_search_level_value_from_branchless_code =
(u8::from(safe_search_level_from_url.is_some())
* ((u8::from(config_safe_search_level >= 3) * config_safe_search_level)
+ (u8::from(config_safe_search_level < 3)
* safe_search_level_from_url.unwrap_or(0))))
+ (u8::from(safe_search_level_from_url.is_none())
* ((u8::from(config_safe_search_level >= 3) * config_safe_search_level)
+ (u8::from(config_safe_search_level < 3) * cookie_safe_search_level)));
assert_eq!(
safe_search_level_value_from_branched_code,
safe_search_level_value_from_branchless_code
);
} }
} }

View File

@ -55,7 +55,7 @@ pub fn engines(engine_names: &HashMap<String, bool>) -> Markup {
input type="checkbox" class="engine" checked; input type="checkbox" class="engine" checked;
span class="slider round"{} span class="slider round"{}
} }
(format!("{}{}",&engine_name[..1].to_uppercase(), &engine_name[1..])) (format!("{}{}",engine_name[..1].to_uppercase().to_owned(), engine_name[1..].to_owned()))
} }
} }
@else { @else {
@ -64,7 +64,7 @@ pub fn engines(engine_names: &HashMap<String, bool>) -> Markup {
input type="checkbox" class="engine"; input type="checkbox" class="engine";
span class="slider round"{} span class="slider round"{}
} }
(format!("{}{}",&engine_name[..1], &engine_name[1..])) (format!("{}{}",engine_name[..1].to_uppercase().to_owned(), engine_name[1..].to_owned()))
} }
} }
} }

View File

@ -36,7 +36,7 @@ fn style_option_list(
} }
if style_type == "animations" { if style_type == "animations" {
style_option_names.push((String::default(), "none".to_owned())) style_option_names.push(("".to_owned(), "none".to_owned()))
} }
Ok(style_option_names) Ok(style_option_names)
@ -83,11 +83,9 @@ pub fn user_interface(
"Select the animation for your theme to be used in user interface" "Select the animation for your theme to be used in user interface"
} }
select name="animations"{ select name="animations"{
@let default_animation = &String::default();
@let animation = animation.as_ref().unwrap_or(default_animation);
// Sets the user selected animation name from the config file as the first option in the selection list. // Sets the user selected animation name from the config file as the first option in the selection list.
option value=(animation){(animation.replace('-'," "))} option value=(animation.as_ref().unwrap_or(&"".to_owned())){(animation.as_ref().unwrap_or(&"".to_owned()).replace('-'," "))}
@for (k,v) in style_option_list("animations", animation)?{ @for (k,v) in style_option_list("animations", animation.as_ref().unwrap_or(&"".to_owned()))?{
option value=(k){(v)} option value=(k){(v)}
} }
} }

View File

@ -38,7 +38,7 @@ pub fn search(
small{(result.url)} small{(result.url)}
p{(PreEscaped(&result.description))} p{(PreEscaped(&result.description))}
.upstream_engines{ .upstream_engines{
@for name in &result.engine { @for name in result.clone().engine{
span{(name)} span{(name)}
} }
} }

View File

@ -1,17 +1,14 @@
use std::{net::TcpListener, sync::OnceLock}; use std::net::TcpListener;
use websurfx::{config::parser::Config, run, templates::views}; use websurfx::{config::parser::Config, run, templates::views};
/// A static constant for holding the parsed config.
static CONFIG: OnceLock<Config> = OnceLock::new();
// Starts a new instance of the HTTP server, bound to a random available port // Starts a new instance of the HTTP server, bound to a random available port
async fn spawn_app() -> String { async fn spawn_app() -> String {
// Binding to port 0 will trigger the OS to assign a port for us. // Binding to port 0 will trigger the OS to assign a port for us.
let listener = TcpListener::bind("127.0.0.1:0").expect("Failed to bind random port"); let listener = TcpListener::bind("127.0.0.1:0").expect("Failed to bind random port");
let port = listener.local_addr().unwrap().port(); let port = listener.local_addr().unwrap().port();
let config = CONFIG.get_or_init(|| Config::parse(false).unwrap()); let config = Config::parse(false).unwrap();
let cache = websurfx::cache::cacher::create_cache(config).await; let cache = websurfx::cache::cacher::create_cache(&config).await;
let server = run(listener, config, cache).expect("Failed to bind address"); let server = run(listener, config, cache).expect("Failed to bind address");
tokio::spawn(server); tokio::spawn(server);