mirror of
https://github.com/neon-mmd/websurfx.git
synced 2024-11-22 22:18:23 -05:00
Merge e385a577e0
into 8d9b660eb1
This commit is contained in:
commit
7e9f8956fa
617
Cargo.lock
generated
617
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
12
Cargo.toml
12
Cargo.toml
@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "websurfx"
|
name = "websurfx"
|
||||||
version = "1.9.20"
|
version = "1.10.9"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
|
description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
|
||||||
repository = "https://github.com/neon-mmd/websurfx"
|
repository = "https://github.com/neon-mmd/websurfx"
|
||||||
@ -14,7 +14,7 @@ path = "src/bin/websurfx.rs"
|
|||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
reqwest = {version="0.11.24", default-features=false, features=["rustls-tls","brotli", "gzip"]}
|
reqwest = {version="0.11.24", default-features=false, features=["rustls-tls","brotli", "gzip"]}
|
||||||
tokio = {version="1.32.0",features=["rt-multi-thread","macros"], default-features = false}
|
tokio = {version="1.32.0",features=["rt-multi-thread","macros", "fs", "io-util"], default-features = false}
|
||||||
serde = {version="1.0.196", default-features=false, features=["derive"]}
|
serde = {version="1.0.196", default-features=false, features=["derive"]}
|
||||||
serde_json = {version="1.0.109", default-features=false}
|
serde_json = {version="1.0.109", default-features=false}
|
||||||
maud = {version="0.25.0", default-features=false, features=["actix-web"]}
|
maud = {version="0.25.0", default-features=false, features=["actix-web"]}
|
||||||
@ -32,13 +32,13 @@ error-stack = {version="0.4.0", default-features=false, features=["std"]}
|
|||||||
async-trait = {version="0.1.76", default-features=false}
|
async-trait = {version="0.1.76", default-features=false}
|
||||||
regex = {version="1.9.4", features=["perf"], default-features = false}
|
regex = {version="1.9.4", features=["perf"], default-features = false}
|
||||||
smallvec = {version="1.13.1", features=["union", "serde"], default-features=false}
|
smallvec = {version="1.13.1", features=["union", "serde"], default-features=false}
|
||||||
futures = {version="0.3.28", default-features=false}
|
futures = {version="0.3.30", default-features=false, features=["alloc"]}
|
||||||
dhat = {version="0.3.3", optional = true, default-features=false}
|
dhat = {version="0.3.2", optional = true, default-features=false}
|
||||||
mimalloc = { version = "0.1.38", default-features = false }
|
mimalloc = { version = "0.1.38", default-features = false }
|
||||||
async-once-cell = {version="0.5.3", default-features=false}
|
async-once-cell = {version="0.5.3", default-features=false}
|
||||||
actix-governor = {version="0.5.0", default-features=false}
|
actix-governor = {version="0.5.0", default-features=false}
|
||||||
mini-moka = { version="0.10", optional = true, default-features=false, features=["sync"]}
|
mini-moka = { version="0.10", optional = true, default-features=false, features=["sync"]}
|
||||||
brotli = { version = "3.4.0", default-features = false, features=["std"], optional=true}
|
async-compression = { version = "0.4.6", default-features = false, features=["brotli","tokio"], optional=true}
|
||||||
chacha20poly1305={version="0.10.1", default-features=false, features=["alloc","getrandom"], optional=true}
|
chacha20poly1305={version="0.10.1", default-features=false, features=["alloc","getrandom"], optional=true}
|
||||||
chacha20 = {version="0.9.1", default-features=false, optional=true}
|
chacha20 = {version="0.9.1", default-features=false, optional=true}
|
||||||
base64 = {version="0.21.5", default-features=false, features=["std"], optional=true}
|
base64 = {version="0.21.5", default-features=false, features=["std"], optional=true}
|
||||||
@ -84,7 +84,7 @@ default = ["memory-cache"]
|
|||||||
dhat-heap = ["dep:dhat"]
|
dhat-heap = ["dep:dhat"]
|
||||||
memory-cache = ["dep:mini-moka"]
|
memory-cache = ["dep:mini-moka"]
|
||||||
redis-cache = ["dep:redis","dep:base64"]
|
redis-cache = ["dep:redis","dep:base64"]
|
||||||
compress-cache-results = ["dep:brotli","dep:cfg-if"]
|
compress-cache-results = ["dep:async-compression","dep:cfg-if"]
|
||||||
encrypt-cache-results = ["dep:chacha20poly1305","dep:chacha20"]
|
encrypt-cache-results = ["dep:chacha20poly1305","dep:chacha20"]
|
||||||
cec-cache-results = ["compress-cache-results","encrypt-cache-results"]
|
cec-cache-results = ["compress-cache-results","encrypt-cache-results"]
|
||||||
|
|
||||||
|
@ -5,7 +5,7 @@
|
|||||||
#[cfg(not(feature = "dhat-heap"))]
|
#[cfg(not(feature = "dhat-heap"))]
|
||||||
use mimalloc::MiMalloc;
|
use mimalloc::MiMalloc;
|
||||||
|
|
||||||
use std::net::TcpListener;
|
use std::{net::TcpListener, sync::OnceLock};
|
||||||
use websurfx::{cache::cacher::create_cache, config::parser::Config, run};
|
use websurfx::{cache::cacher::create_cache, config::parser::Config, run};
|
||||||
|
|
||||||
/// A dhat heap memory profiler
|
/// A dhat heap memory profiler
|
||||||
@ -17,6 +17,9 @@ static ALLOC: dhat::Alloc = dhat::Alloc;
|
|||||||
#[global_allocator]
|
#[global_allocator]
|
||||||
static GLOBAL: MiMalloc = MiMalloc;
|
static GLOBAL: MiMalloc = MiMalloc;
|
||||||
|
|
||||||
|
/// A static constant for holding the parsed config.
|
||||||
|
static CONFIG: OnceLock<Config> = OnceLock::new();
|
||||||
|
|
||||||
/// The function that launches the main server and registers all the routes of the website.
|
/// The function that launches the main server and registers all the routes of the website.
|
||||||
///
|
///
|
||||||
/// # Error
|
/// # Error
|
||||||
@ -29,10 +32,10 @@ async fn main() -> std::io::Result<()> {
|
|||||||
#[cfg(feature = "dhat-heap")]
|
#[cfg(feature = "dhat-heap")]
|
||||||
let _profiler = dhat::Profiler::new_heap();
|
let _profiler = dhat::Profiler::new_heap();
|
||||||
|
|
||||||
// Initialize the parsed config file.
|
// Initialize the parsed config globally.
|
||||||
let config = Config::parse(false).unwrap();
|
let config = CONFIG.get_or_init(|| Config::parse(false).unwrap());
|
||||||
|
|
||||||
let cache = create_cache(&config).await;
|
let cache = create_cache(config).await;
|
||||||
|
|
||||||
log::info!(
|
log::info!(
|
||||||
"started server on port {} and IP {}",
|
"started server on port {} and IP {}",
|
||||||
@ -45,7 +48,7 @@ async fn main() -> std::io::Result<()> {
|
|||||||
config.port,
|
config.port,
|
||||||
);
|
);
|
||||||
|
|
||||||
let listener = TcpListener::bind((config.binding_ip.clone(), config.port))?;
|
let listener = TcpListener::bind((config.binding_ip.as_str(), config.port))?;
|
||||||
|
|
||||||
run(listener, config, cache)?.await
|
run(listener, config, cache)?.await
|
||||||
}
|
}
|
||||||
|
68
src/cache/cacher.rs
vendored
68
src/cache/cacher.rs
vendored
@ -93,7 +93,7 @@ pub trait Cacher: Send + Sync {
|
|||||||
feature = "encrypt-cache-results",
|
feature = "encrypt-cache-results",
|
||||||
feature = "cec-cache-results"
|
feature = "cec-cache-results"
|
||||||
))]
|
))]
|
||||||
fn encrypt_or_decrypt_results(
|
async fn encrypt_or_decrypt_results(
|
||||||
&mut self,
|
&mut self,
|
||||||
mut bytes: Vec<u8>,
|
mut bytes: Vec<u8>,
|
||||||
encrypt: bool,
|
encrypt: bool,
|
||||||
@ -137,11 +137,19 @@ pub trait Cacher: Send + Sync {
|
|||||||
/// Returns the compressed bytes on success otherwise it returns a CacheError
|
/// Returns the compressed bytes on success otherwise it returns a CacheError
|
||||||
/// on failure.
|
/// on failure.
|
||||||
#[cfg(any(feature = "compress-cache-results", feature = "cec-cache-results"))]
|
#[cfg(any(feature = "compress-cache-results", feature = "cec-cache-results"))]
|
||||||
fn compress_results(&mut self, mut bytes: Vec<u8>) -> Result<Vec<u8>, Report<CacheError>> {
|
async fn compress_results(
|
||||||
use std::io::Write;
|
&mut self,
|
||||||
let mut writer = brotli::CompressorWriter::new(Vec::new(), 4096, 11, 22);
|
mut bytes: Vec<u8>,
|
||||||
|
) -> Result<Vec<u8>, Report<CacheError>> {
|
||||||
|
use tokio::io::AsyncWriteExt;
|
||||||
|
let mut writer = async_compression::tokio::write::BrotliEncoder::new(Vec::new());
|
||||||
writer
|
writer
|
||||||
.write_all(&bytes)
|
.write_all(&bytes)
|
||||||
|
.await
|
||||||
|
.map_err(|_| CacheError::CompressionError)?;
|
||||||
|
writer
|
||||||
|
.shutdown()
|
||||||
|
.await
|
||||||
.map_err(|_| CacheError::CompressionError)?;
|
.map_err(|_| CacheError::CompressionError)?;
|
||||||
bytes = writer.into_inner();
|
bytes = writer.into_inner();
|
||||||
Ok(bytes)
|
Ok(bytes)
|
||||||
@ -159,17 +167,17 @@ pub trait Cacher: Send + Sync {
|
|||||||
/// Returns the compressed and encrypted bytes on success otherwise it returns a CacheError
|
/// Returns the compressed and encrypted bytes on success otherwise it returns a CacheError
|
||||||
/// on failure.
|
/// on failure.
|
||||||
#[cfg(feature = "cec-cache-results")]
|
#[cfg(feature = "cec-cache-results")]
|
||||||
fn compress_encrypt_compress_results(
|
async fn compress_encrypt_compress_results(
|
||||||
&mut self,
|
&mut self,
|
||||||
mut bytes: Vec<u8>,
|
mut bytes: Vec<u8>,
|
||||||
) -> Result<Vec<u8>, Report<CacheError>> {
|
) -> Result<Vec<u8>, Report<CacheError>> {
|
||||||
// compress first
|
// compress first
|
||||||
bytes = self.compress_results(bytes)?;
|
bytes = self.compress_results(bytes).await?;
|
||||||
// encrypt
|
// encrypt
|
||||||
bytes = self.encrypt_or_decrypt_results(bytes, true)?;
|
bytes = self.encrypt_or_decrypt_results(bytes, true).await?;
|
||||||
|
|
||||||
// compress again;
|
// compress again;
|
||||||
bytes = self.compress_results(bytes)?;
|
bytes = self.compress_results(bytes).await?;
|
||||||
|
|
||||||
Ok(bytes)
|
Ok(bytes)
|
||||||
}
|
}
|
||||||
@ -187,11 +195,11 @@ pub trait Cacher: Send + Sync {
|
|||||||
/// on failure.
|
/// on failure.
|
||||||
|
|
||||||
#[cfg(any(feature = "compress-cache-results", feature = "cec-cache-results"))]
|
#[cfg(any(feature = "compress-cache-results", feature = "cec-cache-results"))]
|
||||||
fn decompress_results(&mut self, bytes: &[u8]) -> Result<Vec<u8>, Report<CacheError>> {
|
async fn decompress_results(&mut self, bytes: &[u8]) -> Result<Vec<u8>, Report<CacheError>> {
|
||||||
cfg_if::cfg_if! {
|
cfg_if::cfg_if! {
|
||||||
if #[cfg(feature = "compress-cache-results")]
|
if #[cfg(feature = "compress-cache-results")]
|
||||||
{
|
{
|
||||||
decompress_util(bytes)
|
decompress_util(bytes).await
|
||||||
|
|
||||||
}
|
}
|
||||||
else if #[cfg(feature = "cec-cache-results")]
|
else if #[cfg(feature = "cec-cache-results")]
|
||||||
@ -199,7 +207,7 @@ pub trait Cacher: Send + Sync {
|
|||||||
let decompressed = decompress_util(bytes)?;
|
let decompressed = decompress_util(bytes)?;
|
||||||
let decrypted = self.encrypt_or_decrypt_results(decompressed, false)?;
|
let decrypted = self.encrypt_or_decrypt_results(decompressed, false)?;
|
||||||
|
|
||||||
decompress_util(&decrypted)
|
decompress_util(&decrypted).await
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -216,7 +224,7 @@ pub trait Cacher: Send + Sync {
|
|||||||
/// # Error
|
/// # Error
|
||||||
/// Returns a Vec of compressed or encrypted bytes on success otherwise it returns a CacheError
|
/// Returns a Vec of compressed or encrypted bytes on success otherwise it returns a CacheError
|
||||||
/// on failure.
|
/// on failure.
|
||||||
fn pre_process_search_results(
|
async fn pre_process_search_results(
|
||||||
&mut self,
|
&mut self,
|
||||||
search_results: &SearchResults,
|
search_results: &SearchResults,
|
||||||
) -> Result<Vec<u8>, Report<CacheError>> {
|
) -> Result<Vec<u8>, Report<CacheError>> {
|
||||||
@ -224,19 +232,20 @@ pub trait Cacher: Send + Sync {
|
|||||||
let mut bytes: Vec<u8> = search_results.try_into()?;
|
let mut bytes: Vec<u8> = search_results.try_into()?;
|
||||||
#[cfg(feature = "compress-cache-results")]
|
#[cfg(feature = "compress-cache-results")]
|
||||||
{
|
{
|
||||||
let compressed = self.compress_results(bytes)?;
|
let compressed = self.compress_results(bytes).await?;
|
||||||
bytes = compressed;
|
bytes = compressed;
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(feature = "encrypt-cache-results")]
|
#[cfg(feature = "encrypt-cache-results")]
|
||||||
{
|
{
|
||||||
let encrypted = self.encrypt_or_decrypt_results(bytes, true)?;
|
let encrypted = self.encrypt_or_decrypt_results(bytes, true).await?;
|
||||||
bytes = encrypted;
|
bytes = encrypted;
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(feature = "cec-cache-results")]
|
#[cfg(feature = "cec-cache-results")]
|
||||||
{
|
{
|
||||||
let compressed_encrypted_compressed = self.compress_encrypt_compress_results(bytes)?;
|
let compressed_encrypted_compressed =
|
||||||
|
self.compress_encrypt_compress_results(bytes).await?;
|
||||||
bytes = compressed_encrypted_compressed;
|
bytes = compressed_encrypted_compressed;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -256,25 +265,25 @@ pub trait Cacher: Send + Sync {
|
|||||||
/// on failure.
|
/// on failure.
|
||||||
|
|
||||||
#[allow(unused_mut)] // needs to be mutable when any of the features is enabled
|
#[allow(unused_mut)] // needs to be mutable when any of the features is enabled
|
||||||
fn post_process_search_results(
|
async fn post_process_search_results(
|
||||||
&mut self,
|
&mut self,
|
||||||
mut bytes: Vec<u8>,
|
mut bytes: Vec<u8>,
|
||||||
) -> Result<SearchResults, Report<CacheError>> {
|
) -> Result<SearchResults, Report<CacheError>> {
|
||||||
#[cfg(feature = "compress-cache-results")]
|
#[cfg(feature = "compress-cache-results")]
|
||||||
{
|
{
|
||||||
let decompressed = self.decompress_results(&bytes)?;
|
let decompressed = self.decompress_results(&bytes).await?;
|
||||||
bytes = decompressed
|
bytes = decompressed
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(feature = "encrypt-cache-results")]
|
#[cfg(feature = "encrypt-cache-results")]
|
||||||
{
|
{
|
||||||
let decrypted = self.encrypt_or_decrypt_results(bytes, false)?;
|
let decrypted = self.encrypt_or_decrypt_results(bytes, false).await?;
|
||||||
bytes = decrypted
|
bytes = decrypted
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(feature = "cec-cache-results")]
|
#[cfg(feature = "cec-cache-results")]
|
||||||
{
|
{
|
||||||
let decompressed_decrypted = self.decompress_results(&bytes)?;
|
let decompressed_decrypted = self.decompress_results(&bytes).await?;
|
||||||
bytes = decompressed_decrypted;
|
bytes = decompressed_decrypted;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -295,16 +304,19 @@ pub trait Cacher: Send + Sync {
|
|||||||
/// on failure.
|
/// on failure.
|
||||||
|
|
||||||
#[cfg(any(feature = "compress-cache-results", feature = "cec-cache-results"))]
|
#[cfg(any(feature = "compress-cache-results", feature = "cec-cache-results"))]
|
||||||
fn decompress_util(input: &[u8]) -> Result<Vec<u8>, Report<CacheError>> {
|
async fn decompress_util(input: &[u8]) -> Result<Vec<u8>, Report<CacheError>> {
|
||||||
use std::io::Write;
|
use tokio::io::AsyncWriteExt;
|
||||||
let mut writer = brotli::DecompressorWriter::new(Vec::new(), 4096);
|
let mut writer = async_compression::tokio::write::BrotliDecoder::new(Vec::new());
|
||||||
|
|
||||||
writer
|
writer
|
||||||
.write_all(input)
|
.write_all(input)
|
||||||
|
.await
|
||||||
.map_err(|_| CacheError::CompressionError)?;
|
.map_err(|_| CacheError::CompressionError)?;
|
||||||
let bytes = writer
|
writer
|
||||||
.into_inner()
|
.shutdown()
|
||||||
|
.await
|
||||||
.map_err(|_| CacheError::CompressionError)?;
|
.map_err(|_| CacheError::CompressionError)?;
|
||||||
|
let bytes = writer.into_inner();
|
||||||
Ok(bytes)
|
Ok(bytes)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -329,7 +341,7 @@ impl Cacher for RedisCache {
|
|||||||
let bytes = base64::engine::general_purpose::STANDARD_NO_PAD
|
let bytes = base64::engine::general_purpose::STANDARD_NO_PAD
|
||||||
.decode(base64_string)
|
.decode(base64_string)
|
||||||
.map_err(|_| CacheError::Base64DecodingOrEncodingError)?;
|
.map_err(|_| CacheError::Base64DecodingOrEncodingError)?;
|
||||||
self.post_process_search_results(bytes)
|
self.post_process_search_results(bytes).await
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn cache_results(
|
async fn cache_results(
|
||||||
@ -345,7 +357,7 @@ impl Cacher for RedisCache {
|
|||||||
let mut bytes = Vec::with_capacity(search_results_len);
|
let mut bytes = Vec::with_capacity(search_results_len);
|
||||||
|
|
||||||
for result in search_results {
|
for result in search_results {
|
||||||
let processed = self.pre_process_search_results(result)?;
|
let processed = self.pre_process_search_results(result).await?;
|
||||||
bytes.push(processed);
|
bytes.push(processed);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -405,7 +417,7 @@ impl Cacher for InMemoryCache {
|
|||||||
async fn cached_results(&mut self, url: &str) -> Result<SearchResults, Report<CacheError>> {
|
async fn cached_results(&mut self, url: &str) -> Result<SearchResults, Report<CacheError>> {
|
||||||
let hashed_url_string = self.hash_url(url);
|
let hashed_url_string = self.hash_url(url);
|
||||||
match self.cache.get(&hashed_url_string) {
|
match self.cache.get(&hashed_url_string) {
|
||||||
Some(res) => self.post_process_search_results(res),
|
Some(res) => self.post_process_search_results(res).await,
|
||||||
None => Err(Report::new(CacheError::MissingValue)),
|
None => Err(Report::new(CacheError::MissingValue)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -417,7 +429,7 @@ impl Cacher for InMemoryCache {
|
|||||||
) -> Result<(), Report<CacheError>> {
|
) -> Result<(), Report<CacheError>> {
|
||||||
for (url, search_result) in urls.iter().zip(search_results.iter()) {
|
for (url, search_result) in urls.iter().zip(search_results.iter()) {
|
||||||
let hashed_url_string = self.hash_url(url);
|
let hashed_url_string = self.hash_url(url);
|
||||||
let bytes = self.pre_process_search_results(search_result)?;
|
let bytes = self.pre_process_search_results(search_result).await?;
|
||||||
self.cache.insert(hashed_url_string, bytes);
|
self.cache.insert(hashed_url_string, bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
37
src/cache/redis_cacher.rs
vendored
37
src/cache/redis_cacher.rs
vendored
@ -1,15 +1,16 @@
|
|||||||
//! This module provides the functionality to cache the aggregated results fetched and aggregated
|
//! This module provides the functionality to cache the aggregated results fetched and aggregated
|
||||||
//! from the upstream search engines in a json format.
|
//! from the upstream search engines in a json format.
|
||||||
|
|
||||||
|
use super::error::CacheError;
|
||||||
use error_stack::Report;
|
use error_stack::Report;
|
||||||
use futures::future::try_join_all;
|
use futures::stream::FuturesUnordered;
|
||||||
use redis::{aio::ConnectionManager, AsyncCommands, Client, RedisError};
|
use redis::{aio::ConnectionManager, AsyncCommands, Client, RedisError};
|
||||||
|
|
||||||
use super::error::CacheError;
|
/// A constant holding the redis pipeline size.
|
||||||
|
const REDIS_PIPELINE_SIZE: usize = 3;
|
||||||
|
|
||||||
/// A named struct which stores the redis Connection url address to which the client will
|
/// A named struct which stores the redis Connection url address to which the client will
|
||||||
/// connect to.
|
/// connect to.
|
||||||
#[derive(Clone)]
|
|
||||||
pub struct RedisCache {
|
pub struct RedisCache {
|
||||||
/// It stores a pool of connections ready to be used.
|
/// It stores a pool of connections ready to be used.
|
||||||
connection_pool: Vec<ConnectionManager>,
|
connection_pool: Vec<ConnectionManager>,
|
||||||
@ -20,6 +21,8 @@ pub struct RedisCache {
|
|||||||
current_connection: u8,
|
current_connection: u8,
|
||||||
/// It stores the max TTL for keys.
|
/// It stores the max TTL for keys.
|
||||||
cache_ttl: u16,
|
cache_ttl: u16,
|
||||||
|
/// It stores the redis pipeline struct of size 3.
|
||||||
|
pipeline: redis::Pipeline,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl RedisCache {
|
impl RedisCache {
|
||||||
@ -30,6 +33,8 @@ impl RedisCache {
|
|||||||
/// * `redis_connection_url` - It takes the redis Connection url address.
|
/// * `redis_connection_url` - It takes the redis Connection url address.
|
||||||
/// * `pool_size` - It takes the size of the connection pool (in other words the number of
|
/// * `pool_size` - It takes the size of the connection pool (in other words the number of
|
||||||
/// connections that should be stored in the pool).
|
/// connections that should be stored in the pool).
|
||||||
|
/// * `cache_ttl` - It takes the the time to live for cached results to live in the redis
|
||||||
|
/// server.
|
||||||
///
|
///
|
||||||
/// # Error
|
/// # Error
|
||||||
///
|
///
|
||||||
@ -41,18 +46,28 @@ impl RedisCache {
|
|||||||
cache_ttl: u16,
|
cache_ttl: u16,
|
||||||
) -> Result<Self, Box<dyn std::error::Error>> {
|
) -> Result<Self, Box<dyn std::error::Error>> {
|
||||||
let client = Client::open(redis_connection_url)?;
|
let client = Client::open(redis_connection_url)?;
|
||||||
let mut tasks: Vec<_> = Vec::new();
|
let tasks: FuturesUnordered<_> = FuturesUnordered::new();
|
||||||
|
|
||||||
for _ in 0..pool_size {
|
for _ in 0..pool_size {
|
||||||
tasks.push(client.get_connection_manager());
|
let client_partially_cloned = client.clone();
|
||||||
|
tasks.push(tokio::spawn(async move {
|
||||||
|
client_partially_cloned.get_connection_manager().await
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut outputs = Vec::new();
|
||||||
|
for task in tasks {
|
||||||
|
outputs.push(task.await??);
|
||||||
}
|
}
|
||||||
|
|
||||||
let redis_cache = RedisCache {
|
let redis_cache = RedisCache {
|
||||||
connection_pool: try_join_all(tasks).await?,
|
connection_pool: outputs,
|
||||||
pool_size,
|
pool_size,
|
||||||
current_connection: Default::default(),
|
current_connection: Default::default(),
|
||||||
cache_ttl,
|
cache_ttl,
|
||||||
|
pipeline: redis::Pipeline::with_capacity(REDIS_PIPELINE_SIZE),
|
||||||
};
|
};
|
||||||
|
|
||||||
Ok(redis_cache)
|
Ok(redis_cache)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -122,13 +137,14 @@ impl RedisCache {
|
|||||||
keys: impl Iterator<Item = String>,
|
keys: impl Iterator<Item = String>,
|
||||||
) -> Result<(), Report<CacheError>> {
|
) -> Result<(), Report<CacheError>> {
|
||||||
self.current_connection = Default::default();
|
self.current_connection = Default::default();
|
||||||
let mut pipeline = redis::Pipeline::with_capacity(3);
|
|
||||||
|
|
||||||
for (key, json_result) in keys.zip(json_results) {
|
for (key, json_result) in keys.zip(json_results) {
|
||||||
pipeline.set_ex(key, json_result, self.cache_ttl.into());
|
self.pipeline
|
||||||
|
.set_ex(key, json_result, self.cache_ttl.into());
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut result: Result<(), RedisError> = pipeline
|
let mut result: Result<(), RedisError> = self
|
||||||
|
.pipeline
|
||||||
.query_async(&mut self.connection_pool[self.current_connection as usize])
|
.query_async(&mut self.connection_pool[self.current_connection as usize])
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
@ -149,7 +165,8 @@ impl RedisCache {
|
|||||||
CacheError::PoolExhaustionWithConnectionDropError,
|
CacheError::PoolExhaustionWithConnectionDropError,
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
result = pipeline
|
result = self
|
||||||
|
.pipeline
|
||||||
.query_async(
|
.query_async(
|
||||||
&mut self.connection_pool[self.current_connection as usize],
|
&mut self.connection_pool[self.current_connection as usize],
|
||||||
)
|
)
|
||||||
|
@ -9,7 +9,6 @@ use mlua::Lua;
|
|||||||
use std::{collections::HashMap, fs, thread::available_parallelism};
|
use std::{collections::HashMap, fs, thread::available_parallelism};
|
||||||
|
|
||||||
/// A named struct which stores the parsed config file options.
|
/// A named struct which stores the parsed config file options.
|
||||||
#[derive(Clone)]
|
|
||||||
pub struct Config {
|
pub struct Config {
|
||||||
/// It stores the parsed port number option on which the server should launch.
|
/// It stores the parsed port number option on which the server should launch.
|
||||||
pub port: u16,
|
pub port: u16,
|
||||||
|
@ -48,7 +48,7 @@ impl SearchEngine for Bing {
|
|||||||
user_agent: &str,
|
user_agent: &str,
|
||||||
client: &Client,
|
client: &Client,
|
||||||
_safe_search: u8,
|
_safe_search: u8,
|
||||||
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
) -> Result<Vec<(String, SearchResult)>, EngineError> {
|
||||||
// Bing uses `start results from this number` convention
|
// Bing uses `start results from this number` convention
|
||||||
// So, for 10 results per page, page 0 starts at 1, page 1
|
// So, for 10 results per page, page 0 starts at 1, page 1
|
||||||
// starts at 11, and so on.
|
// starts at 11, and so on.
|
||||||
|
@ -44,7 +44,7 @@ impl SearchEngine for Brave {
|
|||||||
user_agent: &str,
|
user_agent: &str,
|
||||||
client: &Client,
|
client: &Client,
|
||||||
safe_search: u8,
|
safe_search: u8,
|
||||||
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
) -> Result<Vec<(String, SearchResult)>, EngineError> {
|
||||||
let url = format!("https://search.brave.com/search?q={query}&offset={page}");
|
let url = format!("https://search.brave.com/search?q={query}&offset={page}");
|
||||||
|
|
||||||
let safe_search_level = match safe_search {
|
let safe_search_level = match safe_search {
|
||||||
|
@ -47,7 +47,7 @@ impl SearchEngine for DuckDuckGo {
|
|||||||
user_agent: &str,
|
user_agent: &str,
|
||||||
client: &Client,
|
client: &Client,
|
||||||
_safe_search: u8,
|
_safe_search: u8,
|
||||||
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
) -> Result<Vec<(String, SearchResult)>, EngineError> {
|
||||||
// Page number can be missing or empty string and so appropriate handling is required
|
// Page number can be missing or empty string and so appropriate handling is required
|
||||||
// so that upstream server recieves valid page number.
|
// so that upstream server recieves valid page number.
|
||||||
let url: String = match page {
|
let url: String = match page {
|
||||||
|
@ -62,7 +62,7 @@ impl SearchEngine for LibreX {
|
|||||||
user_agent: &str,
|
user_agent: &str,
|
||||||
client: &Client,
|
client: &Client,
|
||||||
_safe_search: u8,
|
_safe_search: u8,
|
||||||
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
) -> Result<Vec<(String, SearchResult)>, EngineError> {
|
||||||
// Page number can be missing or empty string and so appropriate handling is required
|
// Page number can be missing or empty string and so appropriate handling is required
|
||||||
// so that upstream server recieves valid page number.
|
// so that upstream server recieves valid page number.
|
||||||
let url: String = format!(
|
let url: String = format!(
|
||||||
|
@ -47,7 +47,7 @@ impl SearchEngine for Mojeek {
|
|||||||
user_agent: &str,
|
user_agent: &str,
|
||||||
client: &Client,
|
client: &Client,
|
||||||
safe_search: u8,
|
safe_search: u8,
|
||||||
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
) -> Result<Vec<(String, SearchResult)>, EngineError> {
|
||||||
// Mojeek uses `start results from this number` convention
|
// Mojeek uses `start results from this number` convention
|
||||||
// So, for 10 results per page, page 0 starts at 1, page 1
|
// So, for 10 results per page, page 0 starts at 1, page 1
|
||||||
// starts at 11, and so on.
|
// starts at 11, and so on.
|
||||||
@ -72,8 +72,23 @@ impl SearchEngine for Mojeek {
|
|||||||
"Yep",
|
"Yep",
|
||||||
"You",
|
"You",
|
||||||
];
|
];
|
||||||
|
|
||||||
let qss = search_engines.join("%2C");
|
let qss = search_engines.join("%2C");
|
||||||
let safe = if safe_search == 0 { "0" } else { "1" };
|
|
||||||
|
// A branchless condition to check whether the `safe_search` parameter has the
|
||||||
|
// value 0 or not. If it is zero then it sets the value 0 otherwise it sets
|
||||||
|
// the value to 1 for all other values of `safe_search`
|
||||||
|
//
|
||||||
|
// Moreover, the below branchless code is equivalent to the following code below:
|
||||||
|
//
|
||||||
|
// ```rust
|
||||||
|
// let safe = if safe_search == 0 { 0 } else { 1 }.to_string();
|
||||||
|
// ```
|
||||||
|
//
|
||||||
|
// For more information on branchless programming. See:
|
||||||
|
//
|
||||||
|
// * https://piped.video/watch?v=bVJ-mWWL7cE
|
||||||
|
let safe = u8::from(safe_search != 0).to_string();
|
||||||
|
|
||||||
// Mojeek detects automated requests, these are preferences that are
|
// Mojeek detects automated requests, these are preferences that are
|
||||||
// able to circumvent the countermeasure. Some of these are
|
// able to circumvent the countermeasure. Some of these are
|
||||||
@ -89,7 +104,7 @@ impl SearchEngine for Mojeek {
|
|||||||
("hp", "minimal"),
|
("hp", "minimal"),
|
||||||
("lb", "en"),
|
("lb", "en"),
|
||||||
("qss", &qss),
|
("qss", &qss),
|
||||||
("safe", safe),
|
("safe", &safe),
|
||||||
];
|
];
|
||||||
|
|
||||||
let mut query_params_string = String::new();
|
let mut query_params_string = String::new();
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
//! This modules provides helper functionalities for parsing a html document into internal SearchResult.
|
//! This modules provides helper functionalities for parsing a html document into internal SearchResult.
|
||||||
use std::collections::HashMap;
|
|
||||||
|
|
||||||
use crate::models::{aggregation_models::SearchResult, engine_models::EngineError};
|
use crate::models::{aggregation_models::SearchResult, engine_models::EngineError};
|
||||||
use error_stack::{Report, Result};
|
use error_stack::{Report, Result};
|
||||||
@ -47,7 +46,7 @@ impl SearchResultParser {
|
|||||||
&self,
|
&self,
|
||||||
document: &Html,
|
document: &Html,
|
||||||
builder: impl Fn(&ElementRef<'_>, &ElementRef<'_>, &ElementRef<'_>) -> Option<SearchResult>,
|
builder: impl Fn(&ElementRef<'_>, &ElementRef<'_>, &ElementRef<'_>) -> Option<SearchResult>,
|
||||||
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
) -> Result<Vec<(String, SearchResult)>, EngineError> {
|
||||||
let res = document
|
let res = document
|
||||||
.select(&self.results)
|
.select(&self.results)
|
||||||
.filter_map(|result| {
|
.filter_map(|result| {
|
||||||
|
@ -43,12 +43,21 @@ impl SearchEngine for Searx {
|
|||||||
user_agent: &str,
|
user_agent: &str,
|
||||||
client: &Client,
|
client: &Client,
|
||||||
mut safe_search: u8,
|
mut safe_search: u8,
|
||||||
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
) -> Result<Vec<(String, SearchResult)>, EngineError> {
|
||||||
// Page number can be missing or empty string and so appropriate handling is required
|
// A branchless condition to check whether the `safe_search` parameter has the
|
||||||
// so that upstream server recieves valid page number.
|
// value greater than equal to three or not. If it is, then it modifies the
|
||||||
if safe_search == 3 {
|
// `safesearch` parameters value to 2.
|
||||||
safe_search = 2;
|
//
|
||||||
};
|
// Moreover, the below branchless code is equivalent to the following code below:
|
||||||
|
//
|
||||||
|
// ```rust
|
||||||
|
// safe_search = u8::from(safe_search == 3) * 2;
|
||||||
|
// ```
|
||||||
|
//
|
||||||
|
// For more information on branchless programming. See:
|
||||||
|
//
|
||||||
|
// * https://piped.video/watch?v=bVJ-mWWL7cE
|
||||||
|
safe_search = u8::from(safe_search >= 3) * 2;
|
||||||
|
|
||||||
let url: String = format!(
|
let url: String = format!(
|
||||||
"https://searx.be/search?q={query}&pageno={}&safesearch={safe_search}",
|
"https://searx.be/search?q={query}&pageno={}&safesearch={safe_search}",
|
||||||
|
@ -47,7 +47,7 @@ impl SearchEngine for Startpage {
|
|||||||
user_agent: &str,
|
user_agent: &str,
|
||||||
client: &Client,
|
client: &Client,
|
||||||
_safe_search: u8,
|
_safe_search: u8,
|
||||||
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
) -> Result<Vec<(String, SearchResult)>, EngineError> {
|
||||||
// Page number can be missing or empty string and so appropriate handling is required
|
// Page number can be missing or empty string and so appropriate handling is required
|
||||||
// so that upstream server recieves valid page number.
|
// so that upstream server recieves valid page number.
|
||||||
let url: String = format!(
|
let url: String = format!(
|
||||||
|
29
src/lib.rs
29
src/lib.rs
@ -14,7 +14,7 @@ pub mod results;
|
|||||||
pub mod server;
|
pub mod server;
|
||||||
pub mod templates;
|
pub mod templates;
|
||||||
|
|
||||||
use std::net::TcpListener;
|
use std::{net::TcpListener, sync::OnceLock};
|
||||||
|
|
||||||
use crate::server::router;
|
use crate::server::router;
|
||||||
|
|
||||||
@ -31,6 +31,9 @@ use cache::cacher::{Cacher, SharedCache};
|
|||||||
use config::parser::Config;
|
use config::parser::Config;
|
||||||
use handler::{file_path, FileType};
|
use handler::{file_path, FileType};
|
||||||
|
|
||||||
|
/// A static constant for holding the cache struct.
|
||||||
|
static SHARED_CACHE: OnceLock<SharedCache> = OnceLock::new();
|
||||||
|
|
||||||
/// Runs the web server on the provided TCP listener and returns a `Server` instance.
|
/// Runs the web server on the provided TCP listener and returns a `Server` instance.
|
||||||
///
|
///
|
||||||
/// # Arguments
|
/// # Arguments
|
||||||
@ -44,27 +47,29 @@ use handler::{file_path, FileType};
|
|||||||
/// # Example
|
/// # Example
|
||||||
///
|
///
|
||||||
/// ```rust
|
/// ```rust
|
||||||
/// use std::net::TcpListener;
|
/// use std::{net::TcpListener, sync::OnceLock};
|
||||||
/// use websurfx::{config::parser::Config, run, cache::cacher::create_cache};
|
/// use websurfx::{config::parser::Config, run, cache::cacher::create_cache};
|
||||||
///
|
///
|
||||||
|
/// /// A static constant for holding the parsed config.
|
||||||
|
/// static CONFIG: OnceLock<Config> = OnceLock::new();
|
||||||
|
///
|
||||||
/// #[tokio::main]
|
/// #[tokio::main]
|
||||||
/// async fn main(){
|
/// async fn main(){
|
||||||
/// let config = Config::parse(true).unwrap();
|
/// // Initialize the parsed config globally.
|
||||||
|
/// let config = CONFIG.get_or_init(|| Config::parse(true).unwrap());
|
||||||
/// let listener = TcpListener::bind("127.0.0.1:8080").expect("Failed to bind address");
|
/// let listener = TcpListener::bind("127.0.0.1:8080").expect("Failed to bind address");
|
||||||
/// let cache = create_cache(&config).await;
|
/// let cache = create_cache(config).await;
|
||||||
/// let server = run(listener,config,cache).expect("Failed to start server");
|
/// let server = run(listener,&config,cache).expect("Failed to start server");
|
||||||
/// }
|
/// }
|
||||||
/// ```
|
/// ```
|
||||||
pub fn run(
|
pub fn run(
|
||||||
listener: TcpListener,
|
listener: TcpListener,
|
||||||
config: Config,
|
config: &'static Config,
|
||||||
cache: impl Cacher + 'static,
|
cache: impl Cacher + 'static,
|
||||||
) -> std::io::Result<Server> {
|
) -> std::io::Result<Server> {
|
||||||
let public_folder_path: &str = file_path(FileType::Theme)?;
|
let public_folder_path: &str = file_path(FileType::Theme)?;
|
||||||
|
|
||||||
let cloned_config_threads_opt: u8 = config.threads;
|
let cache = SHARED_CACHE.get_or_init(|| SharedCache::new(cache));
|
||||||
|
|
||||||
let cache = web::Data::new(SharedCache::new(cache));
|
|
||||||
|
|
||||||
let server = HttpServer::new(move || {
|
let server = HttpServer::new(move || {
|
||||||
let cors: Cors = Cors::default()
|
let cors: Cors = Cors::default()
|
||||||
@ -81,8 +86,8 @@ pub fn run(
|
|||||||
// Compress the responses provided by the server for the client requests.
|
// Compress the responses provided by the server for the client requests.
|
||||||
.wrap(Compress::default())
|
.wrap(Compress::default())
|
||||||
.wrap(Logger::default()) // added logging middleware for logging.
|
.wrap(Logger::default()) // added logging middleware for logging.
|
||||||
.app_data(web::Data::new(config.clone()))
|
.app_data(web::Data::new(config))
|
||||||
.app_data(cache.clone())
|
.app_data(web::Data::new(cache))
|
||||||
.wrap(cors)
|
.wrap(cors)
|
||||||
.wrap(Governor::new(
|
.wrap(Governor::new(
|
||||||
&GovernorConfigBuilder::default()
|
&GovernorConfigBuilder::default()
|
||||||
@ -107,7 +112,7 @@ pub fn run(
|
|||||||
.service(router::settings) // settings page
|
.service(router::settings) // settings page
|
||||||
.default_service(web::route().to(router::not_found)) // error page
|
.default_service(web::route().to(router::not_found)) // error page
|
||||||
})
|
})
|
||||||
.workers(cloned_config_threads_opt as usize)
|
.workers(config.threads as usize)
|
||||||
// Start server on 127.0.0.1 with the user provided port number. for example 127.0.0.1:8080.
|
// Start server on 127.0.0.1 with the user provided port number. for example 127.0.0.1:8080.
|
||||||
.listen(listener)?
|
.listen(listener)?
|
||||||
.run();
|
.run();
|
||||||
|
@ -154,8 +154,8 @@ impl SearchResults {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// A setter function that sets the filtered to true.
|
/// A setter function that sets the filtered to true.
|
||||||
pub fn set_filtered(&mut self) {
|
pub fn set_filtered(&mut self, filtered: bool) {
|
||||||
self.filtered = true;
|
self.filtered = filtered;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A getter function that gets the value of `engine_errors_info`.
|
/// A getter function that gets the value of `engine_errors_info`.
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
use super::aggregation_models::SearchResult;
|
use super::aggregation_models::SearchResult;
|
||||||
use error_stack::{Report, Result, ResultExt};
|
use error_stack::{Report, Result, ResultExt};
|
||||||
use reqwest::Client;
|
use reqwest::Client;
|
||||||
use std::{collections::HashMap, fmt};
|
use std::fmt;
|
||||||
|
|
||||||
/// A custom error type used for handle engine associated errors.
|
/// A custom error type used for handle engine associated errors.
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
@ -147,7 +147,7 @@ pub trait SearchEngine: Sync + Send {
|
|||||||
user_agent: &str,
|
user_agent: &str,
|
||||||
client: &Client,
|
client: &Client,
|
||||||
safe_search: u8,
|
safe_search: u8,
|
||||||
) -> Result<HashMap<String, SearchResult>, EngineError>;
|
) -> Result<Vec<(String, SearchResult)>, EngineError>;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A named struct which stores the engine struct with the name of the associated engine.
|
/// A named struct which stores the engine struct with the name of the associated engine.
|
||||||
|
@ -10,7 +10,7 @@
|
|||||||
/// order to allow the deserializing the json back to struct in aggregate function in
|
/// order to allow the deserializing the json back to struct in aggregate function in
|
||||||
/// aggregator.rs and create a new struct out of it and then serialize it back to json and pass
|
/// aggregator.rs and create a new struct out of it and then serialize it back to json and pass
|
||||||
/// it to the template files.
|
/// it to the template files.
|
||||||
#[derive(Clone, Default)]
|
#[derive(Default)]
|
||||||
pub struct Style {
|
pub struct Style {
|
||||||
/// It stores the parsed theme option used to set a theme for the website.
|
/// It stores the parsed theme option used to set a theme for the website.
|
||||||
pub theme: String,
|
pub theme: String,
|
||||||
@ -40,7 +40,6 @@ impl Style {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Configuration options for the aggregator.
|
/// Configuration options for the aggregator.
|
||||||
#[derive(Clone)]
|
|
||||||
pub struct AggregatorConfig {
|
pub struct AggregatorConfig {
|
||||||
/// It stores the option to whether enable or disable random delays between
|
/// It stores the option to whether enable or disable random delays between
|
||||||
/// requests.
|
/// requests.
|
||||||
@ -48,7 +47,6 @@ pub struct AggregatorConfig {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Configuration options for the rate limiter middleware.
|
/// Configuration options for the rate limiter middleware.
|
||||||
#[derive(Clone)]
|
|
||||||
pub struct RateLimiter {
|
pub struct RateLimiter {
|
||||||
/// The number of request that are allowed within a provided time limit.
|
/// The number of request that are allowed within a provided time limit.
|
||||||
pub number_of_requests: u8,
|
pub number_of_requests: u8,
|
||||||
|
@ -9,22 +9,25 @@ use crate::models::{
|
|||||||
engine_models::{EngineError, EngineHandler},
|
engine_models::{EngineError, EngineHandler},
|
||||||
};
|
};
|
||||||
use error_stack::Report;
|
use error_stack::Report;
|
||||||
|
use futures::stream::FuturesUnordered;
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use reqwest::{Client, ClientBuilder};
|
use reqwest::{Client, ClientBuilder};
|
||||||
|
use std::sync::Arc;
|
||||||
use std::time::{SystemTime, UNIX_EPOCH};
|
use std::time::{SystemTime, UNIX_EPOCH};
|
||||||
use std::{
|
use tokio::{
|
||||||
collections::HashMap,
|
fs::File,
|
||||||
io::{BufReader, Read},
|
io::{AsyncBufReadExt, BufReader},
|
||||||
|
task::JoinHandle,
|
||||||
time::Duration,
|
time::Duration,
|
||||||
};
|
};
|
||||||
use std::{fs::File, io::BufRead};
|
|
||||||
use tokio::task::JoinHandle;
|
|
||||||
|
|
||||||
/// A constant for holding the prebuilt Client globally in the app.
|
/// A constant for holding the prebuilt Client globally in the app.
|
||||||
static CLIENT: std::sync::OnceLock<Client> = std::sync::OnceLock::new();
|
static CLIENT: std::sync::OnceLock<Client> = std::sync::OnceLock::new();
|
||||||
|
|
||||||
/// Aliases for long type annotations
|
/// Aliases for long type annotations
|
||||||
type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<EngineError>>>>;
|
|
||||||
|
type FutureVec =
|
||||||
|
FuturesUnordered<JoinHandle<Result<Vec<(String, SearchResult)>, Report<EngineError>>>>;
|
||||||
|
|
||||||
/// The function aggregates the scraped results from the user-selected upstream search engines.
|
/// The function aggregates the scraped results from the user-selected upstream search engines.
|
||||||
/// These engines can be chosen either from the user interface (UI) or from the configuration file.
|
/// These engines can be chosen either from the user interface (UI) or from the configuration file.
|
||||||
@ -37,7 +40,7 @@ type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<Eng
|
|||||||
///
|
///
|
||||||
/// Additionally, the function eliminates duplicate results. If two results are identified as coming from
|
/// Additionally, the function eliminates duplicate results. If two results are identified as coming from
|
||||||
/// multiple engines, their names are combined to indicate that the results were fetched from these upstream
|
/// multiple engines, their names are combined to indicate that the results were fetched from these upstream
|
||||||
/// engines. After this, all the data in the `HashMap` is removed and placed into a struct that contains all
|
/// engines. After this, all the data in the `Vec` is removed and placed into a struct that contains all
|
||||||
/// the aggregated results in a vector. Furthermore, the query used is also added to the struct. This step is
|
/// the aggregated results in a vector. Furthermore, the query used is also added to the struct. This step is
|
||||||
/// necessary to ensure that the search bar in the search remains populated even when searched from the query URL.
|
/// necessary to ensure that the search bar in the search remains populated even when searched from the query URL.
|
||||||
///
|
///
|
||||||
@ -94,15 +97,22 @@ pub async fn aggregate(
|
|||||||
let mut names: Vec<&str> = Vec::with_capacity(0);
|
let mut names: Vec<&str> = Vec::with_capacity(0);
|
||||||
|
|
||||||
// create tasks for upstream result fetching
|
// create tasks for upstream result fetching
|
||||||
let mut tasks: FutureVec = FutureVec::new();
|
let tasks: FutureVec = FutureVec::new();
|
||||||
|
|
||||||
|
let query: Arc<String> = Arc::new(query.to_string());
|
||||||
for engine_handler in upstream_search_engines {
|
for engine_handler in upstream_search_engines {
|
||||||
let (name, search_engine) = engine_handler.to_owned().into_name_engine();
|
let (name, search_engine) = engine_handler.clone().into_name_engine();
|
||||||
names.push(name);
|
names.push(name);
|
||||||
let query: String = query.to_owned();
|
let query_partially_cloned = query.clone();
|
||||||
tasks.push(tokio::spawn(async move {
|
tasks.push(tokio::spawn(async move {
|
||||||
search_engine
|
search_engine
|
||||||
.results(&query, page, user_agent, client, safe_search)
|
.results(
|
||||||
|
&query_partially_cloned,
|
||||||
|
page,
|
||||||
|
user_agent,
|
||||||
|
client,
|
||||||
|
safe_search,
|
||||||
|
)
|
||||||
.await
|
.await
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
@ -117,7 +127,7 @@ pub async fn aggregate(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// aggregate search results, removing duplicates and handling errors the upstream engines returned
|
// aggregate search results, removing duplicates and handling errors the upstream engines returned
|
||||||
let mut result_map: HashMap<String, SearchResult> = HashMap::new();
|
let mut result_map: Vec<(String, SearchResult)> = Vec::new();
|
||||||
let mut engine_errors_info: Vec<EngineErrorInfo> = Vec::new();
|
let mut engine_errors_info: Vec<EngineErrorInfo> = Vec::new();
|
||||||
|
|
||||||
let mut handle_error = |error: &Report<EngineError>, engine_name: &'static str| {
|
let mut handle_error = |error: &Report<EngineError>, engine_name: &'static str| {
|
||||||
@ -134,51 +144,45 @@ pub async fn aggregate(
|
|||||||
|
|
||||||
if result_map.is_empty() {
|
if result_map.is_empty() {
|
||||||
match response {
|
match response {
|
||||||
Ok(results) => {
|
Ok(results) => result_map = results,
|
||||||
result_map = results.clone();
|
Err(error) => handle_error(&error, engine),
|
||||||
}
|
};
|
||||||
Err(error) => {
|
|
||||||
handle_error(&error, engine);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
match response {
|
match response {
|
||||||
Ok(result) => {
|
Ok(result) => {
|
||||||
result.into_iter().for_each(|(key, value)| {
|
result.into_iter().for_each(|(key, value)| {
|
||||||
result_map
|
match result_map.iter().find(|(key_s, _)| key_s == &key) {
|
||||||
.entry(key)
|
Some(value) => value.1.to_owned().add_engines(engine),
|
||||||
.and_modify(|result| {
|
None => result_map.push((key, value)),
|
||||||
result.add_engines(engine);
|
};
|
||||||
})
|
|
||||||
.or_insert_with(|| -> SearchResult { value });
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
Err(error) => {
|
Err(error) => handle_error(&error, engine),
|
||||||
handle_error(&error, engine);
|
};
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if safe_search >= 3 {
|
if safe_search >= 3 {
|
||||||
let mut blacklist_map: HashMap<String, SearchResult> = HashMap::new();
|
let mut blacklist_map: Vec<(String, SearchResult)> = Vec::new();
|
||||||
filter_with_lists(
|
filter_with_lists(
|
||||||
&mut result_map,
|
&mut result_map,
|
||||||
&mut blacklist_map,
|
&mut blacklist_map,
|
||||||
file_path(FileType::BlockList)?,
|
file_path(FileType::BlockList)?,
|
||||||
)?;
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
filter_with_lists(
|
filter_with_lists(
|
||||||
&mut blacklist_map,
|
&mut blacklist_map,
|
||||||
&mut result_map,
|
&mut result_map,
|
||||||
file_path(FileType::AllowList)?,
|
file_path(FileType::AllowList)?,
|
||||||
)?;
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
drop(blacklist_map);
|
drop(blacklist_map);
|
||||||
}
|
}
|
||||||
|
|
||||||
let results: Vec<SearchResult> = result_map.into_values().collect();
|
let results: Vec<SearchResult> = result_map.iter().map(|(_, value)| value.clone()).collect();
|
||||||
|
|
||||||
Ok(SearchResults::new(results, &engine_errors_info))
|
Ok(SearchResults::new(results, &engine_errors_info))
|
||||||
}
|
}
|
||||||
@ -187,35 +191,41 @@ pub async fn aggregate(
|
|||||||
///
|
///
|
||||||
/// # Arguments
|
/// # Arguments
|
||||||
///
|
///
|
||||||
/// * `map_to_be_filtered` - A mutable reference to a `HashMap` of search results to filter, where the filtered results will be removed from.
|
/// * `map_to_be_filtered` - A mutable reference to a `Vec` of search results to filter, where the filtered results will be removed from.
|
||||||
/// * `resultant_map` - A mutable reference to a `HashMap` to hold the filtered results.
|
/// * `resultant_map` - A mutable reference to a `Vec` to hold the filtered results.
|
||||||
/// * `file_path` - A `&str` representing the path to a file containing regex patterns to use for filtering.
|
/// * `file_path` - A `&str` representing the path to a file containing regex patterns to use for filtering.
|
||||||
///
|
///
|
||||||
/// # Errors
|
/// # Errors
|
||||||
///
|
///
|
||||||
/// Returns an error if the file at `file_path` cannot be opened or read, or if a regex pattern is invalid.
|
/// Returns an error if the file at `file_path` cannot be opened or read, or if a regex pattern is invalid.
|
||||||
pub fn filter_with_lists(
|
pub async fn filter_with_lists(
|
||||||
map_to_be_filtered: &mut HashMap<String, SearchResult>,
|
map_to_be_filtered: &mut Vec<(String, SearchResult)>,
|
||||||
resultant_map: &mut HashMap<String, SearchResult>,
|
resultant_map: &mut Vec<(String, SearchResult)>,
|
||||||
file_path: &str,
|
file_path: &str,
|
||||||
) -> Result<(), Box<dyn std::error::Error>> {
|
) -> Result<(), Box<dyn std::error::Error>> {
|
||||||
let mut reader = BufReader::new(File::open(file_path)?);
|
let reader = BufReader::new(File::open(file_path).await?);
|
||||||
|
let mut lines = reader.lines();
|
||||||
|
|
||||||
for line in reader.by_ref().lines() {
|
while let Some(line) = lines.next_line().await? {
|
||||||
let re = Regex::new(line?.trim())?;
|
let re = Regex::new(line.trim())?;
|
||||||
|
|
||||||
|
let mut length = map_to_be_filtered.len();
|
||||||
|
let mut idx: usize = Default::default();
|
||||||
// Iterate over each search result in the map and check if it matches the regex pattern
|
// Iterate over each search result in the map and check if it matches the regex pattern
|
||||||
for (url, search_result) in map_to_be_filtered.clone().into_iter() {
|
while idx < length {
|
||||||
if re.is_match(&url.to_lowercase())
|
let ele = &map_to_be_filtered[idx];
|
||||||
|| re.is_match(&search_result.title.to_lowercase())
|
let ele_inner = &ele.1;
|
||||||
|| re.is_match(&search_result.description.to_lowercase())
|
match re.is_match(&ele.0.to_lowercase())
|
||||||
|
|| re.is_match(&ele_inner.title.to_lowercase())
|
||||||
|
|| re.is_match(&ele_inner.description.to_lowercase())
|
||||||
{
|
{
|
||||||
// If the search result matches the regex pattern, move it from the original map to the resultant map
|
true => {
|
||||||
resultant_map.insert(
|
// If the search result matches the regex pattern, move it from the original map to the resultant map
|
||||||
url.to_owned(),
|
resultant_map.push(map_to_be_filtered.swap_remove(idx));
|
||||||
map_to_be_filtered.remove(&url.to_owned()).unwrap(),
|
length -= 1;
|
||||||
);
|
}
|
||||||
}
|
false => idx += 1,
|
||||||
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -226,15 +236,14 @@ pub fn filter_with_lists(
|
|||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use smallvec::smallvec;
|
use smallvec::smallvec;
|
||||||
use std::collections::HashMap;
|
|
||||||
use std::io::Write;
|
use std::io::Write;
|
||||||
use tempfile::NamedTempFile;
|
use tempfile::NamedTempFile;
|
||||||
|
|
||||||
#[test]
|
#[tokio::test]
|
||||||
fn test_filter_with_lists() -> Result<(), Box<dyn std::error::Error>> {
|
async fn test_filter_with_lists() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
// Create a map of search results to filter
|
// Create a map of search results to filter
|
||||||
let mut map_to_be_filtered = HashMap::new();
|
let mut map_to_be_filtered = Vec::new();
|
||||||
map_to_be_filtered.insert(
|
map_to_be_filtered.push((
|
||||||
"https://www.example.com".to_owned(),
|
"https://www.example.com".to_owned(),
|
||||||
SearchResult {
|
SearchResult {
|
||||||
title: "Example Domain".to_owned(),
|
title: "Example Domain".to_owned(),
|
||||||
@ -243,15 +252,15 @@ mod tests {
|
|||||||
.to_owned(),
|
.to_owned(),
|
||||||
engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
|
engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
|
||||||
},
|
},
|
||||||
);
|
));
|
||||||
map_to_be_filtered.insert(
|
map_to_be_filtered.push((
|
||||||
"https://www.rust-lang.org/".to_owned(),
|
"https://www.rust-lang.org/".to_owned(),
|
||||||
SearchResult {
|
SearchResult {
|
||||||
title: "Rust Programming Language".to_owned(),
|
title: "Rust Programming Language".to_owned(),
|
||||||
url: "https://www.rust-lang.org/".to_owned(),
|
url: "https://www.rust-lang.org/".to_owned(),
|
||||||
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
|
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
|
||||||
engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
|
engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
|
||||||
},
|
},)
|
||||||
);
|
);
|
||||||
|
|
||||||
// Create a temporary file with regex patterns
|
// Create a temporary file with regex patterns
|
||||||
@ -260,25 +269,30 @@ mod tests {
|
|||||||
writeln!(file, "rust")?;
|
writeln!(file, "rust")?;
|
||||||
file.flush()?;
|
file.flush()?;
|
||||||
|
|
||||||
let mut resultant_map = HashMap::new();
|
let mut resultant_map = Vec::new();
|
||||||
filter_with_lists(
|
filter_with_lists(
|
||||||
&mut map_to_be_filtered,
|
&mut map_to_be_filtered,
|
||||||
&mut resultant_map,
|
&mut resultant_map,
|
||||||
file.path().to_str().unwrap(),
|
file.path().to_str().unwrap(),
|
||||||
)?;
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
assert_eq!(resultant_map.len(), 2);
|
assert_eq!(resultant_map.len(), 2);
|
||||||
assert!(resultant_map.contains_key("https://www.example.com"));
|
assert!(resultant_map
|
||||||
assert!(resultant_map.contains_key("https://www.rust-lang.org/"));
|
.iter()
|
||||||
|
.any(|(key, _)| key == "https://www.example.com"));
|
||||||
|
assert!(resultant_map
|
||||||
|
.iter()
|
||||||
|
.any(|(key, _)| key == "https://www.rust-lang.org/"));
|
||||||
assert_eq!(map_to_be_filtered.len(), 0);
|
assert_eq!(map_to_be_filtered.len(), 0);
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[tokio::test]
|
||||||
fn test_filter_with_lists_wildcard() -> Result<(), Box<dyn std::error::Error>> {
|
async fn test_filter_with_lists_wildcard() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
let mut map_to_be_filtered = HashMap::new();
|
let mut map_to_be_filtered = Vec::new();
|
||||||
map_to_be_filtered.insert(
|
map_to_be_filtered.push((
|
||||||
"https://www.example.com".to_owned(),
|
"https://www.example.com".to_owned(),
|
||||||
SearchResult {
|
SearchResult {
|
||||||
title: "Example Domain".to_owned(),
|
title: "Example Domain".to_owned(),
|
||||||
@ -287,8 +301,8 @@ mod tests {
|
|||||||
.to_owned(),
|
.to_owned(),
|
||||||
engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
|
engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
|
||||||
},
|
},
|
||||||
);
|
));
|
||||||
map_to_be_filtered.insert(
|
map_to_be_filtered.push((
|
||||||
"https://www.rust-lang.org/".to_owned(),
|
"https://www.rust-lang.org/".to_owned(),
|
||||||
SearchResult {
|
SearchResult {
|
||||||
title: "Rust Programming Language".to_owned(),
|
title: "Rust Programming Language".to_owned(),
|
||||||
@ -296,34 +310,39 @@ mod tests {
|
|||||||
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
|
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
|
||||||
engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
|
engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
|
||||||
},
|
},
|
||||||
);
|
));
|
||||||
|
|
||||||
// Create a temporary file with a regex pattern containing a wildcard
|
// Create a temporary file with a regex pattern containing a wildcard
|
||||||
let mut file = NamedTempFile::new()?;
|
let mut file = NamedTempFile::new()?;
|
||||||
writeln!(file, "ex.*le")?;
|
writeln!(file, "ex.*le")?;
|
||||||
file.flush()?;
|
file.flush()?;
|
||||||
|
|
||||||
let mut resultant_map = HashMap::new();
|
let mut resultant_map = Vec::new();
|
||||||
|
|
||||||
filter_with_lists(
|
filter_with_lists(
|
||||||
&mut map_to_be_filtered,
|
&mut map_to_be_filtered,
|
||||||
&mut resultant_map,
|
&mut resultant_map,
|
||||||
file.path().to_str().unwrap(),
|
file.path().to_str().unwrap(),
|
||||||
)?;
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
assert_eq!(resultant_map.len(), 1);
|
assert_eq!(resultant_map.len(), 1);
|
||||||
assert!(resultant_map.contains_key("https://www.example.com"));
|
assert!(resultant_map
|
||||||
|
.iter()
|
||||||
|
.any(|(key, _)| key == "https://www.example.com"));
|
||||||
assert_eq!(map_to_be_filtered.len(), 1);
|
assert_eq!(map_to_be_filtered.len(), 1);
|
||||||
assert!(map_to_be_filtered.contains_key("https://www.rust-lang.org/"));
|
assert!(map_to_be_filtered
|
||||||
|
.iter()
|
||||||
|
.any(|(key, _)| key == "https://www.rust-lang.org/"));
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[tokio::test]
|
||||||
fn test_filter_with_lists_file_not_found() {
|
async fn test_filter_with_lists_file_not_found() {
|
||||||
let mut map_to_be_filtered = HashMap::new();
|
let mut map_to_be_filtered = Vec::new();
|
||||||
|
|
||||||
let mut resultant_map = HashMap::new();
|
let mut resultant_map = Vec::new();
|
||||||
|
|
||||||
// Call the `filter_with_lists` function with a non-existent file path
|
// Call the `filter_with_lists` function with a non-existent file path
|
||||||
let result = filter_with_lists(
|
let result = filter_with_lists(
|
||||||
@ -332,13 +351,13 @@ mod tests {
|
|||||||
"non-existent-file.txt",
|
"non-existent-file.txt",
|
||||||
);
|
);
|
||||||
|
|
||||||
assert!(result.is_err());
|
assert!(result.await.is_err());
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[tokio::test]
|
||||||
fn test_filter_with_lists_invalid_regex() {
|
async fn test_filter_with_lists_invalid_regex() {
|
||||||
let mut map_to_be_filtered = HashMap::new();
|
let mut map_to_be_filtered = Vec::new();
|
||||||
map_to_be_filtered.insert(
|
map_to_be_filtered.push((
|
||||||
"https://www.example.com".to_owned(),
|
"https://www.example.com".to_owned(),
|
||||||
SearchResult {
|
SearchResult {
|
||||||
title: "Example Domain".to_owned(),
|
title: "Example Domain".to_owned(),
|
||||||
@ -347,9 +366,9 @@ mod tests {
|
|||||||
.to_owned(),
|
.to_owned(),
|
||||||
engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
|
engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
|
||||||
},
|
},
|
||||||
);
|
));
|
||||||
|
|
||||||
let mut resultant_map = HashMap::new();
|
let mut resultant_map = Vec::new();
|
||||||
|
|
||||||
// Create a temporary file with an invalid regex pattern
|
// Create a temporary file with an invalid regex pattern
|
||||||
let mut file = NamedTempFile::new().unwrap();
|
let mut file = NamedTempFile::new().unwrap();
|
||||||
@ -362,6 +381,6 @@ mod tests {
|
|||||||
file.path().to_str().unwrap(),
|
file.path().to_str().unwrap(),
|
||||||
);
|
);
|
||||||
|
|
||||||
assert!(result.is_err());
|
assert!(result.await.is_err());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -7,11 +7,13 @@ use crate::{
|
|||||||
handler::{file_path, FileType},
|
handler::{file_path, FileType},
|
||||||
};
|
};
|
||||||
use actix_web::{get, http::header::ContentType, web, HttpRequest, HttpResponse};
|
use actix_web::{get, http::header::ContentType, web, HttpRequest, HttpResponse};
|
||||||
use std::fs::read_to_string;
|
use tokio::fs::read_to_string;
|
||||||
|
|
||||||
/// Handles the route of index page or main page of the `websurfx` meta search engine website.
|
/// Handles the route of index page or main page of the `websurfx` meta search engine website.
|
||||||
#[get("/")]
|
#[get("/")]
|
||||||
pub async fn index(config: web::Data<Config>) -> Result<HttpResponse, Box<dyn std::error::Error>> {
|
pub async fn index(
|
||||||
|
config: web::Data<&'static Config>,
|
||||||
|
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
|
||||||
Ok(HttpResponse::Ok().content_type(ContentType::html()).body(
|
Ok(HttpResponse::Ok().content_type(ContentType::html()).body(
|
||||||
crate::templates::views::index::index(
|
crate::templates::views::index::index(
|
||||||
&config.style.colorscheme,
|
&config.style.colorscheme,
|
||||||
@ -25,7 +27,7 @@ pub async fn index(config: web::Data<Config>) -> Result<HttpResponse, Box<dyn st
|
|||||||
/// Handles the route of any other accessed route/page which is not provided by the
|
/// Handles the route of any other accessed route/page which is not provided by the
|
||||||
/// website essentially the 404 error page.
|
/// website essentially the 404 error page.
|
||||||
pub async fn not_found(
|
pub async fn not_found(
|
||||||
config: web::Data<Config>,
|
config: web::Data<&'static Config>,
|
||||||
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
|
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
|
||||||
Ok(HttpResponse::Ok().content_type(ContentType::html()).body(
|
Ok(HttpResponse::Ok().content_type(ContentType::html()).body(
|
||||||
crate::templates::views::not_found::not_found(
|
crate::templates::views::not_found::not_found(
|
||||||
@ -41,7 +43,7 @@ pub async fn not_found(
|
|||||||
#[get("/robots.txt")]
|
#[get("/robots.txt")]
|
||||||
pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
|
pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
|
||||||
let page_content: String =
|
let page_content: String =
|
||||||
read_to_string(format!("{}/robots.txt", file_path(FileType::Theme)?))?;
|
read_to_string(format!("{}/robots.txt", file_path(FileType::Theme)?)).await?;
|
||||||
Ok(HttpResponse::Ok()
|
Ok(HttpResponse::Ok()
|
||||||
.content_type(ContentType::plaintext())
|
.content_type(ContentType::plaintext())
|
||||||
.body(page_content))
|
.body(page_content))
|
||||||
@ -49,7 +51,9 @@ pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std:
|
|||||||
|
|
||||||
/// Handles the route of about page of the `websurfx` meta search engine website.
|
/// Handles the route of about page of the `websurfx` meta search engine website.
|
||||||
#[get("/about")]
|
#[get("/about")]
|
||||||
pub async fn about(config: web::Data<Config>) -> Result<HttpResponse, Box<dyn std::error::Error>> {
|
pub async fn about(
|
||||||
|
config: web::Data<&'static Config>,
|
||||||
|
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
|
||||||
Ok(HttpResponse::Ok().content_type(ContentType::html()).body(
|
Ok(HttpResponse::Ok().content_type(ContentType::html()).body(
|
||||||
crate::templates::views::about::about(
|
crate::templates::views::about::about(
|
||||||
&config.style.colorscheme,
|
&config.style.colorscheme,
|
||||||
@ -63,7 +67,7 @@ pub async fn about(config: web::Data<Config>) -> Result<HttpResponse, Box<dyn st
|
|||||||
/// Handles the route of settings page of the `websurfx` meta search engine website.
|
/// Handles the route of settings page of the `websurfx` meta search engine website.
|
||||||
#[get("/settings")]
|
#[get("/settings")]
|
||||||
pub async fn settings(
|
pub async fn settings(
|
||||||
config: web::Data<Config>,
|
config: web::Data<&'static Config>,
|
||||||
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
|
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
|
||||||
Ok(HttpResponse::Ok().content_type(ContentType::html()).body(
|
Ok(HttpResponse::Ok().content_type(ContentType::html()).body(
|
||||||
crate::templates::views::settings::settings(
|
crate::templates::views::settings::settings(
|
||||||
|
@ -13,12 +13,12 @@ use crate::{
|
|||||||
};
|
};
|
||||||
use actix_web::{get, http::header::ContentType, web, HttpRequest, HttpResponse};
|
use actix_web::{get, http::header::ContentType, web, HttpRequest, HttpResponse};
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use std::{
|
use std::borrow::Cow;
|
||||||
borrow::Cow,
|
use tokio::{
|
||||||
fs::File,
|
fs::File,
|
||||||
io::{BufRead, BufReader, Read},
|
io::{AsyncBufReadExt, BufReader},
|
||||||
|
join,
|
||||||
};
|
};
|
||||||
use tokio::join;
|
|
||||||
|
|
||||||
/// Handles the route of search page of the `websurfx` meta search engine website and it takes
|
/// Handles the route of search page of the `websurfx` meta search engine website and it takes
|
||||||
/// two search url parameters `q` and `page` where `page` parameter is optional.
|
/// two search url parameters `q` and `page` where `page` parameter is optional.
|
||||||
@ -37,8 +37,8 @@ use tokio::join;
|
|||||||
#[get("/search")]
|
#[get("/search")]
|
||||||
pub async fn search(
|
pub async fn search(
|
||||||
req: HttpRequest,
|
req: HttpRequest,
|
||||||
config: web::Data<Config>,
|
config: web::Data<&'static Config>,
|
||||||
cache: web::Data<SharedCache>,
|
cache: web::Data<&'static SharedCache>,
|
||||||
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
|
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
let params = web::Query::<SearchParams>::from_query(req.query_string())?;
|
let params = web::Query::<SearchParams>::from_query(req.query_string())?;
|
||||||
@ -70,8 +70,8 @@ pub async fn search(
|
|||||||
});
|
});
|
||||||
|
|
||||||
search_settings.safe_search_level = get_safesearch_level(
|
search_settings.safe_search_level = get_safesearch_level(
|
||||||
&Some(search_settings.safe_search_level),
|
params.safesearch,
|
||||||
¶ms.safesearch,
|
search_settings.safe_search_level,
|
||||||
config.safe_search,
|
config.safe_search,
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -158,8 +158,8 @@ pub async fn search(
|
|||||||
/// It returns the `SearchResults` struct if the search results could be successfully fetched from
|
/// It returns the `SearchResults` struct if the search results could be successfully fetched from
|
||||||
/// the cache or from the upstream search engines otherwise it returns an appropriate error.
|
/// the cache or from the upstream search engines otherwise it returns an appropriate error.
|
||||||
async fn results(
|
async fn results(
|
||||||
config: &Config,
|
config: &'static Config,
|
||||||
cache: &web::Data<SharedCache>,
|
cache: &'static SharedCache,
|
||||||
query: &str,
|
query: &str,
|
||||||
page: u32,
|
page: u32,
|
||||||
search_settings: &server_models::Cookie<'_>,
|
search_settings: &server_models::Cookie<'_>,
|
||||||
@ -188,7 +188,7 @@ async fn results(
|
|||||||
let mut results: SearchResults = SearchResults::default();
|
let mut results: SearchResults = SearchResults::default();
|
||||||
|
|
||||||
let flag: bool =
|
let flag: bool =
|
||||||
!is_match_from_filter_list(file_path(FileType::BlockList)?, query)?;
|
!is_match_from_filter_list(file_path(FileType::BlockList)?, query).await?;
|
||||||
// Return early when query contains disallowed words,
|
// Return early when query contains disallowed words,
|
||||||
if flag {
|
if flag {
|
||||||
results.set_disallowed();
|
results.set_disallowed();
|
||||||
@ -225,12 +225,12 @@ async fn results(
|
|||||||
search_results
|
search_results
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
if results.engine_errors_info().is_empty()
|
let (engine_errors_info, results_empty_check, no_engines_selected) = (
|
||||||
&& results.results().is_empty()
|
results.engine_errors_info().is_empty(),
|
||||||
&& !results.no_engines_selected()
|
results.results().is_empty(),
|
||||||
{
|
results.no_engines_selected(),
|
||||||
results.set_filtered();
|
);
|
||||||
}
|
results.set_filtered(engine_errors_info & results_empty_check & !no_engines_selected);
|
||||||
cache
|
cache
|
||||||
.cache_results(&[results.clone()], &[cache_key.clone()])
|
.cache_results(&[results.clone()], &[cache_key.clone()])
|
||||||
.await?;
|
.await?;
|
||||||
@ -252,13 +252,14 @@ async fn results(
|
|||||||
///
|
///
|
||||||
/// Returns a bool indicating whether the results were found in the list or not on success
|
/// Returns a bool indicating whether the results were found in the list or not on success
|
||||||
/// otherwise returns a standard error type on a failure.
|
/// otherwise returns a standard error type on a failure.
|
||||||
fn is_match_from_filter_list(
|
async fn is_match_from_filter_list(
|
||||||
file_path: &str,
|
file_path: &str,
|
||||||
query: &str,
|
query: &str,
|
||||||
) -> Result<bool, Box<dyn std::error::Error>> {
|
) -> Result<bool, Box<dyn std::error::Error>> {
|
||||||
let mut reader = BufReader::new(File::open(file_path)?);
|
let reader = BufReader::new(File::open(file_path).await?);
|
||||||
for line in reader.by_ref().lines() {
|
let mut lines = reader.lines();
|
||||||
let re = Regex::new(&line?)?;
|
while let Some(line) = lines.next_line().await? {
|
||||||
|
let re = Regex::new(&line)?;
|
||||||
if re.is_match(query) {
|
if re.is_match(query) {
|
||||||
return Ok(true);
|
return Ok(true);
|
||||||
}
|
}
|
||||||
@ -267,24 +268,95 @@ fn is_match_from_filter_list(
|
|||||||
Ok(false)
|
Ok(false)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A helper function to modify the safe search level based on the url params.
|
/// A helper function to choose the safe search level value based on the URL parameters,
|
||||||
/// The `safe_search` is the one in the user's cookie or
|
/// cookie value and config value.
|
||||||
/// the default set by the server config if the cookie was missing.
|
|
||||||
///
|
///
|
||||||
/// # Argurments
|
/// # Argurments
|
||||||
///
|
///
|
||||||
/// * `url_level` - Safe search level from the url.
|
/// * `safe_search_level_from_url` - Safe search level from the URL parameters.
|
||||||
/// * `safe_search` - User's cookie, or the safe search level set by the server
|
/// * `cookie_safe_search_level` - Safe search level value from the cookie.
|
||||||
/// * `config_level` - Safe search level to fall back to
|
/// * `config_safe_search_level` - Safe search level value from the config file.
|
||||||
fn get_safesearch_level(cookie_level: &Option<u8>, url_level: &Option<u8>, config_level: u8) -> u8 {
|
///
|
||||||
match url_level {
|
/// # Returns
|
||||||
Some(url_level) => {
|
///
|
||||||
if *url_level >= 3 {
|
/// Returns an appropriate safe search level value based on the safe search level values
|
||||||
config_level
|
/// from the URL parameters, cookie and the config file.
|
||||||
} else {
|
fn get_safesearch_level(
|
||||||
*url_level
|
safe_search_level_from_url: Option<u8>,
|
||||||
}
|
cookie_safe_search_level: u8,
|
||||||
|
config_safe_search_level: u8,
|
||||||
|
) -> u8 {
|
||||||
|
(u8::from(safe_search_level_from_url.is_some())
|
||||||
|
* ((u8::from(config_safe_search_level >= 3) * config_safe_search_level)
|
||||||
|
+ (u8::from(config_safe_search_level < 3) * safe_search_level_from_url.unwrap_or(0))))
|
||||||
|
+ (u8::from(safe_search_level_from_url.is_none())
|
||||||
|
* ((u8::from(config_safe_search_level >= 3) * config_safe_search_level)
|
||||||
|
+ (u8::from(config_safe_search_level < 3) * cookie_safe_search_level)))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use std::time::{SystemTime, UNIX_EPOCH};
|
||||||
|
|
||||||
|
/// A helper function which creates a random mock safe search level value.
|
||||||
|
///
|
||||||
|
/// # Returns
|
||||||
|
///
|
||||||
|
/// Returns an optional u8 value.
|
||||||
|
fn mock_safe_search_level_value() -> Option<u8> {
|
||||||
|
let nanos = SystemTime::now()
|
||||||
|
.duration_since(UNIX_EPOCH)
|
||||||
|
.unwrap_or_default()
|
||||||
|
.subsec_nanos() as f32;
|
||||||
|
let delay = ((nanos / 1_0000_0000 as f32).floor() as i8) - 1;
|
||||||
|
|
||||||
|
match delay {
|
||||||
|
-1 => None,
|
||||||
|
some_num => Some(if some_num > 4 { some_num - 4 } else { some_num } as u8),
|
||||||
}
|
}
|
||||||
None => cookie_level.unwrap_or(config_level),
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
/// A test function to test whether the output of the branchless and branched code
|
||||||
|
/// for the code to choose the appropriate safe search level is same or not.
|
||||||
|
fn get_safesearch_level_branched_branchless_code_test() {
|
||||||
|
// Get mock values for the safe search level values for URL parameters, cookie
|
||||||
|
// and config.
|
||||||
|
let safe_search_level_from_url = mock_safe_search_level_value();
|
||||||
|
let cookie_safe_search_level = mock_safe_search_level_value().unwrap_or(0);
|
||||||
|
let config_safe_search_level = mock_safe_search_level_value().unwrap_or(0);
|
||||||
|
|
||||||
|
// Branched code
|
||||||
|
let safe_search_level_value_from_branched_code = match safe_search_level_from_url {
|
||||||
|
Some(safe_search_level_from_url_parsed) => {
|
||||||
|
if config_safe_search_level >= 3 {
|
||||||
|
config_safe_search_level
|
||||||
|
} else {
|
||||||
|
safe_search_level_from_url_parsed
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
if config_safe_search_level >= 3 {
|
||||||
|
config_safe_search_level
|
||||||
|
} else {
|
||||||
|
cookie_safe_search_level
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// branchless code
|
||||||
|
let safe_search_level_value_from_branchless_code =
|
||||||
|
(u8::from(safe_search_level_from_url.is_some())
|
||||||
|
* ((u8::from(config_safe_search_level >= 3) * config_safe_search_level)
|
||||||
|
+ (u8::from(config_safe_search_level < 3)
|
||||||
|
* safe_search_level_from_url.unwrap_or(0))))
|
||||||
|
+ (u8::from(safe_search_level_from_url.is_none())
|
||||||
|
* ((u8::from(config_safe_search_level >= 3) * config_safe_search_level)
|
||||||
|
+ (u8::from(config_safe_search_level < 3) * cookie_safe_search_level)));
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
safe_search_level_value_from_branched_code,
|
||||||
|
safe_search_level_value_from_branchless_code
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -55,7 +55,7 @@ pub fn engines(engine_names: &HashMap<String, bool>) -> Markup {
|
|||||||
input type="checkbox" class="engine" checked;
|
input type="checkbox" class="engine" checked;
|
||||||
span class="slider round"{}
|
span class="slider round"{}
|
||||||
}
|
}
|
||||||
(format!("{}{}",engine_name[..1].to_uppercase().to_owned(), engine_name[1..].to_owned()))
|
(format!("{}{}",&engine_name[..1].to_uppercase(), &engine_name[1..]))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@else {
|
@else {
|
||||||
@ -64,7 +64,7 @@ pub fn engines(engine_names: &HashMap<String, bool>) -> Markup {
|
|||||||
input type="checkbox" class="engine";
|
input type="checkbox" class="engine";
|
||||||
span class="slider round"{}
|
span class="slider round"{}
|
||||||
}
|
}
|
||||||
(format!("{}{}",engine_name[..1].to_uppercase().to_owned(), engine_name[1..].to_owned()))
|
(format!("{}{}",&engine_name[..1], &engine_name[1..]))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -36,7 +36,7 @@ fn style_option_list(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if style_type == "animations" {
|
if style_type == "animations" {
|
||||||
style_option_names.push(("".to_owned(), "none".to_owned()))
|
style_option_names.push((String::default(), "none".to_owned()))
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(style_option_names)
|
Ok(style_option_names)
|
||||||
@ -83,9 +83,11 @@ pub fn user_interface(
|
|||||||
"Select the animation for your theme to be used in user interface"
|
"Select the animation for your theme to be used in user interface"
|
||||||
}
|
}
|
||||||
select name="animations"{
|
select name="animations"{
|
||||||
|
@let default_animation = &String::default();
|
||||||
|
@let animation = animation.as_ref().unwrap_or(default_animation);
|
||||||
// Sets the user selected animation name from the config file as the first option in the selection list.
|
// Sets the user selected animation name from the config file as the first option in the selection list.
|
||||||
option value=(animation.as_ref().unwrap_or(&"".to_owned())){(animation.as_ref().unwrap_or(&"".to_owned()).replace('-'," "))}
|
option value=(animation){(animation.replace('-'," "))}
|
||||||
@for (k,v) in style_option_list("animations", animation.as_ref().unwrap_or(&"".to_owned()))?{
|
@for (k,v) in style_option_list("animations", animation)?{
|
||||||
option value=(k){(v)}
|
option value=(k){(v)}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -38,7 +38,7 @@ pub fn search(
|
|||||||
small{(result.url)}
|
small{(result.url)}
|
||||||
p{(PreEscaped(&result.description))}
|
p{(PreEscaped(&result.description))}
|
||||||
.upstream_engines{
|
.upstream_engines{
|
||||||
@for name in result.clone().engine{
|
@for name in &result.engine {
|
||||||
span{(name)}
|
span{(name)}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,14 +1,17 @@
|
|||||||
use std::net::TcpListener;
|
use std::{net::TcpListener, sync::OnceLock};
|
||||||
|
|
||||||
use websurfx::{config::parser::Config, run, templates::views};
|
use websurfx::{config::parser::Config, run, templates::views};
|
||||||
|
|
||||||
|
/// A static constant for holding the parsed config.
|
||||||
|
static CONFIG: OnceLock<Config> = OnceLock::new();
|
||||||
|
|
||||||
// Starts a new instance of the HTTP server, bound to a random available port
|
// Starts a new instance of the HTTP server, bound to a random available port
|
||||||
async fn spawn_app() -> String {
|
async fn spawn_app() -> String {
|
||||||
// Binding to port 0 will trigger the OS to assign a port for us.
|
// Binding to port 0 will trigger the OS to assign a port for us.
|
||||||
let listener = TcpListener::bind("127.0.0.1:0").expect("Failed to bind random port");
|
let listener = TcpListener::bind("127.0.0.1:0").expect("Failed to bind random port");
|
||||||
let port = listener.local_addr().unwrap().port();
|
let port = listener.local_addr().unwrap().port();
|
||||||
let config = Config::parse(false).unwrap();
|
let config = CONFIG.get_or_init(|| Config::parse(false).unwrap());
|
||||||
let cache = websurfx::cache::cacher::create_cache(&config).await;
|
let cache = websurfx::cache::cacher::create_cache(config).await;
|
||||||
let server = run(listener, config, cache).expect("Failed to bind address");
|
let server = run(listener, config, cache).expect("Failed to bind address");
|
||||||
|
|
||||||
tokio::spawn(server);
|
tokio::spawn(server);
|
||||||
|
Loading…
Reference in New Issue
Block a user