0
0
mirror of https://github.com/neon-mmd/websurfx.git synced 2024-11-21 13:38:21 -05:00

Merge branch 'rolling' into FIX/463_results_from_different_search_engines_get_cached_as_the_same_key

This commit is contained in:
alamin655 2024-01-11 16:47:15 +05:30 committed by GitHub
commit 326131aac4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 432 additions and 15 deletions

101
Cargo.lock generated
View File

@ -243,6 +243,16 @@ version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
[[package]]
name = "aead"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d122413f284cf2d62fb1b7db97e02edb8cda96d769b16e443a4f6195e35662b0"
dependencies = [
"crypto-common",
"generic-array",
]
[[package]]
name = "ahash"
version = "0.7.7"
@ -600,6 +610,30 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chacha20"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3613f74bd2eac03dad61bd53dbe620703d4371614fe0bc3b9f04dd36fe4e818"
dependencies = [
"cfg-if 1.0.0",
"cipher",
"cpufeatures",
]
[[package]]
name = "chacha20poly1305"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "10cd79432192d1c0f4e1a0fef9527696cc039165d729fb41b3f4f4f354c2dc35"
dependencies = [
"aead",
"chacha20",
"cipher",
"poly1305",
"zeroize",
]
[[package]]
name = "ci_info"
version = "0.10.2"
@ -636,6 +670,17 @@ dependencies = [
"half",
]
[[package]]
name = "cipher"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad"
dependencies = [
"crypto-common",
"inout",
"zeroize",
]
[[package]]
name = "clap"
version = "4.4.12"
@ -897,6 +942,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
dependencies = [
"generic-array",
"rand_core 0.6.4",
"typenum",
]
@ -1709,6 +1755,15 @@ dependencies = [
"hashbrown 0.14.3",
]
[[package]]
name = "inout"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a0c10553d664a4d0bcff9f4215d0aac67a639cc68ef660840afe309b807bc9f5"
dependencies = [
"generic-array",
]
[[package]]
name = "iovec"
version = "0.1.4"
@ -2219,6 +2274,12 @@ version = "11.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575"
[[package]]
name = "opaque-debug"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5"
[[package]]
name = "openssl"
version = "0.10.62"
@ -2521,6 +2582,17 @@ version = "0.3.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69d3587f8a9e599cc7ec2c00e331f71c4e69a5f9a4b8a6efd5b07466b9736f9a"
[[package]]
name = "poly1305"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8159bd90725d2df49889a078b54f4f79e87f1f8a8444194cdca81d38f5393abf"
dependencies = [
"cpufeatures",
"opaque-debug",
"universal-hash",
]
[[package]]
name = "powerfmt"
version = "0.2.0"
@ -3392,6 +3464,12 @@ version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1884d1bc09741d466d9b14e6d37ac89d6909cbcac41dd9ae982d4d063bbedfc"
[[package]]
name = "subtle"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc"
[[package]]
name = "syn"
version = "0.15.44"
@ -3877,6 +3955,16 @@ version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c"
[[package]]
name = "universal-hash"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc1de2c688dc15305988b563c3854064043356019f97a4b46276fe734c4f07ea"
dependencies = [
"crypto-common",
"subtle",
]
[[package]]
name = "untrusted"
version = "0.9.0"
@ -4058,7 +4146,7 @@ checksum = "1778a42e8b3b90bff8d0f5032bf22250792889a5cdc752aa0020c84abe3aaf10"
[[package]]
name = "websurfx"
version = "1.7.3"
version = "1.9.0"
dependencies = [
"actix-cors",
"actix-files",
@ -4066,7 +4154,12 @@ dependencies = [
"actix-web",
"async-once-cell",
"async-trait",
"base64 0.21.5",
"blake3",
"brotli",
"cfg-if 1.0.0",
"chacha20",
"chacha20poly1305",
"criterion",
"dhat",
"env_logger",
@ -4328,3 +4421,9 @@ dependencies = [
"quote 1.0.33",
"syn 2.0.43",
]
[[package]]
name = "zeroize"
version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "525b4ec142c6b68a2d10f01f7bbf6755599ca3f81ea53b8431b7dd348f5fdb2d"

View File

@ -1,6 +1,6 @@
[package]
name = "websurfx"
version = "1.7.3"
version = "1.9.0"
edition = "2021"
description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
repository = "https://github.com/neon-mmd/websurfx"
@ -38,6 +38,11 @@ mimalloc = { version = "0.1.38", default-features = false }
async-once-cell = {version="0.5.3", default-features=false}
actix-governor = {version="0.5.0", default-features=false}
mini-moka = { version="0.10", optional = true, default-features=false, features=["sync"]}
brotli = { version = "3.4.0", default-features = false, features=["std"], optional=true}
chacha20poly1305={version="0.10.1", default-features=false, features=["alloc","getrandom"], optional=true}
chacha20 = {version="0.9.1", default-features=false, optional=true}
base64 = {version="0.21.5", default-features=false, features=["std"], optional=true}
cfg-if = {version="1.0.0", default-features=false,optional=true}
[dev-dependencies]
rusty-hook = {version="^0.11.2", default-features=false}
@ -78,4 +83,8 @@ strip = "debuginfo"
default = ["memory-cache"]
dhat-heap = ["dep:dhat"]
memory-cache = ["dep:mini-moka"]
redis-cache = ["dep:redis"]
redis-cache = ["dep:redis","dep:base64"]
compress-cache-results = ["dep:brotli","dep:cfg-if"]
encrypt-cache-results = ["dep:chacha20poly1305","dep:chacha20"]
cec-cache-results = ["compress-cache-results","encrypt-cache-results"]

View File

@ -2,8 +2,9 @@
//!
//! This module contains the main function which handles the logging of the application to the
//! stdout and handles the command line arguments provided and launches the `websurfx` server.
#[cfg(not(feature = "dhat-heap"))]
use mimalloc::MiMalloc;
use std::net::TcpListener;
use websurfx::{cache::cacher::create_cache, config::parser::Config, run};

279
src/cache/cacher.rs vendored
View File

@ -4,6 +4,7 @@
use error_stack::Report;
#[cfg(feature = "memory-cache")]
use mini_moka::sync::Cache as MokaCache;
#[cfg(feature = "memory-cache")]
use std::time::Duration;
use tokio::sync::Mutex;
@ -14,6 +15,9 @@ use super::error::CacheError;
#[cfg(feature = "redis-cache")]
use super::redis_cacher::RedisCache;
#[cfg(any(feature = "encrypt-cache-results", feature = "cec-cache-results"))]
use super::encryption::*;
/// Abstraction trait for common methods provided by a cache backend.
#[async_trait::async_trait]
pub trait Cacher: Send + Sync {
@ -69,6 +73,237 @@ pub trait Cacher: Send + Sync {
fn hash_url(&self, url: &str) -> String {
blake3::hash(url.as_bytes()).to_string()
}
/// A helper function that returns either encrypted or decrypted results.
/// Feature flags (**encrypt-cache-results or cec-cache-results**) are required for this to work.
///
/// # Arguments
///
/// * `bytes` - It takes a slice of bytes as an argument.
/// * `encrypt` - A boolean to choose whether to encrypt or decrypt the bytes
///
/// # Error
/// Returns either encrypted or decrypted bytes on success otherwise it returns a CacheError
/// on failure.
#[cfg(any(
// feature = "compress-cache-results",
feature = "encrypt-cache-results",
feature = "cec-cache-results"
))]
fn encrypt_or_decrypt_results(
&mut self,
mut bytes: Vec<u8>,
encrypt: bool,
) -> Result<Vec<u8>, Report<CacheError>> {
use chacha20poly1305::{
aead::{Aead, AeadCore, KeyInit, OsRng},
ChaCha20Poly1305,
};
let cipher = CIPHER.get_or_init(|| {
let key = ChaCha20Poly1305::generate_key(&mut OsRng);
ChaCha20Poly1305::new(&key)
});
let encryption_key = ENCRYPTION_KEY.get_or_init(
|| ChaCha20Poly1305::generate_nonce(&mut OsRng), // 96-bits; unique per message
);
bytes = if encrypt {
cipher
.encrypt(encryption_key, bytes.as_ref())
.map_err(|_| CacheError::EncryptionError)?
} else {
cipher
.decrypt(encryption_key, bytes.as_ref())
.map_err(|_| CacheError::EncryptionError)?
};
Ok(bytes)
}
/// A helper function that returns compressed results.
/// Feature flags (**compress-cache-results or cec-cache-results**) are required for this to work.
///
/// # Arguments
///
/// * `bytes` - It takes a slice of bytes as an argument.
///
/// # Error
/// Returns the compressed bytes on success otherwise it returns a CacheError
/// on failure.
#[cfg(any(feature = "compress-cache-results", feature = "cec-cache-results"))]
fn compress_results(&mut self, mut bytes: Vec<u8>) -> Result<Vec<u8>, Report<CacheError>> {
use std::io::Write;
let mut writer = brotli::CompressorWriter::new(Vec::new(), 4096, 11, 22);
writer
.write_all(&bytes)
.map_err(|_| CacheError::CompressionError)?;
bytes = writer.into_inner();
Ok(bytes)
}
/// A helper function that returns compressed-encrypted results.
/// Feature flag (**cec-cache-results**) is required for this to work.
///
/// # Arguments
///
/// * `bytes` - It takes a slice of bytes as an argument.
///
/// # Error
/// Returns the compressed and encrypted bytes on success otherwise it returns a CacheError
/// on failure.
#[cfg(feature = "cec-cache-results")]
fn compress_encrypt_compress_results(
&mut self,
mut bytes: Vec<u8>,
) -> Result<Vec<u8>, Report<CacheError>> {
// compress first
bytes = self.compress_results(bytes)?;
// encrypt
bytes = self.encrypt_or_decrypt_results(bytes, true)?;
// compress again;
bytes = self.compress_results(bytes)?;
Ok(bytes)
}
/// A helper function that returns compressed results.
/// Feature flags (**compress-cache-results or cec-cache-results**) are required for this to work.
/// If bytes where
/// # Arguments
///
/// * `bytes` - It takes a slice of bytes as an argument.
///
/// # Error
/// Returns the uncompressed bytes on success otherwise it returns a CacheError
/// on failure.
#[cfg(any(feature = "compress-cache-results", feature = "cec-cache-results"))]
fn decompress_results(&mut self, bytes: &[u8]) -> Result<Vec<u8>, Report<CacheError>> {
cfg_if::cfg_if! {
if #[cfg(feature = "compress-cache-results")]
{
decompress_util(bytes)
}
else if #[cfg(feature = "cec-cache-results")]
{
let decompressed = decompress_util(bytes)?;
let decrypted = self.encrypt_or_decrypt_results(decompressed, false)?;
decompress_util(&decrypted)
}
}
}
/// A helper function that compresses or encrypts search results before they're inserted into a cache store
/// # Arguments
///
/// * `search_results` - A reference to the search_Results to process.
///
///
/// # Error
/// Returns a Vec of compressed or encrypted bytes on success otherwise it returns a CacheError
/// on failure.
fn pre_process_search_results(
&mut self,
search_results: &SearchResults,
) -> Result<Vec<u8>, Report<CacheError>> {
#[allow(unused_mut)] // needs to be mutable when any of the features is enabled
let mut bytes: Vec<u8> = search_results.try_into()?;
#[cfg(feature = "compress-cache-results")]
{
let compressed = self.compress_results(bytes)?;
bytes = compressed;
}
#[cfg(feature = "encrypt-cache-results")]
{
let encrypted = self.encrypt_or_decrypt_results(bytes, true)?;
bytes = encrypted;
}
#[cfg(feature = "cec-cache-results")]
{
let compressed_encrypted_compressed = self.compress_encrypt_compress_results(bytes)?;
bytes = compressed_encrypted_compressed;
}
Ok(bytes)
}
/// A helper function that decompresses or decrypts search results after they're fetched from the cache-store
/// # Arguments
///
/// * `bytes` - A Vec of bytes stores in the cache.
///
///
/// # Error
/// Returns the SearchResults struct on success otherwise it returns a CacheError
/// on failure.
#[allow(unused_mut)] // needs to be mutable when any of the features is enabled
fn post_process_search_results(
&mut self,
mut bytes: Vec<u8>,
) -> Result<SearchResults, Report<CacheError>> {
#[cfg(feature = "compress-cache-results")]
{
let decompressed = self.decompress_results(&bytes)?;
bytes = decompressed
}
#[cfg(feature = "encrypt-cache-results")]
{
let decrypted = self.encrypt_or_decrypt_results(bytes, false)?;
bytes = decrypted
}
#[cfg(feature = "cec-cache-results")]
{
let decompressed_decrypted = self.decompress_results(&bytes)?;
bytes = decompressed_decrypted;
}
Ok(bytes.try_into()?)
}
}
/// A helper function that returns compressed results.
/// Feature flags (**compress-cache-results or cec-cache-results**) are required for this to work.
/// If bytes where
/// # Arguments
///
/// * `bytes` - It takes a slice of bytes as an argument.
///
/// # Error
/// Returns the uncompressed bytes on success otherwise it returns a CacheError
/// on failure.
#[cfg(any(feature = "compress-cache-results", feature = "cec-cache-results"))]
fn decompress_util(input: &[u8]) -> Result<Vec<u8>, Report<CacheError>> {
use std::io::Write;
let mut writer = brotli::DecompressorWriter::new(Vec::new(), 4096);
writer
.write_all(input)
.map_err(|_| CacheError::CompressionError)?;
let bytes = writer
.into_inner()
.map_err(|_| CacheError::CompressionError)?;
Ok(bytes)
}
#[cfg(feature = "redis-cache")]
@ -85,10 +320,14 @@ impl Cacher for RedisCache {
}
async fn cached_results(&mut self, url: &str) -> Result<SearchResults, Report<CacheError>> {
use base64::Engine;
let hashed_url_string: &str = &self.hash_url(url);
let json = self.cached_json(hashed_url_string).await?;
Ok(serde_json::from_str::<SearchResults>(&json)
.map_err(|_| CacheError::SerializationError)?)
let base64_string = self.cached_json(hashed_url_string).await?;
let bytes = base64::engine::general_purpose::STANDARD_NO_PAD
.decode(base64_string)
.map_err(|_| CacheError::Base64DecodingOrEncodingError)?;
self.post_process_search_results(bytes)
}
async fn cache_results(
@ -96,10 +335,29 @@ impl Cacher for RedisCache {
search_results: &SearchResults,
url: &str,
) -> Result<(), Report<CacheError>> {
let json =
serde_json::to_string(search_results).map_err(|_| CacheError::SerializationError)?;
use base64::Engine;
let bytes = self.pre_process_search_results(search_results)?;
let base64_string = base64::engine::general_purpose::STANDARD_NO_PAD.encode(bytes);
let hashed_url_string = self.hash_url(url);
self.cache_json(&json, &hashed_url_string).await
self.cache_json(&base64_string, &hashed_url_string).await
}
}
/// TryInto implementation for SearchResults from Vec<u8>
use std::convert::TryInto;
impl TryInto<SearchResults> for Vec<u8> {
type Error = CacheError;
fn try_into(self) -> Result<SearchResults, Self::Error> {
serde_json::from_slice(&self).map_err(|_| CacheError::SerializationError)
}
}
impl TryInto<Vec<u8>> for &SearchResults {
type Error = CacheError;
fn try_into(self) -> Result<Vec<u8>, Self::Error> {
serde_json::to_vec(self).map_err(|_| CacheError::SerializationError)
}
}
@ -107,7 +365,7 @@ impl Cacher for RedisCache {
#[cfg(feature = "memory-cache")]
pub struct InMemoryCache {
/// The backend cache which stores data.
cache: MokaCache<String, SearchResults>,
cache: MokaCache<String, Vec<u8>>,
}
#[cfg(feature = "memory-cache")]
@ -126,7 +384,7 @@ impl Cacher for InMemoryCache {
async fn cached_results(&mut self, url: &str) -> Result<SearchResults, Report<CacheError>> {
let hashed_url_string = self.hash_url(url);
match self.cache.get(&hashed_url_string) {
Some(res) => Ok(res),
Some(res) => self.post_process_search_results(res),
None => Err(Report::new(CacheError::MissingValue)),
}
}
@ -137,7 +395,8 @@ impl Cacher for InMemoryCache {
url: &str,
) -> Result<(), Report<CacheError>> {
let hashed_url_string = self.hash_url(url);
self.cache.insert(hashed_url_string, search_results.clone());
let bytes = self.pre_process_search_results(search_results)?;
self.cache.insert(hashed_url_string, bytes);
Ok(())
}
}
@ -282,3 +541,5 @@ pub async fn create_cache(config: &Config) -> impl Cacher {
#[cfg(not(any(feature = "memory-cache", feature = "redis-cache")))]
return DisabledCache::build(config).await;
}
//#[cfg(feature = "Compress-cache-results")]

25
src/cache/encryption.rs vendored Normal file
View File

@ -0,0 +1,25 @@
use chacha20poly1305::{
consts::{B0, B1},
ChaChaPoly1305,
};
use std::sync::OnceLock;
use chacha20::{
cipher::{
generic_array::GenericArray,
typenum::{UInt, UTerm},
StreamCipherCoreWrapper,
},
ChaChaCore,
};
/// The ChaCha20 core wrapped in a stream cipher for use in ChaCha20-Poly1305 authenticated encryption.
type StreamCipherCoreWrapperType =
StreamCipherCoreWrapper<ChaChaCore<UInt<UInt<UInt<UInt<UTerm, B1>, B0>, B1>, B0>>>;
/// Our ChaCha20-Poly1305 cipher instance, lazily initialized.
pub static CIPHER: OnceLock<ChaChaPoly1305<StreamCipherCoreWrapperType>> = OnceLock::new();
/// The type alias for our encryption key, a 32-byte array.
type GenericArrayType = GenericArray<u8, UInt<UInt<UInt<UInt<UTerm, B1>, B1>, B0>, B0>>;
/// Our encryption key, lazily initialized.
pub static ENCRYPTION_KEY: OnceLock<GenericArrayType> = OnceLock::new();

18
src/cache/error.rs vendored
View File

@ -18,6 +18,12 @@ pub enum CacheError {
SerializationError,
/// Returned when the value is missing.
MissingValue,
/// whenever encryption or decryption of cache results fails
EncryptionError,
/// Whenever compression of the cache results fails
CompressionError,
/// Whenever base64 decoding failed
Base64DecodingOrEncodingError,
}
impl fmt::Display for CacheError {
@ -43,6 +49,18 @@ impl fmt::Display for CacheError {
CacheError::SerializationError => {
write!(f, "Unable to serialize, deserialize from the cache")
}
CacheError::EncryptionError => {
write!(f, "Failed to encrypt or decrypt cache-results")
}
CacheError::CompressionError => {
write!(f, "failed to compress or uncompress cache results")
}
CacheError::Base64DecodingOrEncodingError => {
write!(f, "base64 encoding or decoding failed")
}
}
}
}

6
src/cache/mod.rs vendored
View File

@ -1,7 +1,11 @@
//! This module provides the modules which provide the functionality to cache the aggregated
//! results fetched and aggregated from the upstream search engines in a json format.
pub mod cacher;
#[cfg(any(feature = "encrypt-cache-results", feature = "cec-cache-results"))]
/// encryption module contains encryption utils such the cipher and key
pub mod encryption;
pub mod error;
#[cfg(feature = "redis-cache")]
pub mod redis_cacher;

View File

@ -44,7 +44,7 @@ impl RedisCache {
let mut tasks: Vec<_> = Vec::new();
for _ in 0..pool_size {
tasks.push(client.get_tokio_connection_manager());
tasks.push(client.get_connection_manager());
}
let redis_cache = RedisCache {