From efa8efc6c7bc01f5279f3990e7c15880db831dda Mon Sep 17 00:00:00 2001 From: Evan Yang <31290895+evanyang1@users.noreply.github.com> Date: Thu, 11 Jan 2024 03:10:35 -0800 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20Compression=20and=20encryption=20fo?= =?UTF-8?q?r=20the=20cached=20search=20results=20(#443)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * attempt1 * rough draft * add features and their optional dependancies * add encryption and compression error variants * add a sample implementation to cache trait * Update src/cache/cacher.rs Co-authored-by: neon_arch * adjust comment so feature flag would apply? * adjust feature flag so it applies? * formatting * Update src/cache/cacher.rs update documentation Co-authored-by: neon_arch * [features]Add base64 and chacha20 dependencies for compress-cache-results and encrypt-cache-results * move encryption key and cipher logic to separate sub module * added cacha20 and cec-results feature * added cacha20 and cec-results feature * added compression and encryption helper functions to trait implementations * added compression and encryption implementation for inMemoryCache * base64 is only requried when redis-cache feature is enabled * add error case for base64 and encryption/compression implementation to redisCache * Refactor cacher to remove regex dependency * fmt cache error and cacher * Update Cargo.toml disabling the unneeded default-features Co-authored-by: neon_arch * fix unused import warning for mimalloc * remove deprecated method * add doc comments for encryption module * fix known bugs and use cfg-if module * make cfg-if an optional dependency * use feature-flag instead of maco lint * add comment to explain type complexity * bump app version * Update src/cache/encryption.rs Co-authored-by: neon_arch * fixed type complexity and add docs for types --------- Co-authored-by: Spencer Najib Co-authored-by: alamin655 Co-authored-by: mergify[bot] 
<37929162+mergify[bot]@users.noreply.github.com> Co-authored-by: neon_arch Co-authored-by: Spencerjibz <=spencernajib2@gmail.com> Co-authored-by: spencer --- Cargo.lock | 101 +++++++++++++- Cargo.toml | 13 +- src/bin/websurfx.rs | 3 +- src/cache/cacher.rs | 279 ++++++++++++++++++++++++++++++++++++-- src/cache/encryption.rs | 25 ++++ src/cache/error.rs | 18 +++ src/cache/mod.rs | 6 +- src/cache/redis_cacher.rs | 2 +- 8 files changed, 432 insertions(+), 15 deletions(-) create mode 100644 src/cache/encryption.rs diff --git a/Cargo.lock b/Cargo.lock index 203b018..7befc09 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -243,6 +243,16 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "aead" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d122413f284cf2d62fb1b7db97e02edb8cda96d769b16e443a4f6195e35662b0" +dependencies = [ + "crypto-common", + "generic-array", +] + [[package]] name = "ahash" version = "0.7.7" @@ -600,6 +610,30 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "chacha20" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3613f74bd2eac03dad61bd53dbe620703d4371614fe0bc3b9f04dd36fe4e818" +dependencies = [ + "cfg-if 1.0.0", + "cipher", + "cpufeatures", +] + +[[package]] +name = "chacha20poly1305" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10cd79432192d1c0f4e1a0fef9527696cc039165d729fb41b3f4f4f354c2dc35" +dependencies = [ + "aead", + "chacha20", + "cipher", + "poly1305", + "zeroize", +] + [[package]] name = "ci_info" version = "0.10.2" @@ -636,6 +670,17 @@ dependencies = [ "half", ] +[[package]] +name = "cipher" +version = "0.4.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" +dependencies = [ + "crypto-common", + "inout", + "zeroize", +] + [[package]] name = "clap" version = "4.4.12" @@ -897,6 +942,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" dependencies = [ "generic-array", + "rand_core 0.6.4", "typenum", ] @@ -1709,6 +1755,15 @@ dependencies = [ "hashbrown 0.14.3", ] +[[package]] +name = "inout" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0c10553d664a4d0bcff9f4215d0aac67a639cc68ef660840afe309b807bc9f5" +dependencies = [ + "generic-array", +] + [[package]] name = "iovec" version = "0.1.4" @@ -2219,6 +2274,12 @@ version = "11.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" +[[package]] +name = "opaque-debug" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5" + [[package]] name = "openssl" version = "0.10.62" @@ -2521,6 +2582,17 @@ version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69d3587f8a9e599cc7ec2c00e331f71c4e69a5f9a4b8a6efd5b07466b9736f9a" +[[package]] +name = "poly1305" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8159bd90725d2df49889a078b54f4f79e87f1f8a8444194cdca81d38f5393abf" +dependencies = [ + "cpufeatures", + "opaque-debug", + "universal-hash", +] + [[package]] name = "powerfmt" version = "0.2.0" @@ -3392,6 +3464,12 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b1884d1bc09741d466d9b14e6d37ac89d6909cbcac41dd9ae982d4d063bbedfc" +[[package]] +name = "subtle" +version = "2.5.0" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" + [[package]] name = "syn" version = "0.15.44" @@ -3877,6 +3955,16 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" +[[package]] +name = "universal-hash" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc1de2c688dc15305988b563c3854064043356019f97a4b46276fe734c4f07ea" +dependencies = [ + "crypto-common", + "subtle", +] + [[package]] name = "untrusted" version = "0.9.0" @@ -4058,7 +4146,7 @@ checksum = "1778a42e8b3b90bff8d0f5032bf22250792889a5cdc752aa0020c84abe3aaf10" [[package]] name = "websurfx" -version = "1.7.3" +version = "1.9.0" dependencies = [ "actix-cors", "actix-files", @@ -4066,7 +4154,12 @@ dependencies = [ "actix-web", "async-once-cell", "async-trait", + "base64 0.21.5", "blake3", + "brotli", + "cfg-if 1.0.0", + "chacha20", + "chacha20poly1305", "criterion", "dhat", "env_logger", @@ -4328,3 +4421,9 @@ dependencies = [ "quote 1.0.33", "syn 2.0.43", ] + +[[package]] +name = "zeroize" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "525b4ec142c6b68a2d10f01f7bbf6755599ca3f81ea53b8431b7dd348f5fdb2d" diff --git a/Cargo.toml b/Cargo.toml index c57ac7c..1045c6d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "websurfx" -version = "1.7.3" +version = "1.9.0" edition = "2021" description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind." 
repository = "https://github.com/neon-mmd/websurfx" @@ -38,6 +38,11 @@ mimalloc = { version = "0.1.38", default-features = false } async-once-cell = {version="0.5.3", default-features=false} actix-governor = {version="0.5.0", default-features=false} mini-moka = { version="0.10", optional = true, default-features=false, features=["sync"]} +brotli = { version = "3.4.0", default-features = false, features=["std"], optional=true} +chacha20poly1305={version="0.10.1", default-features=false, features=["alloc","getrandom"], optional=true} +chacha20 = {version="0.9.1", default-features=false, optional=true} +base64 = {version="0.21.5", default-features=false, features=["std"], optional=true} +cfg-if = {version="1.0.0", default-features=false,optional=true} [dev-dependencies] rusty-hook = {version="^0.11.2", default-features=false} @@ -78,4 +83,8 @@ strip = "debuginfo" default = ["memory-cache"] dhat-heap = ["dep:dhat"] memory-cache = ["dep:mini-moka"] -redis-cache = ["dep:redis"] +redis-cache = ["dep:redis","dep:base64"] +compress-cache-results = ["dep:brotli","dep:cfg-if"] +encrypt-cache-results = ["dep:chacha20poly1305","dep:chacha20"] +cec-cache-results = ["compress-cache-results","encrypt-cache-results"] + diff --git a/src/bin/websurfx.rs b/src/bin/websurfx.rs index aad5806..1852695 100644 --- a/src/bin/websurfx.rs +++ b/src/bin/websurfx.rs @@ -2,8 +2,9 @@ //! //! This module contains the main function which handles the logging of the application to the //! stdout and handles the command line arguments provided and launches the `websurfx` server. 
- +#[cfg(not(feature = "dhat-heap"))] use mimalloc::MiMalloc; + use std::net::TcpListener; use websurfx::{cache::cacher::create_cache, config::parser::Config, run}; diff --git a/src/cache/cacher.rs b/src/cache/cacher.rs index 577395c..d1144e3 100644 --- a/src/cache/cacher.rs +++ b/src/cache/cacher.rs @@ -4,6 +4,7 @@ use error_stack::Report; #[cfg(feature = "memory-cache")] use mini_moka::sync::Cache as MokaCache; + #[cfg(feature = "memory-cache")] use std::time::Duration; use tokio::sync::Mutex; @@ -14,6 +15,9 @@ use super::error::CacheError; #[cfg(feature = "redis-cache")] use super::redis_cacher::RedisCache; +#[cfg(any(feature = "encrypt-cache-results", feature = "cec-cache-results"))] +use super::encryption::*; + /// Abstraction trait for common methods provided by a cache backend. #[async_trait::async_trait] pub trait Cacher: Send + Sync { @@ -69,6 +73,237 @@ pub trait Cacher: Send + Sync { fn hash_url(&self, url: &str) -> String { blake3::hash(url.as_bytes()).to_string() } + + /// A helper function that returns either encrypted or decrypted results. + /// Feature flags (**encrypt-cache-results or cec-cache-results**) are required for this to work. + /// + /// # Arguments + /// + /// * `bytes` - It takes a slice of bytes as an argument. + /// * `encrypt` - A boolean to choose whether to encrypt or decrypt the bytes + + /// + /// # Error + /// Returns either encrypted or decrypted bytes on success otherwise it returns a CacheError + /// on failure. 
+ #[cfg(any( + // feature = "compress-cache-results", + feature = "encrypt-cache-results", + feature = "cec-cache-results" + ))] + fn encrypt_or_decrypt_results( + &mut self, + mut bytes: Vec, + encrypt: bool, + ) -> Result, Report> { + use chacha20poly1305::{ + aead::{Aead, AeadCore, KeyInit, OsRng}, + ChaCha20Poly1305, + }; + + let cipher = CIPHER.get_or_init(|| { + let key = ChaCha20Poly1305::generate_key(&mut OsRng); + ChaCha20Poly1305::new(&key) + }); + + let encryption_key = ENCRYPTION_KEY.get_or_init( + || ChaCha20Poly1305::generate_nonce(&mut OsRng), // NOTE(review): 96-bit nonce; it should be unique per message, but `get_or_init` creates it once and every later call reuses it + ); + + bytes = if encrypt { + cipher + .encrypt(encryption_key, bytes.as_ref()) + .map_err(|_| CacheError::EncryptionError)? + } else { + cipher + .decrypt(encryption_key, bytes.as_ref()) + .map_err(|_| CacheError::EncryptionError)? + }; + + Ok(bytes) + } + + /// A helper function that compresses the given bytes with brotli and returns the result. + /// Feature flags (**compress-cache-results or cec-cache-results**) are required for this to work. + /// + /// # Arguments + /// + /// * `bytes` - The uncompressed bytes, passed by value. + + /// + /// # Error + /// Returns the compressed bytes on success otherwise it returns a CacheError + /// on failure. + #[cfg(any(feature = "compress-cache-results", feature = "cec-cache-results"))] + fn compress_results(&mut self, mut bytes: Vec) -> Result, Report> { + use std::io::Write; + let mut writer = brotli::CompressorWriter::new(Vec::new(), 4096, 11, 22); + writer + .write_all(&bytes) + .map_err(|_| CacheError::CompressionError)?; + bytes = writer.into_inner(); + Ok(bytes) + } + + /// A helper function that compresses, encrypts and then compresses the results again. + /// Feature flag (**cec-cache-results**) is required for this to work. + /// + /// # Arguments + /// + /// * `bytes` - The bytes to process, passed by value. + + /// + /// # Error + /// Returns the compressed and encrypted bytes on success otherwise it returns a CacheError + /// on failure.
+ #[cfg(feature = "cec-cache-results")] + fn compress_encrypt_compress_results( + &mut self, + mut bytes: Vec, + ) -> Result, Report> { + // compress first + bytes = self.compress_results(bytes)?; + // encrypt + bytes = self.encrypt_or_decrypt_results(bytes, true)?; + + // compress again; + bytes = self.compress_results(bytes)?; + + Ok(bytes) + } + + /// A helper function that returns decompressed results. + /// Feature flags (**compress-cache-results or cec-cache-results**) are required for this to work. + /// NOTE(review): Cargo.toml makes `cec-cache-results` enable `compress-cache-results`, so the first `cfg_if` branch appears to win whenever either flag is set - confirm the second branch is reachable. + /// # Arguments + /// + /// * `bytes` - A slice of the bytes to decompress. + + /// + /// # Error + /// Returns the uncompressed bytes on success otherwise it returns a CacheError + /// on failure. + + #[cfg(any(feature = "compress-cache-results", feature = "cec-cache-results"))] + fn decompress_results(&mut self, bytes: &[u8]) -> Result, Report> { + cfg_if::cfg_if! { + if #[cfg(feature = "compress-cache-results")] + { + decompress_util(bytes) + + } + else if #[cfg(feature = "cec-cache-results")] + { + let decompressed = decompress_util(bytes)?; + let decrypted = self.encrypt_or_decrypt_results(decompressed, false)?; + + decompress_util(&decrypted) + + } + } + } + + /// A helper function that serializes search results and then compresses or encrypts them before they're inserted into a cache store + + /// # Arguments + /// + /// * `search_results` - A reference to the search results to process. + /// + + /// + /// # Error + /// Returns a Vec of compressed or encrypted bytes on success otherwise it returns a CacheError + /// on failure.
+ fn pre_process_search_results( + &mut self, + search_results: &SearchResults, + ) -> Result, Report> { + #[allow(unused_mut)] // needs to be mutable when any of the features is enabled + let mut bytes: Vec = search_results.try_into()?; + #[cfg(feature = "compress-cache-results")] + { + let compressed = self.compress_results(bytes)?; + bytes = compressed; + } + + #[cfg(feature = "encrypt-cache-results")] + { + let encrypted = self.encrypt_or_decrypt_results(bytes, true)?; + bytes = encrypted; + } + + #[cfg(feature = "cec-cache-results")] + { + let compressed_encrypted_compressed = self.compress_encrypt_compress_results(bytes)?; + bytes = compressed_encrypted_compressed; + } + + Ok(bytes) + } + + /// A helper function that decompresses or decrypts search results after they're fetched from the cache-store + + /// # Arguments + /// + /// * `bytes` - A Vec of bytes stored in the cache. + /// NOTE(review): the steps below run decompress, then decrypt, then decompress, while the write path runs compress, encrypt, then compress-encrypt-compress; confirm the round trip when more than one feature is enabled. + + /// + /// # Error + /// Returns the SearchResults struct on success otherwise it returns a CacheError + /// on failure. + + #[allow(unused_mut)] // needs to be mutable when any of the features is enabled + fn post_process_search_results( + &mut self, + mut bytes: Vec, + ) -> Result> { + #[cfg(feature = "compress-cache-results")] + { + let decompressed = self.decompress_results(&bytes)?; + bytes = decompressed + } + + #[cfg(feature = "encrypt-cache-results")] + { + let decrypted = self.encrypt_or_decrypt_results(bytes, false)?; + bytes = decrypted + } + + #[cfg(feature = "cec-cache-results")] + { + let decompressed_decrypted = self.decompress_results(&bytes)?; + bytes = decompressed_decrypted; + } + + Ok(bytes.try_into()?) + } +} + +/// A helper function that returns brotli-decompressed bytes. +/// Feature flags (**compress-cache-results or cec-cache-results**) are required for this to work. +/// It is the routine that both branches of `decompress_results` call. +/// # Arguments +/// +/// * `input` - A slice of the compressed bytes to decompress.
+ +/// +/// # Error +/// Returns the uncompressed bytes on success otherwise it returns a CacheError +/// on failure. + +#[cfg(any(feature = "compress-cache-results", feature = "cec-cache-results"))] +fn decompress_util(input: &[u8]) -> Result, Report> { + use std::io::Write; + let mut writer = brotli::DecompressorWriter::new(Vec::new(), 4096); + + writer + .write_all(input) + .map_err(|_| CacheError::CompressionError)?; + let bytes = writer + .into_inner() + .map_err(|_| CacheError::CompressionError)?; + Ok(bytes) } #[cfg(feature = "redis-cache")] @@ -85,10 +320,14 @@ impl Cacher for RedisCache { } async fn cached_results(&mut self, url: &str) -> Result> { + use base64::Engine; let hashed_url_string: &str = &self.hash_url(url); - let json = self.cached_json(hashed_url_string).await?; - Ok(serde_json::from_str::(&json) - .map_err(|_| CacheError::SerializationError)?) + let base64_string = self.cached_json(hashed_url_string).await?; + + let bytes = base64::engine::general_purpose::STANDARD_NO_PAD + .decode(base64_string) + .map_err(|_| CacheError::Base64DecodingOrEncodingError)?; + self.post_process_search_results(bytes) } async fn cache_results( @@ -96,10 +335,29 @@ impl Cacher for RedisCache { search_results: &SearchResults, url: &str, ) -> Result<(), Report> { - let json = - serde_json::to_string(search_results).map_err(|_| CacheError::SerializationError)?; + use base64::Engine; + let bytes = self.pre_process_search_results(search_results)?; + let base64_string = base64::engine::general_purpose::STANDARD_NO_PAD.encode(bytes); let hashed_url_string = self.hash_url(url); - self.cache_json(&json, &hashed_url_string).await + self.cache_json(&base64_string, &hashed_url_string).await + } +} +/// TryInto implementation for SearchResults from Vec +use std::convert::TryInto; + +impl TryInto for Vec { + type Error = CacheError; + + fn try_into(self) -> Result { + serde_json::from_slice(&self).map_err(|_| CacheError::SerializationError) + } +} + +impl TryInto> for 
&SearchResults { + type Error = CacheError; + + fn try_into(self) -> Result, Self::Error> { + serde_json::to_vec(self).map_err(|_| CacheError::SerializationError) } } @@ -107,7 +365,7 @@ impl Cacher for RedisCache { #[cfg(feature = "memory-cache")] pub struct InMemoryCache { /// The backend cache which stores data. - cache: MokaCache, + cache: MokaCache>, } #[cfg(feature = "memory-cache")] @@ -126,7 +384,7 @@ impl Cacher for InMemoryCache { async fn cached_results(&mut self, url: &str) -> Result> { let hashed_url_string = self.hash_url(url); match self.cache.get(&hashed_url_string) { - Some(res) => Ok(res), + Some(res) => self.post_process_search_results(res), None => Err(Report::new(CacheError::MissingValue)), } } @@ -137,7 +395,8 @@ impl Cacher for InMemoryCache { url: &str, ) -> Result<(), Report> { let hashed_url_string = self.hash_url(url); - self.cache.insert(hashed_url_string, search_results.clone()); + let bytes = self.pre_process_search_results(search_results)?; + self.cache.insert(hashed_url_string, bytes); Ok(()) } } @@ -282,3 +541,5 @@ pub async fn create_cache(config: &Config) -> impl Cacher { #[cfg(not(any(feature = "memory-cache", feature = "redis-cache")))] return DisabledCache::build(config).await; } + +//#[cfg(feature = "Compress-cache-results")] diff --git a/src/cache/encryption.rs b/src/cache/encryption.rs new file mode 100644 index 0000000..a834003 --- /dev/null +++ b/src/cache/encryption.rs @@ -0,0 +1,25 @@ +use chacha20poly1305::{ + consts::{B0, B1}, + ChaChaPoly1305, +}; +use std::sync::OnceLock; + +use chacha20::{ + cipher::{ + generic_array::GenericArray, + typenum::{UInt, UTerm}, + StreamCipherCoreWrapper, + }, + ChaChaCore, +}; + +/// The ChaCha20 core wrapped in a stream cipher for use in ChaCha20-Poly1305 authenticated encryption. +type StreamCipherCoreWrapperType = + StreamCipherCoreWrapper, B0>, B1>, B0>>>; +/// Our ChaCha20-Poly1305 cipher instance, lazily initialized. 
+pub static CIPHER: OnceLock> = OnceLock::new(); + +/// The type alias for the value kept in `ENCRYPTION_KEY`. NOTE(review): the cacher fills it from `generate_nonce` (96 bits), so this is presumably a nonce, not a 32-byte key - confirm. +type GenericArrayType = GenericArray, B1>, B0>, B0>>; +/// Lazily initialized; the cacher stores the output of `generate_nonce` here and passes it to `encrypt`/`decrypt`. +pub static ENCRYPTION_KEY: OnceLock = OnceLock::new(); diff --git a/src/cache/error.rs b/src/cache/error.rs index 62c9098..989ddf1 100644 --- a/src/cache/error.rs +++ b/src/cache/error.rs @@ -18,6 +18,12 @@ pub enum CacheError { SerializationError, /// Returned when the value is missing. MissingValue, + /// Returned when encryption or decryption of the cache results fails. + EncryptionError, + /// Returned when compression or decompression of the cache results fails. + CompressionError, + /// Returned when base64 decoding of a cached value fails. + Base64DecodingOrEncodingError, } impl fmt::Display for CacheError { @@ -43,6 +49,18 @@ impl fmt::Display for CacheError { CacheError::SerializationError => { write!(f, "Unable to serialize, deserialize from the cache") } + + CacheError::EncryptionError => { + write!(f, "Failed to encrypt or decrypt cache-results") + } + + CacheError::CompressionError => { + write!(f, "failed to compress or uncompress cache results") + } + + CacheError::Base64DecodingOrEncodingError => { + write!(f, "base64 encoding or decoding failed") + } } } } diff --git a/src/cache/mod.rs b/src/cache/mod.rs index 887f119..5bb6d5a 100644 --- a/src/cache/mod.rs +++ b/src/cache/mod.rs @@ -1,7 +1,11 @@ //! This module provides the modules which provide the functionality to cache the aggregated //! results fetched and aggregated from the upstream search engines in a json format.
- pub mod cacher; + +#[cfg(any(feature = "encrypt-cache-results", feature = "cec-cache-results"))] +/// The encryption module contains encryption utils such as the cipher and the lazily initialized `ENCRYPTION_KEY` static. +pub mod encryption; pub mod error; + #[cfg(feature = "redis-cache")] pub mod redis_cacher; diff --git a/src/cache/redis_cacher.rs b/src/cache/redis_cacher.rs index 7d3295a..cfb2a2e 100644 --- a/src/cache/redis_cacher.rs +++ b/src/cache/redis_cacher.rs @@ -44,7 +44,7 @@ impl RedisCache { let mut tasks: Vec<_> = Vec::new(); for _ in 0..pool_size { - tasks.push(client.get_tokio_connection_manager()); + tasks.push(client.get_connection_manager()); } let redis_cache = RedisCache {