Mirror of https://github.com/neon-mmd/websurfx.git, synced 2024-11-22 22:18:23 -05:00.

Compare commits


No commits in common. "e385a577e021fe2ba1f08e549658b93bdce56c2c" and "6aa99922a6abc6657288439c8d6c9a384ec8aa65" have entirely different histories.

6 changed files with 376 additions and 389 deletions

Cargo.lock (generated, 617 lines changed): diff suppressed because it is too large.

Cargo.toml

@ -1,6 +1,6 @@
[package]
name = "websurfx"
version = "1.10.9"
version = "1.9.20"
edition = "2021"
description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
repository = "https://github.com/neon-mmd/websurfx"
@ -14,7 +14,7 @@ path = "src/bin/websurfx.rs"
[dependencies]
reqwest = {version="0.11.24", default-features=false, features=["rustls-tls","brotli", "gzip"]}
tokio = {version="1.32.0",features=["rt-multi-thread","macros", "fs", "io-util"], default-features = false}
tokio = {version="1.32.0",features=["rt-multi-thread","macros"], default-features = false}
serde = {version="1.0.196", default-features=false, features=["derive"]}
serde_json = {version="1.0.109", default-features=false}
maud = {version="0.25.0", default-features=false, features=["actix-web"]}
@ -38,7 +38,7 @@ mimalloc = { version = "0.1.38", default-features = false }
async-once-cell = {version="0.5.3", default-features=false}
actix-governor = {version="0.5.0", default-features=false}
mini-moka = { version="0.10", optional = true, default-features=false, features=["sync"]}
async-compression = { version = "0.4.6", default-features = false, features=["brotli","tokio"], optional=true}
brotli = { version = "3.4.0", default-features = false, features=["std"], optional=true}
chacha20poly1305={version="0.10.1", default-features=false, features=["alloc","getrandom"], optional=true}
chacha20 = {version="0.9.1", default-features=false, optional=true}
base64 = {version="0.21.5", default-features=false, features=["std"], optional=true}
@ -84,7 +84,7 @@ default = ["memory-cache"]
dhat-heap = ["dep:dhat"]
memory-cache = ["dep:mini-moka"]
redis-cache = ["dep:redis","dep:base64"]
compress-cache-results = ["dep:async-compression","dep:cfg-if"]
compress-cache-results = ["dep:brotli","dep:cfg-if"]
encrypt-cache-results = ["dep:chacha20poly1305","dep:chacha20"]
cec-cache-results = ["compress-cache-results","encrypt-cache-results"]
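
The manifest change above swaps the tokio-based `async-compression` dependency for the synchronous `brotli` crate, so the `compress-cache-results` feature now pulls in `dep:brotli` instead of `dep:async-compression`. Below is a minimal round-trip sketch of the `brotli` writer API the new cacher code builds on; the input bytes are made up, and the 4096/11/22 arguments mirror the values passed in `compress_results` further down.

// Round trip with the synchronous `brotli` writer types used by the new cacher code.
use std::io::Write;

fn main() -> std::io::Result<()> {
    let input = b"cached search results".to_vec();

    // Compress: CompressorWriter::new(inner, buffer_size, quality, lg_window_size).
    let mut encoder = brotli::CompressorWriter::new(Vec::new(), 4096, 11, 22);
    encoder.write_all(&input)?;
    // into_inner() hands back the underlying Vec<u8> holding the compressed
    // stream, the same call `compress_results` relies on.
    let compressed = encoder.into_inner();

    // Decompress: DecompressorWriter::new(inner, buffer_size). into_inner()
    // returns Result<W, W>; expect() is enough for a demo, while the real
    // code maps the error to a CacheError.
    let mut decoder = brotli::DecompressorWriter::new(Vec::new(), 4096);
    decoder.write_all(&compressed)?;
    let decompressed = decoder
        .into_inner()
        .expect("decoder returned an incomplete stream");

    assert_eq!(decompressed, input);
    Ok(())
}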

src/cache/cacher.rs (vendored, 68 lines changed)

@ -93,7 +93,7 @@ pub trait Cacher: Send + Sync {
feature = "encrypt-cache-results",
feature = "cec-cache-results"
))]
async fn encrypt_or_decrypt_results(
fn encrypt_or_decrypt_results(
&mut self,
mut bytes: Vec<u8>,
encrypt: bool,
@ -137,19 +137,11 @@ pub trait Cacher: Send + Sync {
/// Returns the compressed bytes on success otherwise it returns a CacheError
/// on failure.
#[cfg(any(feature = "compress-cache-results", feature = "cec-cache-results"))]
async fn compress_results(
&mut self,
mut bytes: Vec<u8>,
) -> Result<Vec<u8>, Report<CacheError>> {
use tokio::io::AsyncWriteExt;
let mut writer = async_compression::tokio::write::BrotliEncoder::new(Vec::new());
fn compress_results(&mut self, mut bytes: Vec<u8>) -> Result<Vec<u8>, Report<CacheError>> {
use std::io::Write;
let mut writer = brotli::CompressorWriter::new(Vec::new(), 4096, 11, 22);
writer
.write_all(&bytes)
.await
.map_err(|_| CacheError::CompressionError)?;
writer
.shutdown()
.await
.map_err(|_| CacheError::CompressionError)?;
bytes = writer.into_inner();
Ok(bytes)
@ -167,17 +159,17 @@ pub trait Cacher: Send + Sync {
/// Returns the compressed and encrypted bytes on success otherwise it returns a CacheError
/// on failure.
#[cfg(feature = "cec-cache-results")]
async fn compress_encrypt_compress_results(
fn compress_encrypt_compress_results(
&mut self,
mut bytes: Vec<u8>,
) -> Result<Vec<u8>, Report<CacheError>> {
// compress first
bytes = self.compress_results(bytes).await?;
bytes = self.compress_results(bytes)?;
// encrypt
bytes = self.encrypt_or_decrypt_results(bytes, true).await?;
bytes = self.encrypt_or_decrypt_results(bytes, true)?;
// compress again;
bytes = self.compress_results(bytes).await?;
bytes = self.compress_results(bytes)?;
Ok(bytes)
}
@ -195,11 +187,11 @@ pub trait Cacher: Send + Sync {
/// on failure.
#[cfg(any(feature = "compress-cache-results", feature = "cec-cache-results"))]
async fn decompress_results(&mut self, bytes: &[u8]) -> Result<Vec<u8>, Report<CacheError>> {
fn decompress_results(&mut self, bytes: &[u8]) -> Result<Vec<u8>, Report<CacheError>> {
cfg_if::cfg_if! {
if #[cfg(feature = "compress-cache-results")]
{
decompress_util(bytes).await
decompress_util(bytes)
}
else if #[cfg(feature = "cec-cache-results")]
@ -207,7 +199,7 @@ pub trait Cacher: Send + Sync {
let decompressed = decompress_util(bytes)?;
let decrypted = self.encrypt_or_decrypt_results(decompressed, false)?;
decompress_util(&decrypted).await
decompress_util(&decrypted)
}
}
@ -224,7 +216,7 @@ pub trait Cacher: Send + Sync {
/// # Error
/// Returns a Vec of compressed or encrypted bytes on success otherwise it returns a CacheError
/// on failure.
async fn pre_process_search_results(
fn pre_process_search_results(
&mut self,
search_results: &SearchResults,
) -> Result<Vec<u8>, Report<CacheError>> {
@ -232,20 +224,19 @@ pub trait Cacher: Send + Sync {
let mut bytes: Vec<u8> = search_results.try_into()?;
#[cfg(feature = "compress-cache-results")]
{
let compressed = self.compress_results(bytes).await?;
let compressed = self.compress_results(bytes)?;
bytes = compressed;
}
#[cfg(feature = "encrypt-cache-results")]
{
let encrypted = self.encrypt_or_decrypt_results(bytes, true).await?;
let encrypted = self.encrypt_or_decrypt_results(bytes, true)?;
bytes = encrypted;
}
#[cfg(feature = "cec-cache-results")]
{
let compressed_encrypted_compressed =
self.compress_encrypt_compress_results(bytes).await?;
let compressed_encrypted_compressed = self.compress_encrypt_compress_results(bytes)?;
bytes = compressed_encrypted_compressed;
}
@ -265,25 +256,25 @@ pub trait Cacher: Send + Sync {
/// on failure.
#[allow(unused_mut)] // needs to be mutable when any of the features is enabled
async fn post_process_search_results(
fn post_process_search_results(
&mut self,
mut bytes: Vec<u8>,
) -> Result<SearchResults, Report<CacheError>> {
#[cfg(feature = "compress-cache-results")]
{
let decompressed = self.decompress_results(&bytes).await?;
let decompressed = self.decompress_results(&bytes)?;
bytes = decompressed
}
#[cfg(feature = "encrypt-cache-results")]
{
let decrypted = self.encrypt_or_decrypt_results(bytes, false).await?;
let decrypted = self.encrypt_or_decrypt_results(bytes, false)?;
bytes = decrypted
}
#[cfg(feature = "cec-cache-results")]
{
let decompressed_decrypted = self.decompress_results(&bytes).await?;
let decompressed_decrypted = self.decompress_results(&bytes)?;
bytes = decompressed_decrypted;
}
@ -304,19 +295,16 @@ pub trait Cacher: Send + Sync {
/// on failure.
#[cfg(any(feature = "compress-cache-results", feature = "cec-cache-results"))]
async fn decompress_util(input: &[u8]) -> Result<Vec<u8>, Report<CacheError>> {
use tokio::io::AsyncWriteExt;
let mut writer = async_compression::tokio::write::BrotliDecoder::new(Vec::new());
fn decompress_util(input: &[u8]) -> Result<Vec<u8>, Report<CacheError>> {
use std::io::Write;
let mut writer = brotli::DecompressorWriter::new(Vec::new(), 4096);
writer
.write_all(input)
.await
.map_err(|_| CacheError::CompressionError)?;
writer
.shutdown()
.await
let bytes = writer
.into_inner()
.map_err(|_| CacheError::CompressionError)?;
let bytes = writer.into_inner();
Ok(bytes)
}
@ -341,7 +329,7 @@ impl Cacher for RedisCache {
let bytes = base64::engine::general_purpose::STANDARD_NO_PAD
.decode(base64_string)
.map_err(|_| CacheError::Base64DecodingOrEncodingError)?;
self.post_process_search_results(bytes).await
self.post_process_search_results(bytes)
}
async fn cache_results(
@ -357,7 +345,7 @@ impl Cacher for RedisCache {
let mut bytes = Vec::with_capacity(search_results_len);
for result in search_results {
let processed = self.pre_process_search_results(result).await?;
let processed = self.pre_process_search_results(result)?;
bytes.push(processed);
}
@ -417,7 +405,7 @@ impl Cacher for InMemoryCache {
async fn cached_results(&mut self, url: &str) -> Result<SearchResults, Report<CacheError>> {
let hashed_url_string = self.hash_url(url);
match self.cache.get(&hashed_url_string) {
Some(res) => self.post_process_search_results(res).await,
Some(res) => self.post_process_search_results(res),
None => Err(Report::new(CacheError::MissingValue)),
}
}
@ -429,7 +417,7 @@ impl Cacher for InMemoryCache {
) -> Result<(), Report<CacheError>> {
for (url, search_result) in urls.iter().zip(search_results.iter()) {
let hashed_url_string = self.hash_url(url);
let bytes = self.pre_process_search_results(search_result).await?;
let bytes = self.pre_process_search_results(search_result)?;
self.cache.insert(hashed_url_string, bytes);
}
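
For the encryption half of the pipeline, the diff only changes the signature of `encrypt_or_decrypt_results` from async to sync; its body is not shown here. The following is a rough, standalone sketch of what a synchronous ChaCha20-Poly1305 helper of that shape could look like with the `chacha20poly1305` crate pinned in Cargo.toml (its `alloc` feature is already enabled there). The function name, the hard-coded key, and the fixed nonce are illustrative assumptions, not the project's actual key-management scheme.

// Rough sketch only: a synchronous ChaCha20-Poly1305 helper with the same
// shape as `encrypt_or_decrypt_results`. The fixed key and nonce are demo
// values; the real cacher manages its own key material.
use chacha20poly1305::{
    aead::{Aead, Error, KeyInit},
    ChaCha20Poly1305, Key, Nonce,
};

fn encrypt_or_decrypt(bytes: Vec<u8>, encrypt: bool) -> Result<Vec<u8>, Error> {
    // 32-byte key and 12-byte nonce (demo values only).
    let cipher = ChaCha20Poly1305::new(Key::from_slice(&[0x42; 32]));
    let nonce = Nonce::from_slice(&[0x24; 12]);

    if encrypt {
        cipher.encrypt(nonce, bytes.as_slice())
    } else {
        cipher.decrypt(nonce, bytes.as_slice())
    }
}

fn main() {
    let plain = b"cached payload".to_vec();
    let sealed = encrypt_or_decrypt(plain.clone(), true).expect("encrypt");
    let opened = encrypt_or_decrypt(sealed, false).expect("decrypt");
    assert_eq!(opened, plain);
}

Nothing in these helpers awaits anything, which is consistent with the diff dropping `async` from them and calling them directly inside the async trait methods.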


@ -14,12 +14,12 @@ use regex::Regex;
use reqwest::{Client, ClientBuilder};
use std::sync::Arc;
use std::time::{SystemTime, UNIX_EPOCH};
use tokio::{
fs::File,
io::{AsyncBufReadExt, BufReader},
task::JoinHandle,
use std::{fs::File, io::BufRead};
use std::{
io::{BufReader, Read},
time::Duration,
};
use tokio::task::JoinHandle;
/// A constant for holding the prebuilt Client globally in the app.
static CLIENT: std::sync::OnceLock<Client> = std::sync::OnceLock::new();
@ -169,15 +169,13 @@ pub async fn aggregate(
&mut result_map,
&mut blacklist_map,
file_path(FileType::BlockList)?,
)
.await?;
)?;
filter_with_lists(
&mut blacklist_map,
&mut result_map,
file_path(FileType::AllowList)?,
)
.await?;
)?;
drop(blacklist_map);
}
@ -198,16 +196,15 @@ pub async fn aggregate(
/// # Errors
///
/// Returns an error if the file at `file_path` cannot be opened or read, or if a regex pattern is invalid.
pub async fn filter_with_lists(
pub fn filter_with_lists(
map_to_be_filtered: &mut Vec<(String, SearchResult)>,
resultant_map: &mut Vec<(String, SearchResult)>,
file_path: &str,
) -> Result<(), Box<dyn std::error::Error>> {
let reader = BufReader::new(File::open(file_path).await?);
let mut lines = reader.lines();
let mut reader = BufReader::new(File::open(file_path)?);
while let Some(line) = lines.next_line().await? {
let re = Regex::new(line.trim())?;
for line in reader.by_ref().lines() {
let re = Regex::new(line?.trim())?;
let mut length = map_to_be_filtered.len();
let mut idx: usize = Default::default();
@ -239,8 +236,8 @@ mod tests {
use std::io::Write;
use tempfile::NamedTempFile;
#[tokio::test]
async fn test_filter_with_lists() -> Result<(), Box<dyn std::error::Error>> {
#[test]
fn test_filter_with_lists() -> Result<(), Box<dyn std::error::Error>> {
// Create a map of search results to filter
let mut map_to_be_filtered = Vec::new();
map_to_be_filtered.push((
@ -274,8 +271,7 @@ mod tests {
&mut map_to_be_filtered,
&mut resultant_map,
file.path().to_str().unwrap(),
)
.await?;
)?;
assert_eq!(resultant_map.len(), 2);
assert!(resultant_map
@ -289,8 +285,8 @@ mod tests {
Ok(())
}
#[tokio::test]
async fn test_filter_with_lists_wildcard() -> Result<(), Box<dyn std::error::Error>> {
#[test]
fn test_filter_with_lists_wildcard() -> Result<(), Box<dyn std::error::Error>> {
let mut map_to_be_filtered = Vec::new();
map_to_be_filtered.push((
"https://www.example.com".to_owned(),
@ -323,8 +319,7 @@ mod tests {
&mut map_to_be_filtered,
&mut resultant_map,
file.path().to_str().unwrap(),
)
.await?;
)?;
assert_eq!(resultant_map.len(), 1);
assert!(resultant_map
@ -338,8 +333,8 @@ mod tests {
Ok(())
}
#[tokio::test]
async fn test_filter_with_lists_file_not_found() {
#[test]
fn test_filter_with_lists_file_not_found() {
let mut map_to_be_filtered = Vec::new();
let mut resultant_map = Vec::new();
@ -351,11 +346,11 @@ mod tests {
"non-existent-file.txt",
);
assert!(result.await.is_err());
assert!(result.is_err());
}
#[tokio::test]
async fn test_filter_with_lists_invalid_regex() {
#[test]
fn test_filter_with_lists_invalid_regex() {
let mut map_to_be_filtered = Vec::new();
map_to_be_filtered.push((
"https://www.example.com".to_owned(),
@ -381,6 +376,6 @@ mod tests {
file.path().to_str().unwrap(),
);
assert!(result.await.is_err());
assert!(result.is_err());
}
}
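
In the aggregator, the same async-to-sync pattern replaces tokio's `AsyncBufReadExt::next_line` with std's `BufRead::lines` when scanning the block/allow lists. A minimal sketch of that pattern in isolation follows; the file name and example URL are placeholders.

// Synchronous filter-list scan: std's BufRead::lines replaces tokio's
// next_line(), and each line is compiled into a Regex and tested against a
// candidate URL.
use regex::Regex;
use std::{
    fs::File,
    io::{BufRead, BufReader},
};

fn matches_any_pattern(list_path: &str, url: &str) -> Result<bool, Box<dyn std::error::Error>> {
    let reader = BufReader::new(File::open(list_path)?);
    for line in reader.lines() {
        // Each line is treated as a regex pattern, as in `filter_with_lists`.
        let re = Regex::new(line?.trim())?;
        if re.is_match(url) {
            return Ok(true);
        }
    }
    Ok(false)
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Assumes a plain-text list with one pattern per line, e.g. `example\.org`.
    let blocked = matches_any_pattern("blocklist.txt", "https://example.org/")?;
    println!("blocked: {blocked}");
    Ok(())
}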


@ -7,7 +7,7 @@ use crate::{
handler::{file_path, FileType},
};
use actix_web::{get, http::header::ContentType, web, HttpRequest, HttpResponse};
use tokio::fs::read_to_string;
use std::fs::read_to_string;
/// Handles the route of index page or main page of the `websurfx` meta search engine website.
#[get("/")]
@ -43,7 +43,7 @@ pub async fn not_found(
#[get("/robots.txt")]
pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
let page_content: String =
read_to_string(format!("{}/robots.txt", file_path(FileType::Theme)?)).await?;
read_to_string(format!("{}/robots.txt", file_path(FileType::Theme)?))?;
Ok(HttpResponse::Ok()
.content_type(ContentType::plaintext())
.body(page_content))


@ -13,12 +13,12 @@ use crate::{
};
use actix_web::{get, http::header::ContentType, web, HttpRequest, HttpResponse};
use regex::Regex;
use std::borrow::Cow;
use tokio::{
use std::{
borrow::Cow,
fs::File,
io::{AsyncBufReadExt, BufReader},
join,
io::{BufRead, BufReader, Read},
};
use tokio::join;
/// Handles the route of search page of the `websurfx` meta search engine website and it takes
/// two search url parameters `q` and `page` where `page` parameter is optional.
@ -188,7 +188,7 @@ async fn results(
let mut results: SearchResults = SearchResults::default();
let flag: bool =
!is_match_from_filter_list(file_path(FileType::BlockList)?, query).await?;
!is_match_from_filter_list(file_path(FileType::BlockList)?, query)?;
// Return early when query contains disallowed words,
if flag {
results.set_disallowed();
@ -252,14 +252,13 @@ async fn results(
///
/// Returns a bool indicating whether the results were found in the list or not on success
/// otherwise returns a standard error type on a failure.
async fn is_match_from_filter_list(
fn is_match_from_filter_list(
file_path: &str,
query: &str,
) -> Result<bool, Box<dyn std::error::Error>> {
let reader = BufReader::new(File::open(file_path).await?);
let mut lines = reader.lines();
while let Some(line) = lines.next_line().await? {
let re = Regex::new(&line)?;
let mut reader = BufReader::new(File::open(file_path)?);
for line in reader.by_ref().lines() {
let re = Regex::new(&line?)?;
if re.is_match(query) {
return Ok(true);
}