Mirror of https://github.com/neon-mmd/websurfx.git (synced 2024-11-22 22:18:23 -05:00)

Compare commits

No commits in common. "e385a577e021fe2ba1f08e549658b93bdce56c2c" and "6aa99922a6abc6657288439c8d6c9a384ec8aa65" have entirely different histories.

e385a577e0 ... 6aa99922a6
Cargo.lock (generated): 617 lines changed
File diff suppressed because it is too large.
Cargo.toml

@@ -1,6 +1,6 @@
 [package]
 name = "websurfx"
-version = "1.10.9"
+version = "1.9.20"
 edition = "2021"
 description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
 repository = "https://github.com/neon-mmd/websurfx"
@@ -14,7 +14,7 @@ path = "src/bin/websurfx.rs"
 
 [dependencies]
 reqwest = {version="0.11.24", default-features=false, features=["rustls-tls","brotli", "gzip"]}
-tokio = {version="1.32.0",features=["rt-multi-thread","macros", "fs", "io-util"], default-features = false}
+tokio = {version="1.32.0",features=["rt-multi-thread","macros"], default-features = false}
 serde = {version="1.0.196", default-features=false, features=["derive"]}
 serde_json = {version="1.0.109", default-features=false}
 maud = {version="0.25.0", default-features=false, features=["actix-web"]}
@@ -38,7 +38,7 @@ mimalloc = { version = "0.1.38", default-features = false }
 async-once-cell = {version="0.5.3", default-features=false}
 actix-governor = {version="0.5.0", default-features=false}
 mini-moka = { version="0.10", optional = true, default-features=false, features=["sync"]}
-async-compression = { version = "0.4.6", default-features = false, features=["brotli","tokio"], optional=true}
+brotli = { version = "3.4.0", default-features = false, features=["std"], optional=true}
 chacha20poly1305={version="0.10.1", default-features=false, features=["alloc","getrandom"], optional=true}
 chacha20 = {version="0.9.1", default-features=false, optional=true}
 base64 = {version="0.21.5", default-features=false, features=["std"], optional=true}
@@ -84,7 +84,7 @@ default = ["memory-cache"]
 dhat-heap = ["dep:dhat"]
 memory-cache = ["dep:mini-moka"]
 redis-cache = ["dep:redis","dep:base64"]
-compress-cache-results = ["dep:async-compression","dep:cfg-if"]
+compress-cache-results = ["dep:brotli","dep:cfg-if"]
 encrypt-cache-results = ["dep:chacha20poly1305","dep:chacha20"]
 cec-cache-results = ["compress-cache-results","encrypt-cache-results"]
 
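Reviewer note (not part of the diff): swapping the optional dependency from async-compression to brotli only changes what the compress-cache-results feature pulls in; consuming code stays gated on the same feature names. A minimal, hypothetical sketch of that gating:

// Hypothetical illustration: this function only exists when a feature that
// pulls in "dep:brotli" is enabled, e.g.
//   cargo build --features compress-cache-results
#[cfg(any(feature = "compress-cache-results", feature = "cec-cache-results"))]
fn compression_backend() -> &'static str {
    "brotli (synchronous)"
}

#[cfg(not(any(feature = "compress-cache-results", feature = "cec-cache-results")))]
fn compression_backend() -> &'static str {
    "none"
}

fn main() {
    println!("cache compression backend: {}", compression_backend());
}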
src/cache/cacher.rs (vendored): 68 lines changed

@@ -93,7 +93,7 @@ pub trait Cacher: Send + Sync {
         feature = "encrypt-cache-results",
         feature = "cec-cache-results"
     ))]
-    async fn encrypt_or_decrypt_results(
+    fn encrypt_or_decrypt_results(
         &mut self,
         mut bytes: Vec<u8>,
         encrypt: bool,
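The body of encrypt_or_decrypt_results is not shown in this hunk. For reference, a minimal standalone sketch of the chacha20poly1305 0.10 API that the manifest above depends on (key, nonce, and payload here are illustrative, not the crate's actual key handling):

use chacha20poly1305::{
    aead::{Aead, AeadCore, KeyInit, OsRng},
    ChaCha20Poly1305,
};

fn main() {
    let key = ChaCha20Poly1305::generate_key(&mut OsRng);
    let cipher = ChaCha20Poly1305::new(&key);
    // 96-bit nonce; must be unique per encrypted message.
    let nonce = ChaCha20Poly1305::generate_nonce(&mut OsRng);
    let ciphertext = cipher.encrypt(&nonce, b"cached bytes".as_ref()).unwrap();
    let plaintext = cipher.decrypt(&nonce, ciphertext.as_ref()).unwrap();
    assert_eq!(plaintext, b"cached bytes");
}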
@@ -137,19 +137,11 @@ pub trait Cacher: Send + Sync {
     /// Returns the compressed bytes on success otherwise it returns a CacheError
     /// on failure.
     #[cfg(any(feature = "compress-cache-results", feature = "cec-cache-results"))]
-    async fn compress_results(
-        &mut self,
-        mut bytes: Vec<u8>,
-    ) -> Result<Vec<u8>, Report<CacheError>> {
-        use tokio::io::AsyncWriteExt;
-        let mut writer = async_compression::tokio::write::BrotliEncoder::new(Vec::new());
+    fn compress_results(&mut self, mut bytes: Vec<u8>) -> Result<Vec<u8>, Report<CacheError>> {
+        use std::io::Write;
+        let mut writer = brotli::CompressorWriter::new(Vec::new(), 4096, 11, 22);
         writer
             .write_all(&bytes)
-            .await
             .map_err(|_| CacheError::CompressionError)?;
-        writer
-            .shutdown()
-            .await
-            .map_err(|_| CacheError::CompressionError)?;
         bytes = writer.into_inner();
         Ok(bytes)
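The three bare integers in the replacement writer are easy to misread; a hedged annotation of brotli::CompressorWriter::new as used above (payload is illustrative):

use std::io::Write;

fn main() {
    // Arguments, as I read the brotli 3.x API:
    //   4096 -> internal buffer size in bytes
    //   11   -> quality (0..=11; 11 favours ratio over speed)
    //   22   -> lg_window_size (log2 of the LZ77 window)
    let mut writer = brotli::CompressorWriter::new(Vec::new(), 4096, 11, 22);
    writer.write_all(b"example payload").unwrap();
    let compressed: Vec<u8> = writer.into_inner();
    assert!(!compressed.is_empty());
}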
@@ -167,17 +159,17 @@ pub trait Cacher: Send + Sync {
     /// Returns the compressed and encrypted bytes on success otherwise it returns a CacheError
     /// on failure.
     #[cfg(feature = "cec-cache-results")]
-    async fn compress_encrypt_compress_results(
+    fn compress_encrypt_compress_results(
         &mut self,
         mut bytes: Vec<u8>,
     ) -> Result<Vec<u8>, Report<CacheError>> {
         // compress first
-        bytes = self.compress_results(bytes).await?;
+        bytes = self.compress_results(bytes)?;
         // encrypt
-        bytes = self.encrypt_or_decrypt_results(bytes, true).await?;
+        bytes = self.encrypt_or_decrypt_results(bytes, true)?;
 
         // compress again;
-        bytes = self.compress_results(bytes).await?;
+        bytes = self.compress_results(bytes)?;
 
         Ok(bytes)
     }
@@ -195,11 +187,11 @@ pub trait Cacher: Send + Sync {
     /// on failure.
 
     #[cfg(any(feature = "compress-cache-results", feature = "cec-cache-results"))]
-    async fn decompress_results(&mut self, bytes: &[u8]) -> Result<Vec<u8>, Report<CacheError>> {
+    fn decompress_results(&mut self, bytes: &[u8]) -> Result<Vec<u8>, Report<CacheError>> {
         cfg_if::cfg_if! {
             if #[cfg(feature = "compress-cache-results")]
             {
-                decompress_util(bytes).await
+                decompress_util(bytes)
 
             }
             else if #[cfg(feature = "cec-cache-results")]
@@ -207,7 +199,7 @@ pub trait Cacher: Send + Sync {
                 let decompressed = decompress_util(bytes)?;
                 let decrypted = self.encrypt_or_decrypt_results(decompressed, false)?;
 
-                decompress_util(&decrypted).await
+                decompress_util(&decrypted)
 
             }
         }
@@ -224,7 +216,7 @@ pub trait Cacher: Send + Sync {
     /// # Error
     /// Returns a Vec of compressed or encrypted bytes on success otherwise it returns a CacheError
     /// on failure.
-    async fn pre_process_search_results(
+    fn pre_process_search_results(
         &mut self,
         search_results: &SearchResults,
     ) -> Result<Vec<u8>, Report<CacheError>> {
@@ -232,20 +224,19 @@ pub trait Cacher: Send + Sync {
         let mut bytes: Vec<u8> = search_results.try_into()?;
         #[cfg(feature = "compress-cache-results")]
         {
-            let compressed = self.compress_results(bytes).await?;
+            let compressed = self.compress_results(bytes)?;
             bytes = compressed;
         }
 
         #[cfg(feature = "encrypt-cache-results")]
         {
-            let encrypted = self.encrypt_or_decrypt_results(bytes, true).await?;
+            let encrypted = self.encrypt_or_decrypt_results(bytes, true)?;
             bytes = encrypted;
         }
 
         #[cfg(feature = "cec-cache-results")]
         {
-            let compressed_encrypted_compressed =
-                self.compress_encrypt_compress_results(bytes).await?;
+            let compressed_encrypted_compressed = self.compress_encrypt_compress_results(bytes)?;
             bytes = compressed_encrypted_compressed;
         }
 
@@ -265,25 +256,25 @@ pub trait Cacher: Send + Sync {
     /// on failure.
 
     #[allow(unused_mut)] // needs to be mutable when any of the features is enabled
-    async fn post_process_search_results(
+    fn post_process_search_results(
         &mut self,
         mut bytes: Vec<u8>,
     ) -> Result<SearchResults, Report<CacheError>> {
         #[cfg(feature = "compress-cache-results")]
         {
-            let decompressed = self.decompress_results(&bytes).await?;
+            let decompressed = self.decompress_results(&bytes)?;
             bytes = decompressed
         }
 
         #[cfg(feature = "encrypt-cache-results")]
         {
-            let decrypted = self.encrypt_or_decrypt_results(bytes, false).await?;
+            let decrypted = self.encrypt_or_decrypt_results(bytes, false)?;
             bytes = decrypted
         }
 
         #[cfg(feature = "cec-cache-results")]
         {
-            let decompressed_decrypted = self.decompress_results(&bytes).await?;
+            let decompressed_decrypted = self.decompress_results(&bytes)?;
             bytes = decompressed_decrypted;
         }
 
@@ -304,19 +295,16 @@
 /// on failure.
 
 #[cfg(any(feature = "compress-cache-results", feature = "cec-cache-results"))]
-async fn decompress_util(input: &[u8]) -> Result<Vec<u8>, Report<CacheError>> {
-    use tokio::io::AsyncWriteExt;
-    let mut writer = async_compression::tokio::write::BrotliDecoder::new(Vec::new());
+fn decompress_util(input: &[u8]) -> Result<Vec<u8>, Report<CacheError>> {
+    use std::io::Write;
+    let mut writer = brotli::DecompressorWriter::new(Vec::new(), 4096);
 
     writer
         .write_all(input)
-        .await
         .map_err(|_| CacheError::CompressionError)?;
-    writer
-        .shutdown()
-        .await
+    let bytes = writer
+        .into_inner()
         .map_err(|_| CacheError::CompressionError)?;
-    let bytes = writer.into_inner();
     Ok(bytes)
 }
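To sanity-check the symmetric pair above, a small round-trip sketch using the same brotli 3.x calls (payload is illustrative). Note the asymmetry the diff also handles: CompressorWriter::into_inner returns the buffer directly, while DecompressorWriter::into_inner returns a Result, which the new code maps to CacheError::CompressionError:

use std::io::Write;

fn main() {
    // Compress with the same parameters as compress_results above.
    let mut enc = brotli::CompressorWriter::new(Vec::new(), 4096, 11, 22);
    enc.write_all(b"hello brotli").unwrap();
    let compressed = enc.into_inner();

    // Decompress with the same buffer size as decompress_util above.
    let mut dec = brotli::DecompressorWriter::new(Vec::new(), 4096);
    dec.write_all(&compressed).unwrap();
    let decompressed = dec.into_inner().unwrap();
    assert_eq!(decompressed, b"hello brotli");
}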
||||
@ -341,7 +329,7 @@ impl Cacher for RedisCache {
|
||||
let bytes = base64::engine::general_purpose::STANDARD_NO_PAD
|
||||
.decode(base64_string)
|
||||
.map_err(|_| CacheError::Base64DecodingOrEncodingError)?;
|
||||
self.post_process_search_results(bytes).await
|
||||
self.post_process_search_results(bytes)
|
||||
}
|
||||
|
||||
async fn cache_results(
|
||||
@ -357,7 +345,7 @@ impl Cacher for RedisCache {
|
||||
let mut bytes = Vec::with_capacity(search_results_len);
|
||||
|
||||
for result in search_results {
|
||||
let processed = self.pre_process_search_results(result).await?;
|
||||
let processed = self.pre_process_search_results(result)?;
|
||||
bytes.push(processed);
|
||||
}
|
||||
|
||||
@ -417,7 +405,7 @@ impl Cacher for InMemoryCache {
|
||||
async fn cached_results(&mut self, url: &str) -> Result<SearchResults, Report<CacheError>> {
|
||||
let hashed_url_string = self.hash_url(url);
|
||||
match self.cache.get(&hashed_url_string) {
|
||||
Some(res) => self.post_process_search_results(res).await,
|
||||
Some(res) => self.post_process_search_results(res),
|
||||
None => Err(Report::new(CacheError::MissingValue)),
|
||||
}
|
||||
}
|
||||
@ -429,7 +417,7 @@ impl Cacher for InMemoryCache {
|
||||
) -> Result<(), Report<CacheError>> {
|
||||
for (url, search_result) in urls.iter().zip(search_results.iter()) {
|
||||
let hashed_url_string = self.hash_url(url);
|
||||
let bytes = self.pre_process_search_results(search_result).await?;
|
||||
let bytes = self.pre_process_search_results(search_result)?;
|
||||
self.cache.insert(hashed_url_string, bytes);
|
||||
}
|
||||
|
||||
|
@@ -14,12 +14,12 @@ use regex::Regex;
 use reqwest::{Client, ClientBuilder};
 use std::sync::Arc;
 use std::time::{SystemTime, UNIX_EPOCH};
-use tokio::{
-    fs::File,
-    io::{AsyncBufReadExt, BufReader},
-    task::JoinHandle,
+use std::{fs::File, io::BufRead};
+use std::{
+    io::{BufReader, Read},
+    time::Duration,
 };
+use tokio::task::JoinHandle;
 
 /// A constant for holding the prebuilt Client globally in the app.
 static CLIENT: std::sync::OnceLock<Client> = std::sync::OnceLock::new();
@@ -169,15 +169,13 @@ pub async fn aggregate(
             &mut result_map,
             &mut blacklist_map,
             file_path(FileType::BlockList)?,
-        )
-        .await?;
+        )?;
 
         filter_with_lists(
             &mut blacklist_map,
             &mut result_map,
             file_path(FileType::AllowList)?,
-        )
-        .await?;
+        )?;
 
         drop(blacklist_map);
     }
@@ -198,16 +196,15 @@ pub async fn aggregate(
 /// # Errors
 ///
 /// Returns an error if the file at `file_path` cannot be opened or read, or if a regex pattern is invalid.
-pub async fn filter_with_lists(
+pub fn filter_with_lists(
     map_to_be_filtered: &mut Vec<(String, SearchResult)>,
     resultant_map: &mut Vec<(String, SearchResult)>,
     file_path: &str,
 ) -> Result<(), Box<dyn std::error::Error>> {
-    let reader = BufReader::new(File::open(file_path).await?);
-    let mut lines = reader.lines();
+    let mut reader = BufReader::new(File::open(file_path)?);
 
-    while let Some(line) = lines.next_line().await? {
-        let re = Regex::new(line.trim())?;
+    for line in reader.by_ref().lines() {
+        let re = Regex::new(line?.trim())?;
 
         let mut length = map_to_be_filtered.len();
         let mut idx: usize = Default::default();
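One detail worth a note: std's BufRead::lines() takes the reader by value, which is why the new code calls reader.by_ref().lines(). A minimal sketch of the pattern (the file name is hypothetical):

use std::fs::File;
use std::io::{BufRead, BufReader, Read};

fn main() -> std::io::Result<()> {
    let mut reader = BufReader::new(File::open("blocklist.txt")?);
    // by_ref() lends the reader to lines() instead of moving it,
    // so `reader` would stay usable after the loop if needed.
    for line in reader.by_ref().lines() {
        let pattern = line?; // each item is an io::Result<String>
        println!("{pattern}");
    }
    Ok(())
}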
@@ -239,8 +236,8 @@ mod tests {
     use std::io::Write;
     use tempfile::NamedTempFile;
 
-    #[tokio::test]
-    async fn test_filter_with_lists() -> Result<(), Box<dyn std::error::Error>> {
+    #[test]
+    fn test_filter_with_lists() -> Result<(), Box<dyn std::error::Error>> {
         // Create a map of search results to filter
         let mut map_to_be_filtered = Vec::new();
         map_to_be_filtered.push((
@@ -274,8 +271,7 @@ mod tests {
             &mut map_to_be_filtered,
             &mut resultant_map,
             file.path().to_str().unwrap(),
-        )
-        .await?;
+        )?;
 
         assert_eq!(resultant_map.len(), 2);
         assert!(resultant_map
@@ -289,8 +285,8 @@ mod tests {
         Ok(())
     }
 
-    #[tokio::test]
-    async fn test_filter_with_lists_wildcard() -> Result<(), Box<dyn std::error::Error>> {
+    #[test]
+    fn test_filter_with_lists_wildcard() -> Result<(), Box<dyn std::error::Error>> {
         let mut map_to_be_filtered = Vec::new();
         map_to_be_filtered.push((
             "https://www.example.com".to_owned(),
@@ -323,8 +319,7 @@ mod tests {
             &mut map_to_be_filtered,
             &mut resultant_map,
             file.path().to_str().unwrap(),
-        )
-        .await?;
+        )?;
 
         assert_eq!(resultant_map.len(), 1);
         assert!(resultant_map
@@ -338,8 +333,8 @@ mod tests {
         Ok(())
     }
 
-    #[tokio::test]
-    async fn test_filter_with_lists_file_not_found() {
+    #[test]
+    fn test_filter_with_lists_file_not_found() {
         let mut map_to_be_filtered = Vec::new();
 
         let mut resultant_map = Vec::new();
@@ -351,11 +346,11 @@ mod tests {
             "non-existent-file.txt",
         );
 
-        assert!(result.await.is_err());
+        assert!(result.is_err());
     }
 
-    #[tokio::test]
-    async fn test_filter_with_lists_invalid_regex() {
+    #[test]
+    fn test_filter_with_lists_invalid_regex() {
         let mut map_to_be_filtered = Vec::new();
         map_to_be_filtered.push((
             "https://www.example.com".to_owned(),
@@ -381,6 +376,6 @@ mod tests {
             file.path().to_str().unwrap(),
         );
 
-        assert!(result.await.is_err());
+        assert!(result.is_err());
    }
 }
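The tests keep their tempfile setup; only the runtime changes, since a plain fn under test needs no tokio executor. A hedged sketch of the NamedTempFile pattern they rely on (the regex line is illustrative):

use std::io::Write;
use tempfile::NamedTempFile;

#[test]
fn writes_a_filter_file() -> Result<(), Box<dyn std::error::Error>> {
    // Write one regex pattern per line, then hand the path to the
    // now-synchronous filter_with_lists; no #[tokio::test] or .await needed.
    let mut file = NamedTempFile::new()?;
    writeln!(file, r"example\.com")?;
    assert!(file.path().exists());
    Ok(())
}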
@@ -7,7 +7,7 @@ use crate::{
     handler::{file_path, FileType},
 };
 use actix_web::{get, http::header::ContentType, web, HttpRequest, HttpResponse};
-use tokio::fs::read_to_string;
+use std::fs::read_to_string;
 
 /// Handles the route of index page or main page of the `websurfx` meta search engine website.
 #[get("/")]
@@ -43,7 +43,7 @@ pub async fn not_found(
 #[get("/robots.txt")]
 pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
     let page_content: String =
-        read_to_string(format!("{}/robots.txt", file_path(FileType::Theme)?)).await?;
+        read_to_string(format!("{}/robots.txt", file_path(FileType::Theme)?))?;
     Ok(HttpResponse::Ok()
         .content_type(ContentType::plaintext())
         .body(page_content))
@@ -13,12 +13,12 @@ use crate::{
 };
 use actix_web::{get, http::header::ContentType, web, HttpRequest, HttpResponse};
 use regex::Regex;
-use std::borrow::Cow;
-use tokio::{
+use std::{
+    borrow::Cow,
     fs::File,
-    io::{AsyncBufReadExt, BufReader},
-    join,
+    io::{BufRead, BufReader, Read},
 };
+use tokio::join;
 
 /// Handles the route of search page of the `websurfx` meta search engine website and it takes
 /// two search url parameters `q` and `page` where `page` parameter is optional.
@@ -188,7 +188,7 @@ async fn results(
     let mut results: SearchResults = SearchResults::default();
 
     let flag: bool =
-        !is_match_from_filter_list(file_path(FileType::BlockList)?, query).await?;
+        !is_match_from_filter_list(file_path(FileType::BlockList)?, query)?;
     // Return early when query contains disallowed words,
     if flag {
         results.set_disallowed();
@@ -252,14 +252,13 @@ async fn results(
 ///
 /// Returns a bool indicating whether the results were found in the list or not on success
 /// otherwise returns a standard error type on a failure.
-async fn is_match_from_filter_list(
+fn is_match_from_filter_list(
     file_path: &str,
     query: &str,
 ) -> Result<bool, Box<dyn std::error::Error>> {
-    let reader = BufReader::new(File::open(file_path).await?);
-    let mut lines = reader.lines();
-    while let Some(line) = lines.next_line().await? {
-        let re = Regex::new(&line)?;
+    let mut reader = BufReader::new(File::open(file_path)?);
+    for line in reader.by_ref().lines() {
+        let re = Regex::new(&line?)?;
         if re.is_match(query) {
             return Ok(true);
         }
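For clarity on the matching above: each line of the filter file is compiled as its own Regex and tested against the query. A tiny standalone sketch (pattern and query are illustrative):

use regex::Regex;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let line = r"(?i)forbidden"; // hypothetical filter-file entry
    let re = Regex::new(line)?; // an invalid pattern propagates as an error
    assert!(re.is_match("a FORBIDDEN query"));
    Ok(())
}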