0
0
mirror of https://github.com/neon-mmd/websurfx.git synced 2024-11-24 15:08:22 -05:00

Compare commits

..

1 Commits

Author SHA1 Message Date
Łukasz Mariański
612e877f8e
Merge ed661174ba into bf7e73f9ff 2024-05-25 13:41:39 +00:00
6 changed files with 539 additions and 753 deletions

1149
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
[package] [package]
name = "websurfx" name = "websurfx"
version = "1.17.20" version = "1.12.1"
edition = "2021" edition = "2021"
description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind." description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
repository = "https://github.com/neon-mmd/websurfx" repository = "https://github.com/neon-mmd/websurfx"
@ -13,12 +13,11 @@ bench = false
path = "src/bin/websurfx.rs" path = "src/bin/websurfx.rs"
[dependencies] [dependencies]
reqwest = { version = "0.12.5", default-features = false, features = [ reqwest = { version = "0.11.24", default-features = false, features = [
"rustls-tls", "rustls-tls",
"brotli", "brotli",
"gzip", "gzip",
"socks", "socks"
"http2",
] } ] }
tokio = { version = "1.32.0", features = [ tokio = { version = "1.32.0", features = [
"rt-multi-thread", "rt-multi-thread",
@ -26,13 +25,13 @@ tokio = { version = "1.32.0", features = [
"fs", "fs",
"io-util", "io-util",
], default-features = false } ], default-features = false }
serde = { version = "1.0.209", default-features = false, features = ["derive"] } serde = { version = "1.0.196", default-features = false, features = ["derive"] }
serde_json = { version = "1.0.122", default-features = false } serde_json = { version = "1.0.116", default-features = false }
maud = { version = "0.26.0", default-features = false, features = [ maud = { version = "0.26.0", default-features = false, features = [
"actix-web", "actix-web",
] } ] }
scraper = { version = "0.20.0", default-features = false } scraper = { version = "0.18.1", default-features = false }
actix-web = { version = "4.9.0", features = [ actix-web = { version = "4.4.0", features = [
"cookies", "cookies",
"macros", "macros",
"compress-brotli", "compress-brotli",
@ -42,15 +41,15 @@ actix-cors = { version = "0.7.0", default-features = false }
fake-useragent = { version = "0.1.3", default-features = false } fake-useragent = { version = "0.1.3", default-features = false }
env_logger = { version = "0.11.1", default-features = false } env_logger = { version = "0.11.1", default-features = false }
log = { version = "0.4.21", default-features = false } log = { version = "0.4.21", default-features = false }
mlua = { version = "0.9.9", features = [ mlua = { version = "0.9.1", features = [
"luajit", "luajit",
"vendored", "vendored",
], default-features = false } ], default-features = false }
redis = { version = "0.25.4", features = [ redis = { version = "0.24.0", features = [
"tokio-comp", "tokio-comp",
"connection-manager", "connection-manager",
], default-features = false, optional = true } ], default-features = false, optional = true }
blake3 = { version = "1.5.4", default-features = false } blake3 = { version = "1.5.0", default-features = false }
error-stack = { version = "0.4.0", default-features = false, features = [ error-stack = { version = "0.4.0", default-features = false, features = [
"std", "std",
] } ] }
@ -62,13 +61,13 @@ smallvec = { version = "1.13.1", features = [
], default-features = false } ], default-features = false }
futures = { version = "0.3.30", default-features = false, features = ["alloc"] } futures = { version = "0.3.30", default-features = false, features = ["alloc"] }
dhat = { version = "0.3.2", optional = true, default-features = false } dhat = { version = "0.3.2", optional = true, default-features = false }
mimalloc = { version = "0.1.43", default-features = false } mimalloc = { version = "0.1.38", default-features = false }
async-once-cell = { version = "0.5.3", default-features = false } async-once-cell = { version = "0.5.3", default-features = false }
actix-governor = { version = "0.5.0", default-features = false } actix-governor = { version = "0.5.0", default-features = false }
mini-moka = { version = "0.10", optional = true, default-features = false, features = [ mini-moka = { version = "0.10", optional = true, default-features = false, features = [
"sync", "sync",
] } ] }
async-compression = { version = "0.4.12", default-features = false, features = [ async-compression = { version = "0.4.6", default-features = false, features = [
"brotli", "brotli",
"tokio", "tokio",
], optional = true } ], optional = true }
@ -83,13 +82,14 @@ base64 = { version = "0.21.5", default-features = false, features = [
cfg-if = { version = "1.0.0", default-features = false, optional = true } cfg-if = { version = "1.0.0", default-features = false, optional = true }
keyword_extraction = { version = "1.4.3", default-features = false, features = [ keyword_extraction = { version = "1.4.3", default-features = false, features = [
"tf_idf", "tf_idf",
] } ] }
stop-words = { version = "0.8.0", default-features = false, features = ["iso"] } stop-words = { version = "0.8.0", default-features = false, features = ["iso"] }
thesaurus = { version = "0.5.2", default-features = false, optional = true, features = [ thesaurus = { version = "0.5.2", default-features = false, optional = true, features = [
"moby", "moby",
] } ] }
itertools = {version = "0.13.0", default-features = false}
[dev-dependencies] [dev-dependencies]
rusty-hook = { version = "^0.11.2", default-features = false } rusty-hook = { version = "^0.11.2", default-features = false }
@ -97,7 +97,7 @@ criterion = { version = "0.5.1", default-features = false }
tempfile = { version = "3.10.1", default-features = false } tempfile = { version = "3.10.1", default-features = false }
[build-dependencies] [build-dependencies]
lightningcss = { version = "1.0.0-alpha.57", default-features = false, features = [ lightningcss = { version = "1.0.0-alpha.55", default-features = false, features = [
"grid", "grid",
] } ] }
# Disabled until bug fixing update # Disabled until bug fixing update
@ -133,50 +133,6 @@ codegen-units = 1
rpath = false rpath = false
strip = "symbols" strip = "symbols"
[profile.bsr1]
inherits = "release"
opt-level = "s"
[profile.bsr2]
inherits = "bsr1"
opt-level = "z"
[profile.lpcb1]
inherits = "release"
codegen-units = 16
[profile.lpcb2]
inherits = "lpcb1"
lto = "off"
[profile.lpcb3]
inherits = "lpcb2"
opt-level = 2
[profile.bsr_and_lpcb1]
inherits = "lpcb1"
opt-level = "s"
[profile.bsr_and_lpcb2]
inherits = "lpcb2"
opt-level = "s"
[profile.bsr_and_lpcb3]
inherits = "lpcb3"
opt-level = "s"
[profile.bsr_and_lpcb4]
inherits = "lpcb1"
opt-level = "z"
[profile.bsr_and_lpcb5]
inherits = "lpcb1"
opt-level = "z"
[profile.bsr_and_lpcb6]
inherits = "lpcb1"
opt-level = "z"
[features] [features]
use-synonyms-search = ["thesaurus/static"] use-synonyms-search = ["thesaurus/static"]
default = ["memory-cache"] default = ["memory-cache"]

View File

@ -1,4 +1,4 @@
FROM --platform=$BUILDPLATFORM rust:1.78.0-alpine3.18 AS chef FROM --platform=$BUILDPLATFORM rust:1.77.2-alpine3.18 AS chef
# We only pay the installation cost once, # We only pay the installation cost once,
# it will be cached from the second build onwards # it will be cached from the second build onwards
RUN apk add --no-cache alpine-sdk musl-dev g++ make libcrypto3 libressl-dev upx perl build-base RUN apk add --no-cache alpine-sdk musl-dev g++ make libcrypto3 libressl-dev upx perl build-base

View File

@ -1,5 +1,5 @@
# Create Builder image # Create Builder image
FROM --platform=$BUILDPLATFORM rust:1.78.0-alpine3.18 FROM --platform=$BUILDPLATFORM rust:1.77.2-alpine3.18
# Install required dependencies # Install required dependencies
RUN apk add --no-cache alpine-sdk musl-dev g++ make libcrypto3 libressl-dev perl build-base RUN apk add --no-cache alpine-sdk musl-dev g++ make libcrypto3 libressl-dev perl build-base

View File

@ -4,10 +4,7 @@
use super::error::CacheError; use super::error::CacheError;
use error_stack::Report; use error_stack::Report;
use futures::stream::FuturesUnordered; use futures::stream::FuturesUnordered;
use redis::{ use redis::{aio::ConnectionManager, AsyncCommands, Client, RedisError};
aio::ConnectionManager, AsyncCommands, Client, ExistenceCheck, RedisError, SetExpiry,
SetOptions,
};
/// A constant holding the redis pipeline size. /// A constant holding the redis pipeline size.
const REDIS_PIPELINE_SIZE: usize = 3; const REDIS_PIPELINE_SIZE: usize = 3;
@ -142,14 +139,8 @@ impl RedisCache {
self.current_connection = Default::default(); self.current_connection = Default::default();
for (key, json_result) in keys.zip(json_results) { for (key, json_result) in keys.zip(json_results) {
self.pipeline.set_options( self.pipeline
key, .set_ex(key, json_result, self.cache_ttl.into());
json_result,
SetOptions::default()
.conditional_set(ExistenceCheck::NX)
.get(true)
.with_expiration(SetExpiry::EX(self.cache_ttl.into())),
);
} }
let mut result: Result<(), RedisError> = self let mut result: Result<(), RedisError> = self

View File

@ -12,7 +12,6 @@ use crate::{
results::aggregator::aggregate, results::aggregator::aggregate,
}; };
use actix_web::{get, http::header::ContentType, web, HttpRequest, HttpResponse}; use actix_web::{get, http::header::ContentType, web, HttpRequest, HttpResponse};
use itertools::Itertools;
use regex::Regex; use regex::Regex;
use std::borrow::Cow; use std::borrow::Cow;
use tokio::{ use tokio::{
@ -41,6 +40,7 @@ pub async fn search(
config: web::Data<&'static Config>, config: web::Data<&'static Config>,
cache: web::Data<&'static SharedCache>, cache: web::Data<&'static SharedCache>,
) -> Result<HttpResponse, Box<dyn std::error::Error>> { ) -> Result<HttpResponse, Box<dyn std::error::Error>> {
use std::sync::Arc;
let params = web::Query::<SearchParams>::from_query(req.query_string())?; let params = web::Query::<SearchParams>::from_query(req.query_string())?;
match &params.q { match &params.q {
Some(query) => { Some(query) => {
@ -83,36 +83,44 @@ pub async fn search(
let previous_page = page.saturating_sub(1); let previous_page = page.saturating_sub(1);
let next_page = page + 1; let next_page = page + 1;
let results: (SearchResults, String, bool); let mut results = Arc::new((SearchResults::default(), String::default()));
if page != previous_page { if page != previous_page {
let (previous_results, current_results, next_results) = join!( let (previous_results, current_results, next_results) = join!(
get_results(previous_page), get_results(previous_page),
get_results(page), get_results(page),
get_results(next_page) get_results(next_page)
); );
let (parsed_previous_results, parsed_next_results) =
(previous_results?, next_results?);
results = current_results?; let (cache_keys, results_list) = (
[
parsed_previous_results.1,
results.1.clone(),
parsed_next_results.1,
],
[
parsed_previous_results.0,
results.0.clone(),
parsed_next_results.0,
],
);
let (results_list, cache_keys): (Vec<SearchResults>, Vec<String>) = results = Arc::new(current_results?);
[previous_results?, results.clone(), next_results?]
.into_iter()
.filter_map(|(result, cache_key, flag)| {
dbg!(flag).then_some((result, cache_key))
})
.multiunzip();
tokio::spawn(async move { cache.cache_results(&results_list, &cache_keys).await }); tokio::spawn(async move { cache.cache_results(&results_list, &cache_keys).await });
} else { } else {
let (current_results, next_results) = let (current_results, next_results) =
join!(get_results(page), get_results(page + 1)); join!(get_results(page), get_results(page + 1));
results = current_results?; let parsed_next_results = next_results?;
let (results_list, cache_keys): (Vec<SearchResults>, Vec<String>) = results = Arc::new(current_results?);
[results.clone(), next_results?]
.into_iter() let (cache_keys, results_list) = (
.filter_map(|(result, cache_key, flag)| flag.then_some((result, cache_key))) [results.1.clone(), parsed_next_results.1.clone()],
.multiunzip(); [results.0.clone(), parsed_next_results.0],
);
tokio::spawn(async move { cache.cache_results(&results_list, &cache_keys).await }); tokio::spawn(async move { cache.cache_results(&results_list, &cache_keys).await });
} }
@ -155,7 +163,7 @@ async fn results(
query: &str, query: &str,
page: u32, page: u32,
search_settings: &server_models::Cookie<'_>, search_settings: &server_models::Cookie<'_>,
) -> Result<(SearchResults, String, bool), Box<dyn std::error::Error>> { ) -> Result<(SearchResults, String), Box<dyn std::error::Error>> {
// eagerly parse cookie value to evaluate safe search level // eagerly parse cookie value to evaluate safe search level
let safe_search_level = search_settings.safe_search_level; let safe_search_level = search_settings.safe_search_level;
@ -174,7 +182,7 @@ async fn results(
// check if fetched cache results was indeed fetched or it was an error and if so // check if fetched cache results was indeed fetched or it was an error and if so
// handle the data accordingly. // handle the data accordingly.
match cached_results { match cached_results {
Ok(results) => Ok((results, cache_key, false)), Ok(results) => Ok((results, cache_key)),
Err(_) => { Err(_) => {
if safe_search_level == 4 { if safe_search_level == 4 {
let mut results: SearchResults = SearchResults::default(); let mut results: SearchResults = SearchResults::default();
@ -188,7 +196,7 @@ async fn results(
.cache_results(&[results.clone()], &[cache_key.clone()]) .cache_results(&[results.clone()], &[cache_key.clone()])
.await?; .await?;
results.set_safe_search_level(safe_search_level); results.set_safe_search_level(safe_search_level);
return Ok((results, cache_key, true)); return Ok((results, cache_key));
} }
} }
@ -227,7 +235,7 @@ async fn results(
.cache_results(&[results.clone()], &[cache_key.clone()]) .cache_results(&[results.clone()], &[cache_key.clone()])
.await?; .await?;
results.set_safe_search_level(safe_search_level); results.set_safe_search_level(safe_search_level);
Ok((results, cache_key, true)) Ok((results, cache_key))
} }
} }
} }