0
0
mirror of https://github.com/neon-mmd/websurfx.git synced 2024-11-24 23:18:22 -05:00

Compare commits

...

9 Commits

Author SHA1 Message Date
Kekma
9a812c1282
Merge c6b93403b8 into 948d20d8fb 2024-09-04 15:49:34 +00:00
mergify[bot]
c6b93403b8
Merge branch 'rolling' into 532 2024-09-04 15:49:31 +00:00
alamin655
948d20d8fb
Merge pull request #599 from neon-mmd/FIX/592_redis-does-not-invalidate-cached-results
🐛 Redis does not invalidate cached search results
2024-09-04 21:18:54 +05:30
alamin655
4315221385
Merge branch 'rolling' into FIX/592_redis-does-not-invalidate-cached-results 2024-09-04 21:14:22 +05:30
dependabot[bot]
3a1ff0f307
build(deps): bump quinn-proto from 0.11.6 to 0.11.8 (#600) 2024-09-04 01:17:18 +00:00
neon_arch
b22d60f166 🔖 chore(release): bump the app version (#592) 2024-09-02 21:40:49 +05:30
neon_arch
acee5d892d 🐛 fix: replace deprecated set_ex command with set_options in cache_json function (#592) 2024-09-02 21:17:31 +05:30
neon_arch
9a5f1c5f44 🐛 fix: reimplement caching code within the search function (#592)
- reduce resource usage &
- only cache search results which have not been cached before.
2024-09-02 21:10:54 +05:30
neon_arch
ebee1f4a6c build(deps): add itertools crate (#592) 2024-09-02 20:57:50 +05:30
4 changed files with 53 additions and 64 deletions

47
Cargo.lock generated
View File

@ -351,7 +351,7 @@ version = "0.4.12"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fec134f64e2bc57411226dfc4e52dec859ddfc7e711fc5e07b612584f000e4aa" checksum = "fec134f64e2bc57411226dfc4e52dec859ddfc7e711fc5e07b612584f000e4aa"
dependencies = [ dependencies = [
"brotli 5.0.0", "brotli",
"flate2", "flate2",
"futures-core", "futures-core",
"memchr", "memchr",
@ -490,28 +490,7 @@ checksum = "74f7971dbd9326d58187408ab83117d8ac1bb9c17b085fdacd1cf2f598719b6b"
dependencies = [ dependencies = [
"alloc-no-stdlib", "alloc-no-stdlib",
"alloc-stdlib", "alloc-stdlib",
"brotli-decompressor 2.5.1", "brotli-decompressor",
]
[[package]]
name = "brotli"
version = "5.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19483b140a7ac7174d34b5a581b406c64f84da5409d3e09cf4fff604f9270e67"
dependencies = [
"alloc-no-stdlib",
"alloc-stdlib",
"brotli-decompressor 4.0.0",
]
[[package]]
name = "brotli-decompressor"
version = "4.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a45bd2e4095a8b518033b128020dd4a55aab1c0a381ba4404a472630f4bc362"
dependencies = [
"alloc-no-stdlib",
"alloc-stdlib",
] ]
[[package]] [[package]]
@ -872,7 +851,7 @@ dependencies = [
"clap", "clap",
"criterion-plot", "criterion-plot",
"is-terminal", "is-terminal",
"itertools", "itertools 0.10.5",
"num-traits", "num-traits",
"once_cell", "once_cell",
"oorandom", "oorandom",
@ -891,7 +870,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1"
dependencies = [ dependencies = [
"cast", "cast",
"itertools", "itertools 0.10.5",
] ]
[[package]] [[package]]
@ -1902,6 +1881,15 @@ dependencies = [
"either", "either",
] ]
[[package]]
name = "itertools"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
dependencies = [
"either",
]
[[package]] [[package]]
name = "itoa" name = "itoa"
version = "0.4.8" version = "0.4.8"
@ -2004,7 +1992,7 @@ dependencies = [
"cssparser-color", "cssparser-color",
"data-encoding", "data-encoding",
"getrandom", "getrandom",
"itertools", "itertools 0.10.5",
"lazy_static", "lazy_static",
"parcel_selectors", "parcel_selectors",
"paste", "paste",
@ -2849,9 +2837,9 @@ dependencies = [
[[package]] [[package]]
name = "quinn-proto" name = "quinn-proto"
version = "0.11.6" version = "0.11.8"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba92fb39ec7ad06ca2582c0ca834dfeadcaf06ddfc8e635c80aa7e1c05315fdd" checksum = "fadfaed2cd7f389d0161bb73eeb07b7b78f8691047a6f3e73caaeae55310a4a6"
dependencies = [ dependencies = [
"bytes 1.6.0", "bytes 1.6.0",
"rand 0.8.5", "rand 0.8.5",
@ -4461,7 +4449,7 @@ dependencies = [
[[package]] [[package]]
name = "websurfx" name = "websurfx"
version = "1.17.0" version = "1.17.20"
dependencies = [ dependencies = [
"actix-cors", "actix-cors",
"actix-files", "actix-files",
@ -4481,6 +4469,7 @@ dependencies = [
"error-stack", "error-stack",
"fake-useragent", "fake-useragent",
"futures 0.3.30", "futures 0.3.30",
"itertools 0.13.0",
"keyword_extraction", "keyword_extraction",
"lightningcss", "lightningcss",
"log", "log",

View File

@ -1,6 +1,6 @@
[package] [package]
name = "websurfx" name = "websurfx"
version = "1.17.0" version = "1.17.20"
edition = "2021" edition = "2021"
description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind." description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
repository = "https://github.com/neon-mmd/websurfx" repository = "https://github.com/neon-mmd/websurfx"
@ -82,14 +82,13 @@ base64 = { version = "0.21.5", default-features = false, features = [
cfg-if = { version = "1.0.0", default-features = false, optional = true } cfg-if = { version = "1.0.0", default-features = false, optional = true }
keyword_extraction = { version = "1.4.3", default-features = false, features = [ keyword_extraction = { version = "1.4.3", default-features = false, features = [
"tf_idf", "tf_idf",
] } ] }
stop-words = { version = "0.8.0", default-features = false, features = ["iso"] } stop-words = { version = "0.8.0", default-features = false, features = ["iso"] }
thesaurus = { version = "0.5.2", default-features = false, optional = true, features = [ thesaurus = { version = "0.5.2", default-features = false, optional = true, features = [
"moby", "moby",
] } ]}
itertools = {version = "0.13.0", default-features = false}
[dev-dependencies] [dev-dependencies]
rusty-hook = { version = "^0.11.2", default-features = false } rusty-hook = { version = "^0.11.2", default-features = false }

View File

@ -4,7 +4,10 @@
use super::error::CacheError; use super::error::CacheError;
use error_stack::Report; use error_stack::Report;
use futures::stream::FuturesUnordered; use futures::stream::FuturesUnordered;
use redis::{aio::ConnectionManager, AsyncCommands, Client, RedisError}; use redis::{
aio::ConnectionManager, AsyncCommands, Client, ExistenceCheck, RedisError, SetExpiry,
SetOptions,
};
/// A constant holding the redis pipeline size. /// A constant holding the redis pipeline size.
const REDIS_PIPELINE_SIZE: usize = 3; const REDIS_PIPELINE_SIZE: usize = 3;
@ -139,8 +142,14 @@ impl RedisCache {
self.current_connection = Default::default(); self.current_connection = Default::default();
for (key, json_result) in keys.zip(json_results) { for (key, json_result) in keys.zip(json_results) {
self.pipeline self.pipeline.set_options(
.set_ex(key, json_result, self.cache_ttl.into()); key,
json_result,
SetOptions::default()
.conditional_set(ExistenceCheck::NX)
.get(true)
.with_expiration(SetExpiry::EX(self.cache_ttl.into())),
);
} }
let mut result: Result<(), RedisError> = self let mut result: Result<(), RedisError> = self

View File

@ -12,6 +12,7 @@ use crate::{
results::aggregator::aggregate, results::aggregator::aggregate,
}; };
use actix_web::{get, http::header::ContentType, web, HttpRequest, HttpResponse}; use actix_web::{get, http::header::ContentType, web, HttpRequest, HttpResponse};
use itertools::Itertools;
use regex::Regex; use regex::Regex;
use std::borrow::Cow; use std::borrow::Cow;
use tokio::{ use tokio::{
@ -40,7 +41,6 @@ pub async fn search(
config: web::Data<&'static Config>, config: web::Data<&'static Config>,
cache: web::Data<&'static SharedCache>, cache: web::Data<&'static SharedCache>,
) -> Result<HttpResponse, Box<dyn std::error::Error>> { ) -> Result<HttpResponse, Box<dyn std::error::Error>> {
use std::sync::Arc;
let params = web::Query::<SearchParams>::from_query(req.query_string())?; let params = web::Query::<SearchParams>::from_query(req.query_string())?;
match &params.q { match &params.q {
Some(query) => { Some(query) => {
@ -83,44 +83,36 @@ pub async fn search(
let previous_page = page.saturating_sub(1); let previous_page = page.saturating_sub(1);
let next_page = page + 1; let next_page = page + 1;
let mut results = Arc::new((SearchResults::default(), String::default())); let results: (SearchResults, String, bool);
if page != previous_page { if page != previous_page {
let (previous_results, current_results, next_results) = join!( let (previous_results, current_results, next_results) = join!(
get_results(previous_page), get_results(previous_page),
get_results(page), get_results(page),
get_results(next_page) get_results(next_page)
); );
let (parsed_previous_results, parsed_next_results) =
(previous_results?, next_results?);
let (cache_keys, results_list) = ( results = current_results?;
[
parsed_previous_results.1,
results.1.clone(),
parsed_next_results.1,
],
[
parsed_previous_results.0,
results.0.clone(),
parsed_next_results.0,
],
);
results = Arc::new(current_results?); let (results_list, cache_keys): (Vec<SearchResults>, Vec<String>) =
[previous_results?, results.clone(), next_results?]
.into_iter()
.filter_map(|(result, cache_key, flag)| {
dbg!(flag).then_some((result, cache_key))
})
.multiunzip();
tokio::spawn(async move { cache.cache_results(&results_list, &cache_keys).await }); tokio::spawn(async move { cache.cache_results(&results_list, &cache_keys).await });
} else { } else {
let (current_results, next_results) = let (current_results, next_results) =
join!(get_results(page), get_results(page + 1)); join!(get_results(page), get_results(page + 1));
let parsed_next_results = next_results?; results = current_results?;
results = Arc::new(current_results?); let (results_list, cache_keys): (Vec<SearchResults>, Vec<String>) =
[results.clone(), next_results?]
let (cache_keys, results_list) = ( .into_iter()
[results.1.clone(), parsed_next_results.1.clone()], .filter_map(|(result, cache_key, flag)| flag.then_some((result, cache_key)))
[results.0.clone(), parsed_next_results.0], .multiunzip();
);
tokio::spawn(async move { cache.cache_results(&results_list, &cache_keys).await }); tokio::spawn(async move { cache.cache_results(&results_list, &cache_keys).await });
} }
@ -163,7 +155,7 @@ async fn results(
query: &str, query: &str,
page: u32, page: u32,
search_settings: &server_models::Cookie<'_>, search_settings: &server_models::Cookie<'_>,
) -> Result<(SearchResults, String), Box<dyn std::error::Error>> { ) -> Result<(SearchResults, String, bool), Box<dyn std::error::Error>> {
// eagerly parse cookie value to evaluate safe search level // eagerly parse cookie value to evaluate safe search level
let safe_search_level = search_settings.safe_search_level; let safe_search_level = search_settings.safe_search_level;
@ -182,7 +174,7 @@ async fn results(
// check whether the cached results were actually fetched or an error was returned, and // check whether the cached results were actually fetched or an error was returned, and
// handle each case accordingly. // handle each case accordingly.
match cached_results { match cached_results {
Ok(results) => Ok((results, cache_key)), Ok(results) => Ok((results, cache_key, false)),
Err(_) => { Err(_) => {
if safe_search_level == 4 { if safe_search_level == 4 {
let mut results: SearchResults = SearchResults::default(); let mut results: SearchResults = SearchResults::default();
@ -196,7 +188,7 @@ async fn results(
.cache_results(&[results.clone()], &[cache_key.clone()]) .cache_results(&[results.clone()], &[cache_key.clone()])
.await?; .await?;
results.set_safe_search_level(safe_search_level); results.set_safe_search_level(safe_search_level);
return Ok((results, cache_key)); return Ok((results, cache_key, true));
} }
} }
@ -235,7 +227,7 @@ async fn results(
.cache_results(&[results.clone()], &[cache_key.clone()]) .cache_results(&[results.clone()], &[cache_key.clone()])
.await?; .await?;
results.set_safe_search_level(safe_search_level); results.set_safe_search_level(safe_search_level);
Ok((results, cache_key)) Ok((results, cache_key, true))
} }
} }
} }