diff --git a/Cargo.lock b/Cargo.lock
index 71e1463..989aa5b 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -532,18 +532,18 @@ dependencies = [
 
 [[package]]
 name = "clap"
-version = "4.4.1"
+version = "4.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7c8d502cbaec4595d2e7d5f61e318f05417bd2b66fdc3809498f0d3fdf0bea27"
+checksum = "6a13b88d2c62ff462f88e4a121f17a82c1af05693a2f192b5c38d14de73c19f6"
 dependencies = [
  "clap_builder",
 ]
 
 [[package]]
 name = "clap_builder"
-version = "4.4.1"
+version = "4.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5891c7bc0edb3e1c2204fc5e94009affabeb1821c9e5fdc3959536c5c0bb984d"
+checksum = "2bb9faaa7c2ef94b2743a21f5a29e6f0010dff4caa69ac8e9d6cf8b6fa74da08"
 dependencies = [
  "anstyle",
  "clap_lex",
@@ -1270,9 +1270,9 @@ checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7"
 
 [[package]]
 name = "handlebars"
-version = "4.3.7"
+version = "4.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "83c3372087601b532857d332f5957cbae686da52bb7810bf038c3e3c3cc2fa0d"
+checksum = "c39b3bc2a8f715298032cf5087e58573809374b08160aa7d750582bdb82d2683"
 dependencies = [
  "log",
  "pest",
@@ -2494,9 +2494,9 @@ dependencies = [
 
 [[package]]
 name = "redis"
-version = "0.23.2"
+version = "0.23.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ffd6543a7bc6428396845f6854ccf3d1ae8823816592e2cbe74f20f50f209d02"
+checksum = "4f49cdc0bb3f412bf8e7d1bd90fe1d9eb10bc5c399ba90973c14662a27b3f8ba"
 dependencies = [
  "arc-swap",
  "async-trait",
@@ -2663,9 +2663,9 @@ dependencies = [
 
 [[package]]
 name = "rustix"
-version = "0.38.10"
+version = "0.38.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ed6248e1caa625eb708e266e06159f135e8c26f2bb7ceb72dc4b2766d0340964"
+checksum = "c0c3dde1fc030af041adc40e79c0e7fbcf431dd24870053d187d7c66e4b87453"
 dependencies = [
  "bitflags 2.4.0",
  "errno",
@@ -3697,7 +3697,7 @@ dependencies = [
 
 [[package]]
 name = "websurfx"
-version = "0.18.6"
+version = "0.19.0"
 dependencies = [
  "actix-cors",
  "actix-files",
diff --git a/Cargo.toml b/Cargo.toml
index 43b2c3e..cf505fb 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "websurfx"
-version = "0.18.6"
+version = "0.19.0"
 edition = "2021"
 description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
 repository = "https://github.com/neon-mmd/websurfx"
@@ -10,7 +10,7 @@ license = "AGPL-3.0"
 reqwest = {version="0.11.20",features=["json"]}
 tokio = {version="1.32.0",features=["rt-multi-thread","macros"]}
 serde = {version="1.0.188",features=["derive"]}
-handlebars = { version = "4.3.7", features = ["dir_source"] }
+handlebars = { version = "4.4.0", features = ["dir_source"] }
 scraper = {version="0.17.1"}
 actix-web = {version="4.4.0", features = ["cookies"]}
 actix-files = {version="0.6.2"}
@@ -19,8 +19,8 @@ serde_json = {version="1.0.105"}
 fake-useragent = {version="0.1.3"}
 env_logger = {version="0.10.0"}
 log = {version="0.4.20"}
-mlua = {version="0.8.10",features=["luajit"]}
-redis = {version="0.23.2",features=["tokio-comp","connection-manager"]}
+mlua = {version="0.8.10", features=["luajit"]}
+redis = {version="0.23.3", features=["tokio-comp","connection-manager"]}
 md5 = {version="0.7.0"}
 rand={version="0.8.5"}
 once_cell = {version="1.18.0"}
diff --git a/public/images/barricade.png b/public/images/barricade.png
new file mode 100644
index 0000000..eef097b
Binary files /dev/null and b/public/images/barricade.png differ
diff --git a/public/images/filter.png b/public/images/filter.png
new file mode 100644
index 0000000..5325c27
Binary files /dev/null and b/public/images/filter.png differ
diff --git a/public/static/themes/simple.css b/public/static/themes/simple.css
index 37e3c21..3d9c3b9 100644
--- a/public/static/themes/simple.css
+++ b/public/static/themes/simple.css
@@ -132,6 +132,35 @@ body {
   width: 1.2rem;
   height: 1.2rem;
 }
+
+.results .result_disallowed,
+.results .result_filtered {
+  display: flex;
+  justify-content: center;
+  align-items: center;
+  gap: 10rem;
+  font-size: 2rem;
+  color: var(--foreground-color);
+  margin: 0rem 7rem;
+}
+
+.results .result_disallowed .user_query,
+.results .result_filtered .user_query {
+  color: var(--background-color);
+  font-weight: 300;
+}
+
+.results .result_disallowed img,
+.results .result_filtered img {
+  width: 30rem;
+}
+
+.results .result_disallowed div,
+.results .result_filtered div {
+  display: flex;
+  flex-direction: column;
+  gap: 1rem;
+  line-break: strict;
+}
 
 /* styles for the footer and header */
diff --git a/public/templates/search.html b/public/templates/search.html
index e6fd4e8..8a79d69 100644
--- a/public/templates/search.html
+++ b/public/templates/search.html
@@ -1,37 +1,69 @@
 {{>header this.style}}
 <main class="results">
-  {{>search_bar this}}
-  <div class="results_aggregated">
-    {{#if results}} {{#each results}}
-    <div class="result">
-      <h1><a href="{{{this.url}}}">{{{this.title}}}</a></h1>
-      <small>{{{this.url}}}</small>
-      <p>{{{this.description}}}</p>
-      <div class="upstream_engines">
-        {{#each engine}}
-        <span>{{{this}}}</span>
-        {{/each}}
-      </div>
+  {{>search_bar this}}
+  <div class="results_aggregated">
+    {{#if results}} {{#each results}}
+    <div class="result">
+      <h1><a href="{{{this.url}}}">{{{this.title}}}</a></h1>
+      <small>{{{this.url}}}</small>
+      <p>{{{this.description}}}</p>
+      <div class="upstream_engines">
+        {{#each engine}}
+        <span>{{{this}}}</span>
+        {{/each}}
+      </div>
+    </div>
+    {{/each}} {{else}} {{#if disallowed}}
+    <div class="result_disallowed">
+      <div class="description">
+        <p>
+          Your search - <span class="user_query">{{{this.pageQuery}}}</span> -
+          has been disallowed.
+        </p>
+        <p>Dear user,</p>
+        <p>
+          The query - <span class="user_query">{{{this.pageQuery}}}</span> - has
+          been blacklisted via the server configuration and is therefore
+          disallowed by the server, so no results can be displayed for it.
+        </p>
+      </div>
+      <img src="./images/barricade.png" alt="Image of a Barricade" />
+    </div>
+    {{else}} {{#if filtered}}
+    <div class="result_filtered">
+      <div class="description">
+        <p>
+          Your search - <span class="user_query">{{{this.pageQuery}}}</span> -
+          has been filtered.
+        </p>
+        <p>Dear user,</p>
+        <p>
+          All the search results contain results that have been configured to be
+          filtered out via the server configuration and have therefore been
+          completely filtered out.
+        </p>
+      </div>
+      <img src="./images/filter.png" alt="Image of a paper inside a funnel" />
+    </div>
+    {{else}}
+    <div class="result_not_found">
+      <p>Your search - {{{this.pageQuery}}} - did not match any documents.</p>
+      <p>Suggestions:</p>
+      <ul>
+        <li>Make sure that all words are spelled correctly.</li>
+        <li>Try different keywords.</li>
+        <li>Try more general keywords.</li>
+      </ul>
+      <img src="./images/no_results.gif" alt="Man fishing gif" />
+    </div>
+    {{/if}} {{/if}} {{/if}}
-    {{/each}} {{else}}
-    <div class="result_not_found">
-      <p>Your search - {{{this.pageQuery}}} - did not match any documents.</p>
-      <p>Suggestions:</p>
-      <ul>
-        <li>Make sure that all words are spelled correctly.</li>
-        <li>Try different keywords.</li>
-        <li>Try more general keywords.</li>
-      </ul>
-      <img src="./images/no_results.gif" alt="Man fishing gif" />
-    </div>
-    {{/if}}
   </div>
diff --git a/src/config/parser.rs b/src/config/parser.rs
index fc0a861..f5e6d48 100644
--- a/src/config/parser.rs
+++ b/src/config/parser.rs
@@ -35,6 +35,7 @@ pub struct Config {
     pub upstream_search_engines: Vec<String>,
     pub request_timeout: u8,
     pub threads: u8,
+    pub safe_search: u8,
 }
 
 /// Configuration options for the aggregator.
@@ -89,6 +90,16 @@ impl Config {
             parsed_threads
         };
 
+        let parsed_safe_search: u8 = globals.get::<_, u8>("safe_search")?;
+        let safe_search: u8 = match parsed_safe_search {
+            0..=4 => parsed_safe_search,
+            _ => {
+                log::error!("Config Error: The value of the `safe_search` option should be an integer from 0 to 4.");
+                log::error!("Falling back to using the value `1` for the option");
+                1
+            }
+        };
+
         Ok(Config {
             port: globals.get::<_, u16>("port")?,
             binding_ip: globals.get::<_, String>("binding_ip")?,
@@ -110,6 +121,7 @@ impl Config {
                 .collect(),
             request_timeout: globals.get::<_, u8>("request_timeout")?,
             threads,
+            safe_search,
         })
     }
 }
diff --git a/src/engines/duckduckgo.rs b/src/engines/duckduckgo.rs
index c716e8a..7b9f7d6 100644
--- a/src/engines/duckduckgo.rs
+++ b/src/engines/duckduckgo.rs
@@ -43,6 +43,7 @@ impl SearchEngine for DuckDuckGo {
         page: u32,
         user_agent: &str,
         request_timeout: u8,
+        _safe_search: u8,
     ) -> Result<HashMap<String, SearchResult>, EngineError> {
         // Page number can be missing or empty string and so appropriate handling is required
         // so that upstream server recieves valid page number.
diff --git a/src/engines/engine_models.rs b/src/engines/engine_models.rs
index 86fb207..f4e7e5a 100644
--- a/src/engines/engine_models.rs
+++ b/src/engines/engine_models.rs
@@ -71,6 +71,7 @@ pub trait SearchEngine: Sync + Send {
         page: u32,
         user_agent: &str,
         request_timeout: u8,
+        safe_search: u8,
     ) -> Result<HashMap<String, SearchResult>, EngineError>;
 }
diff --git a/src/engines/searx.rs b/src/engines/searx.rs
index ca45cf0..4eb22c5 100644
--- a/src/engines/searx.rs
+++ b/src/engines/searx.rs
@@ -42,12 +42,21 @@ impl SearchEngine for Searx {
         page: u32,
         user_agent: &str,
         request_timeout: u8,
+        mut safe_search: u8,
     ) -> Result<HashMap<String, SearchResult>, EngineError> {
         // Page number can be missing or empty string and so appropriate handling is required
         // so that upstream server recieves valid page number.
+        if safe_search == 3 {
+            safe_search = 2;
+        };
+
         let url: String = match page {
-            0 | 1 => format!("https://searx.work/search?q={query}&pageno=1"),
-            _ => format!("https://searx.work/search?q={query}&pageno={page}"),
+            0 | 1 => {
+                format!("https://searx.work/search?q={query}&pageno=1&safesearch={safe_search}")
+            }
+            _ => format!(
+                "https://searx.work/search?q={query}&pageno={page}&safesearch={safe_search}"
+            ),
         };
 
         // initializing headers and adding appropriate headers.
diff --git a/src/results/aggregation_models.rs b/src/results/aggregation_models.rs
index 11b2e63..280767c 100644
--- a/src/results/aggregation_models.rs
+++ b/src/results/aggregation_models.rs
@@ -102,13 +102,15 @@
 /// and the type of error that caused it.
 /// * `empty_result_set` - Stores a boolean which indicates that no engines gave a result for the
 /// given search query.
-#[derive(Serialize, Deserialize)]
+#[derive(Serialize, Deserialize, Default)]
 #[serde(rename_all = "camelCase")]
 pub struct SearchResults {
     pub results: Vec<SearchResult>,
     pub page_query: String,
     pub style: Style,
-    pub engine_errors_info: SmallVec<[EngineErrorInfo; 0]>,
+    pub engine_errors_info: Vec<EngineErrorInfo>,
+    pub disallowed: bool,
+    pub filtered: bool,
 }
 
 impl SearchResults {
@@ -122,6 +124,7 @@ impl SearchResults {
     /// the search url.
     /// * `empty_result_set` - Takes a boolean which indicates that no engines gave a result for the
     /// given search query.
+    /// * `engine_errors_info` - Takes a reference to a slice of `EngineErrorInfo` values.
     pub fn new(
         results: Vec<SearchResult>,
         page_query: &str,
@@ -131,12 +134,38 @@ impl SearchResults {
             results,
             page_query: page_query.to_owned(),
             style: Style::default(),
-            engine_errors_info: SmallVec::from(engine_errors_info),
+            engine_errors_info: engine_errors_info.to_owned(),
+            disallowed: Default::default(),
+            filtered: Default::default(),
         }
     }
 
     /// A setter function to add website style to the return search results.
     pub fn add_style(&mut self, style: &Style) {
-        self.style = style.to_owned();
+        self.style = style.clone();
+    }
+
+    /// A setter function that sets disallowed to true.
+    pub fn set_disallowed(&mut self) {
+        self.disallowed = true;
+    }
+
+    /// A setter function to set the current page search query.
+    pub fn set_page_query(&mut self, page: &str) {
+        self.page_query = page.to_owned();
+    }
+
+    /// A setter function that sets the filtered to true.
+    pub fn set_filtered(&mut self) {
+        self.filtered = true;
+    }
+
+    /// A getter function that gets the value of `engine_errors_info`.
+    pub fn engine_errors_info(&mut self) -> Vec<EngineErrorInfo> {
+        std::mem::take(&mut self.engine_errors_info)
+    }
+    /// A getter function that gets the value of `results`.
+    pub fn results(&mut self) -> Vec<SearchResult> {
+        self.results.clone()
     }
 }
diff --git a/src/results/aggregator.rs b/src/results/aggregator.rs
index 1942acc..734a65f 100644
--- a/src/results/aggregator.rs
+++ b/src/results/aggregator.rs
@@ -70,6 +70,7 @@ pub async fn aggregate(
     debug: bool,
     upstream_search_engines: &[EngineHandler],
     request_timeout: u8,
+    safe_search: u8,
 ) -> Result<SearchResults, Box<dyn std::error::Error>> {
     let user_agent: &str = random_user_agent();
 
@@ -91,7 +92,13 @@ pub async fn aggregate(
         let query: String = query.to_owned();
         tasks.push(tokio::spawn(async move {
             search_engine
-                .results(&query, page, user_agent, request_timeout)
+                .results(
+                    &query,
+                    page,
+                    user_agent.clone(),
+                    request_timeout,
+                    safe_search,
+                )
                 .await
         }));
     }
@@ -150,20 +157,22 @@ pub async fn aggregate(
         }
     }
 
-    let mut blacklist_map: HashMap<String, SearchResult> = HashMap::new();
-    filter_with_lists(
-        &mut result_map,
-        &mut blacklist_map,
-        file_path(FileType::BlockList)?,
-    )?;
+    if safe_search >= 3 {
+        let mut blacklist_map: HashMap<String, SearchResult> = HashMap::new();
+        filter_with_lists(
+            &mut result_map,
+            &mut blacklist_map,
+            file_path(FileType::BlockList)?,
+        )?;
 
-    filter_with_lists(
-        &mut blacklist_map,
-        &mut result_map,
-        file_path(FileType::AllowList)?,
-    )?;
+        filter_with_lists(
+            &mut blacklist_map,
+            &mut result_map,
+            file_path(FileType::AllowList)?,
+        )?;
 
-    drop(blacklist_map);
+        drop(blacklist_map);
+    }
 
     let results: Vec<SearchResult> = result_map.into_values().collect();
 
@@ -189,7 +198,7 @@ pub fn filter_with_lists(
     let mut reader = BufReader::new(File::open(file_path)?);
 
     for line in reader.by_ref().lines() {
-        let re = Regex::new(&line?)?;
+        let re = Regex::new(line?.trim())?;
 
         // Iterate over each search result in the map and check if it matches the regex pattern
         for (url, search_result) in map_to_be_filtered.clone().into_iter() {
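For context, a simplified sketch of the list-based filtering that the aggregator now runs only when `safe_search >= 3`. The real `filter_with_lists` operates on `HashMap<String, SearchResult>`; this stand-in uses plain strings but keeps the same shape: each line of the list file is compiled as a regex and matching entries are moved from one map to the other.

```rust
use std::collections::HashMap;
use std::error::Error;
use std::fs::File;
use std::io::{BufRead, BufReader};

use regex::Regex;

// Simplified stand-in: keys are URLs, values are the searchable text of a result.
fn filter_with_lists_sketch(
    source: &mut HashMap<String, String>,
    sink: &mut HashMap<String, String>,
    file_path: &str,
) -> Result<(), Box<dyn Error>> {
    let reader = BufReader::new(File::open(file_path)?);
    for line in reader.lines() {
        // Trim the line first, as in the `Regex::new(line?.trim())?` change above,
        // so trailing whitespace in the list file never becomes part of the pattern.
        let re = Regex::new(line?.trim())?;
        // Move every matching entry out of `source` and into `sink`.
        for (url, text) in source.clone() {
            if re.is_match(&url) || re.is_match(&text) {
                source.remove(&url);
                sink.insert(url, text);
            }
        }
    }
    Ok(())
}
```

Calling it once with the block list and once with the allow list while swapping the two maps, as the hunk above does, first removes blocked results and then pulls explicitly allowed ones back into the result set.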
diff --git a/src/server/routes.rs b/src/server/routes.rs
index e17a452..3d69e78 100644
--- a/src/server/routes.rs
+++ b/src/server/routes.rs
@@ -2,7 +2,10 @@
 //! meta search engine website and provide appropriate response to each route/page
 //! when requested.
 
-use std::fs::read_to_string;
+use std::{
+    fs::{read_to_string, File},
+    io::{BufRead, BufReader, Read},
+};
 
 use crate::{
     cache::cacher::RedisCache,
@@ -13,12 +16,13 @@ use crate::{
 };
 use actix_web::{get, web, HttpRequest, HttpResponse};
 use handlebars::Handlebars;
+use regex::Regex;
 use serde::Deserialize;
 use tokio::join;
 
 // ---- Constants ----
 /// Initialize redis cache connection once and store it on the heap.
-const REDIS_CACHE: async_once_cell::OnceCell<RedisCache> = async_once_cell::OnceCell::new();
+static REDIS_CACHE: async_once_cell::OnceCell<RedisCache> = async_once_cell::OnceCell::new();
 
 /// A named struct which deserializes all the user provided search parameters and stores them.
 ///
@@ -32,6 +36,7 @@ const REDIS_CACHE: async_once_cell::OnceCell<RedisCache> = async_once_cell::OnceCell::new();
 struct SearchParams {
     q: Option<String>,
     page: Option<u32>,
+    safesearch: Option<u8>,
 }
 
 /// Handles the route of index page or main page of the `websurfx` meta search engine website.
@@ -105,42 +110,58 @@ pub async fn search(
         None => 1,
     };
 
+    let safe_search: u8 = match config.safe_search {
+        3..=4 => config.safe_search,
+        _ => match &params.safesearch {
+            Some(safesearch) => match safesearch {
+                0..=2 => *safesearch,
+                _ => 1,
+            },
+            None => config.safe_search,
+        },
+    };
+
     let (_, results, _) = join!(
         results(
             format!(
-                "http://{}:{}/search?q={}&page={}",
+                "http://{}:{}/search?q={}&page={}&safesearch={}",
                 config.binding_ip,
                 config.port,
                 query,
-                page - 1
+                page - 1,
+                safe_search
             ),
             &config,
             query,
             page - 1,
-            &req,
+            req.clone(),
+            safe_search
         ),
         results(
             format!(
-                "http://{}:{}/search?q={}&page={}",
-                config.binding_ip, config.port, query, page
+                "http://{}:{}/search?q={}&page={}&safesearch={}",
+                config.binding_ip, config.port, query, page, safe_search
            ),
             &config,
             query,
             page,
-            &req,
+            req.clone(),
+            safe_search
         ),
         results(
             format!(
-                "http://{}:{}/search?q={}&page={}",
+                "http://{}:{}/search?q={}&page={}&safesearch={}",
                 config.binding_ip,
                 config.port,
                 query,
-                page + 1
+                page + 1,
+                safe_search
             ),
             &config,
             query,
             page + 1,
-            &req,
+            req.clone(),
+            safe_search
         )
     );
@@ -160,9 +181,10 @@ async fn results(
     config: &Config,
     query: &str,
     page: u32,
-    req: &HttpRequest,
+    req: HttpRequest,
+    safe_search: u8,
 ) -> Result<SearchResults, Box<dyn std::error::Error>> {
-    let redis_cache: RedisCache = REDIS_CACHE
+    let mut redis_cache: RedisCache = REDIS_CACHE
         .get_or_init(async {
             // Initialize redis cache connection pool only one and store it in the heap.
             RedisCache::new(&config.redis_url, 5).await.unwrap()
@@ -178,6 +200,23 @@ async fn results(
     match cached_results_json {
         Ok(results) => Ok(serde_json::from_str::<SearchResults>(&results)?),
         Err(_) => {
+            if safe_search == 4 {
+                let mut results: SearchResults = SearchResults::default();
+                let mut _flag: bool =
+                    is_match_from_filter_list(file_path(FileType::BlockList)?, query)?;
+                _flag = !is_match_from_filter_list(file_path(FileType::AllowList)?, query)?;
+
+                if _flag {
+                    results.set_disallowed();
+                    results.add_style(&config.style);
+                    results.set_page_query(query);
+                    redis_cache
+                        .cache_results(&serde_json::to_string(&results)?, &url)
+                        .await?;
+                    return Ok(results);
+                }
+            }
+
             // check if the cookie value is empty or not if it is empty then use the
             // default selected upstream search engines from the config file otherwise
             // parse the non-empty cookie and grab the user selected engines from the
@@ -199,6 +238,7 @@ async fn results(
                         config.debug,
                         &engines,
                         config.request_timeout,
+                        safe_search,
                     )
                     .await?
                 }
@@ -210,14 +250,16 @@ async fn results(
                     config.debug,
                     &config.upstream_search_engines,
                     config.request_timeout,
+                    safe_search,
                 )
                 .await?
             }
         };
-
+        if results.engine_errors_info().is_empty() && results.results().is_empty() {
+            results.set_filtered();
+        }
         results.add_style(&config.style);
         redis_cache
-            .clone()
             .cache_results(&serde_json::to_string(&results)?, &url)
             .await?;
         Ok(results)
@@ -225,6 +267,22 @@ async fn results(
     }
 }
 
+fn is_match_from_filter_list(
+    file_path: &str,
+    query: &str,
+) -> Result<bool, Box<dyn std::error::Error>> {
+    let mut flag = false;
+    let mut reader = BufReader::new(File::open(file_path)?);
+    for line in reader.by_ref().lines() {
+        let re = Regex::new(&line?)?;
+        if re.is_match(query) {
+            flag = true;
+            break;
+        }
+    }
+    Ok(flag)
+}
+
 /// Handles the route of robots.txt page of the `websurfx` meta search engine website.
 #[get("/robots.txt")]
 pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
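A minimal sketch of the per-request level resolution the `/search` route now performs, restated as a pure function for clarity (the helper name is illustrative, not part of the codebase): a server-configured level of 3 or 4 always wins, otherwise a `safesearch` query parameter of 0 to 2 is honoured, an out-of-range value falls back to 1, and a missing parameter falls back to the configured default.

```rust
// Hypothetical helper restating the `match config.safe_search { ... }` block above.
fn resolve_safe_search(config_level: u8, param: Option<u8>) -> u8 {
    match config_level {
        // Levels 3 and 4 are server-enforced and cannot be lowered per request.
        3..=4 => config_level,
        _ => match param {
            Some(level @ 0..=2) => level, // client-selected level within the allowed range
            Some(_) => 1,                 // out-of-range values fall back to 1
            None => config_level,         // no parameter: use the configured default
        },
    }
}

fn main() {
    assert_eq!(resolve_safe_search(4, Some(0)), 4);
    assert_eq!(resolve_safe_search(1, Some(2)), 2);
    assert_eq!(resolve_safe_search(1, Some(9)), 1);
    assert_eq!(resolve_safe_search(2, None), 2);
}
```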
diff --git a/websurfx/config.lua b/websurfx/config.lua
index 4f2633c..fb6c4fe 100644
--- a/websurfx/config.lua
+++ b/websurfx/config.lua
@@ -11,6 +11,17 @@ production_use = false -- whether to use production mode or not (in other words
 -- There will be a random delay before sending the request to the search engines, this is to prevent DDoSing the upstream search engines from a large number of simultaneous requests.
 request_timeout = 30 -- timeout for the search requests sent to the upstream search engines to be fetched (value in seconds).
 
+-- ### Search ###
+-- Filter results based on different levels. The levels provided are:
+-- {{
+-- 0 - None
+-- 1 - Low
+-- 2 - Moderate
+-- 3 - High
+-- 4 - Aggressive
+-- }}
+safe_search = 2
+
 -- ### Website ###
 -- The different colorschemes provided are:
 -- {{
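Taken together, the hunks above give the five `safe_search` levels the following rough meaning (a summary sketch, not project API):

```rust
// Illustrative summary of the behaviour each configured level enables in this change set.
fn describe_level(level: u8) -> &'static str {
    match level {
        0..=2 => "forwarded to the upstream engines as their own safe-search setting",
        3 => "engine-level safe search plus regex block/allow-list filtering of aggregated results",
        4 => "everything level 3 does, plus blacklisted queries are rejected before any engine is queried",
        _ => "invalid; the config parser falls back to level 1",
    }
}

fn main() {
    for level in 0..=5u8 {
        println!("safe_search = {level}: {}", describe_level(level));
    }
}
```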