diff --git a/Cargo.lock b/Cargo.lock
index f1b09d3..878023b 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4066,7 +4066,7 @@ checksum = "1778a42e8b3b90bff8d0f5032bf22250792889a5cdc752aa0020c84abe3aaf10"
 
 [[package]]
 name = "websurfx"
-version = "1.2.30"
+version = "1.2.35"
 dependencies = [
  "actix-cors",
  "actix-files",
diff --git a/Cargo.toml b/Cargo.toml
index 56e608f..40fe692 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "websurfx"
-version = "1.2.30"
+version = "1.2.35"
 edition = "2021"
 description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
 repository = "https://github.com/neon-mmd/websurfx"
diff --git a/docs/instances.md b/docs/instances.md
index 67d982b..970e372 100644
--- a/docs/instances.md
+++ b/docs/instances.md
@@ -6,7 +6,8 @@ This page provides a list of `Websurfx` instances provided by us and our communi
 
 |URL|Network|Version|Location|Behind Cloudflare?|Maintained By|TLS|IPv6|Comment|
 |-|-|-|-|-|-|-|-|-|
-|https://alamin655-websurfx.hf.space/|www|v0.21.4|🇺🇸 US||[websurfx project](https://github.com/neon-mmd/websurfx)|✅|||
-
+|https://websurfx.co/|www|edge|🇺🇸 US|||✅|❌||
+|https://websurfx.onrender.com/|www|edge|🇺🇸 US|||✅|❌||
+|https://alamin655-websurfx.hf.space/|www|v0.21.4|🇺🇸 US||[websurfx project](https://github.com/neon-mmd/websurfx)|✅|❌||
 
 [⬅️ Go back to Home](./README.md)
diff --git a/public/static/themes/simple.css b/public/static/themes/simple.css
index 2670c22..7c01ca5 100644
--- a/public/static/themes/simple.css
+++ b/public/static/themes/simple.css
@@ -278,6 +278,7 @@ footer {
   flex-direction: column;
   justify-content: space-between;
   margin: 2rem 0;
+  content-visibility: auto;
 }
 
 .results_aggregated .result {
diff --git a/src/bin/websurfx.rs b/src/bin/websurfx.rs
index d80c8e0..aad5806 100644
--- a/src/bin/websurfx.rs
+++ b/src/bin/websurfx.rs
@@ -5,7 +5,7 @@
 use mimalloc::MiMalloc;
 use std::net::TcpListener;
 
-use websurfx::{cache::cacher::Cache, config::parser::Config, run};
+use websurfx::{cache::cacher::create_cache, config::parser::Config, run};
 
 /// A dhat heap memory profiler
 #[cfg(feature = "dhat-heap")]
@@ -31,7 +31,7 @@ async fn main() -> std::io::Result<()> {
     // Initialize the parsed config file.
     let config = Config::parse(false).unwrap();
 
-    let cache = Cache::build(&config).await;
+    let cache = create_cache(&config).await;
 
     log::info!(
         "started server on port {} and IP {}",
diff --git a/src/cache/cacher.rs b/src/cache/cacher.rs
index 12f88ff..577395c 100644
--- a/src/cache/cacher.rs
+++ b/src/cache/cacher.rs
@@ -14,24 +14,10 @@ use super::error::CacheError;
 #[cfg(feature = "redis-cache")]
 use super::redis_cacher::RedisCache;
 
-/// Different implementations for caching, currently it is possible to cache in-memory or in Redis.
-#[derive(Clone)]
-pub enum Cache {
-    /// Caching is disabled
-    Disabled,
-    #[cfg(all(feature = "redis-cache", not(feature = "memory-cache")))]
-    /// Encapsulates the Redis based cache
-    Redis(RedisCache),
-    #[cfg(all(feature = "memory-cache", not(feature = "redis-cache")))]
-    /// Contains the in-memory cache.
-    InMemory(MokaCache<String, SearchResults>),
-    #[cfg(all(feature = "redis-cache", feature = "memory-cache"))]
-    /// Contains both the in-memory cache and Redis based cache
-    Hybrid(RedisCache, MokaCache<String, SearchResults>),
-}
-
-impl Cache {
-    /// A function that builds the cache from the given configuration.
+/// Abstraction trait for common methods provided by a cache backend.
+#[async_trait::async_trait]
+pub trait Cacher: Send + Sync {
+    /// A function that builds the cache from the given configuration.
     ///
     /// # Arguments
     ///
     /// * `_config` - It takes the config struct as an argument.
@@ -39,89 +25,10 @@ impl Cache {
     ///
     /// # Returns
     ///
-    /// It returns a newly initialized variant based on the feature enabled by the user.
-    pub async fn build(_config: &Config) -> Self {
-        #[cfg(all(feature = "redis-cache", feature = "memory-cache"))]
-        {
-            log::info!("Using a hybrid cache");
-            Cache::new_hybrid(
-                RedisCache::new(&_config.redis_url, 5)
-                    .await
-                    .expect("Redis cache configured"),
-            )
-        }
-        #[cfg(all(feature = "redis-cache", not(feature = "memory-cache")))]
-        {
-            log::info!("Listening redis server on {}", &_config.redis_url);
-            Cache::new(
-                RedisCache::new(&_config.redis_url, 5)
-                    .await
-                    .expect("Redis cache configured"),
-            )
-        }
-        #[cfg(all(feature = "memory-cache", not(feature = "redis-cache")))]
-        {
-            log::info!("Using an in-memory cache");
-            Cache::new_in_memory()
-        }
-        #[cfg(not(any(feature = "memory-cache", feature = "redis-cache")))]
-        {
-            log::info!("Caching is disabled");
-            Cache::Disabled
-        }
-    }
-
-    /// A function that initializes a new connection pool struct.
-    ///
-    /// # Arguments
-    ///
-    /// * `redis_cache` - It takes the newly initialized connection pool struct as an argument.
-    ///
-    /// # Returns
-    ///
-    /// It returns a `Redis` variant with the newly initialized connection pool struct.
-    #[cfg(all(feature = "redis-cache", not(feature = "memory-cache")))]
-    pub fn new(redis_cache: RedisCache) -> Self {
-        Cache::Redis(redis_cache)
-    }
-
-    /// A function that initializes the `in memory` cache which is used to cache the results in
-    /// memory with the search engine thus improving performance by making retrieval and caching of
-    /// results faster.
-    ///
-    /// # Returns
-    ///
-    /// It returns a `InMemory` variant with the newly initialized in memory cache type.
-    #[cfg(all(feature = "memory-cache", not(feature = "redis-cache")))]
-    pub fn new_in_memory() -> Self {
-        let cache = MokaCache::builder()
-            .max_capacity(1000)
-            .time_to_live(Duration::from_secs(60))
-            .build();
-        Cache::InMemory(cache)
-    }
-
-    /// A function that initializes both in memory cache and redis client connection for being used
-    /// for managing hybrid cache which increases resiliancy of the search engine by allowing the
-    /// cache to switch to `in memory` caching if the `redis` cache server is temporarily
-    /// unavailable.
-    ///
-    /// # Arguments
-    ///
-    /// * `redis_cache` - It takes `redis` client connection struct as an argument.
-    ///
-    /// # Returns
-    ///
-    /// It returns a tuple variant `Hybrid` storing both the in-memory cache type and the `redis`
-    /// client connection struct.
-    #[cfg(all(feature = "redis-cache", feature = "memory-cache"))]
-    pub fn new_hybrid(redis_cache: RedisCache) -> Self {
-        let cache = MokaCache::builder()
-            .max_capacity(1000)
-            .time_to_live(Duration::from_secs(60))
-            .build();
-        Cache::Hybrid(redis_cache, cache)
-    }
+    /// It returns a newly initialized backend based on the feature enabled by the user.
+    async fn build(config: &Config) -> Self
+    where
+        Self: Sized;
 
     /// A function which fetches the cached json results as json string.
     ///
     /// # Arguments
     ///
@@ -133,31 +40,7 @@ impl Cache {
     ///
     /// # Error
     ///
     /// Returns the `SearchResults` from the cache if the program executes normally otherwise
     /// returns a `CacheError` if the results cannot be retrieved from the cache.
-    pub async fn cached_json(&mut self, _url: &str) -> Result<SearchResults, Report<CacheError>> {
-        match self {
-            Cache::Disabled => Err(Report::new(CacheError::MissingValue)),
-            #[cfg(all(feature = "redis-cache", not(feature = "memory-cache")))]
-            Cache::Redis(redis_cache) => {
-                let json = redis_cache.cached_json(_url).await?;
-                Ok(serde_json::from_str::<SearchResults>(&json)
-                    .map_err(|_| CacheError::SerializationError)?)
-            }
-            #[cfg(all(feature = "memory-cache", not(feature = "redis-cache")))]
-            Cache::InMemory(in_memory) => match in_memory.get(&_url.to_string()) {
-                Some(res) => Ok(res),
-                None => Err(Report::new(CacheError::MissingValue)),
-            },
-            #[cfg(all(feature = "redis-cache", feature = "memory-cache"))]
-            Cache::Hybrid(redis_cache, in_memory) => match redis_cache.cached_json(_url).await {
-                Ok(res) => Ok(serde_json::from_str::<SearchResults>(&res)
-                    .map_err(|_| CacheError::SerializationError)?),
-                Err(_) => match in_memory.get(&_url.to_string()) {
-                    Some(res) => Ok(res),
-                    None => Err(Report::new(CacheError::MissingValue)),
-                },
-            },
-        }
-    }
+    async fn cached_results(&mut self, url: &str) -> Result<SearchResults, Report<CacheError>>;
 
     /// A function which caches the results by using the `url` as the key and
     /// `json results` as the value and stores it in the cache
     ///
@@ -172,44 +55,163 @@ impl Cache {
     /// Returns a unit type if the program caches the given search results without a failure
     /// otherwise it returns a `CacheError` if the search results cannot be cached due to a
     /// failure.
-    pub async fn cache_results(
+    async fn cache_results(
+        &mut self,
+        search_results: &SearchResults,
+        url: &str,
+    ) -> Result<(), Report<CacheError>>;
+
+    /// A helper function which computes the hash of the url and formats and returns it as string.
+    ///
+    /// # Arguments
+    ///
+    /// * `url` - It takes a url as string.
+    fn hash_url(&self, url: &str) -> String {
+        blake3::hash(url.as_bytes()).to_string()
+    }
+}
+
+#[cfg(feature = "redis-cache")]
+#[async_trait::async_trait]
+impl Cacher for RedisCache {
+    async fn build(config: &Config) -> Self {
+        log::info!(
+            "Initialising redis cache. Listening to {}",
+            &config.redis_url
+        );
+        RedisCache::new(&config.redis_url, 5, config.cache_expiry_time)
+            .await
+            .expect("Redis cache configured")
+    }
+
+    async fn cached_results(&mut self, url: &str) -> Result<SearchResults, Report<CacheError>> {
+        let hashed_url_string: &str = &self.hash_url(url);
+        let json = self.cached_json(hashed_url_string).await?;
+        Ok(serde_json::from_str::<SearchResults>(&json)
+            .map_err(|_| CacheError::SerializationError)?)
+    }
+
+    async fn cache_results(
+        &mut self,
+        search_results: &SearchResults,
+        url: &str,
+    ) -> Result<(), Report<CacheError>> {
+        let json =
+            serde_json::to_string(search_results).map_err(|_| CacheError::SerializationError)?;
+        let hashed_url_string = self.hash_url(url);
+        self.cache_json(&json, &hashed_url_string).await
+    }
+}
+
+/// Memory based cache backend.
+#[cfg(feature = "memory-cache")]
+pub struct InMemoryCache {
+    /// The backend cache which stores data.
+    cache: MokaCache<String, SearchResults>,
+}
+
+#[cfg(feature = "memory-cache")]
+#[async_trait::async_trait]
+impl Cacher for InMemoryCache {
+    async fn build(config: &Config) -> Self {
+        log::info!("Initialising in-memory cache");
+
+        InMemoryCache {
+            cache: MokaCache::builder()
+                .time_to_live(Duration::from_secs(config.cache_expiry_time.into()))
+                .build(),
+        }
+    }
+
+    async fn cached_results(&mut self, url: &str) -> Result<SearchResults, Report<CacheError>> {
+        let hashed_url_string = self.hash_url(url);
+        match self.cache.get(&hashed_url_string) {
+            Some(res) => Ok(res),
+            None => Err(Report::new(CacheError::MissingValue)),
+        }
+    }
+
+    async fn cache_results(
+        &mut self,
+        search_results: &SearchResults,
+        url: &str,
+    ) -> Result<(), Report<CacheError>> {
+        let hashed_url_string = self.hash_url(url);
+        self.cache.insert(hashed_url_string, search_results.clone());
+        Ok(())
+    }
+}
+
+/// Cache backend which utilises both memory and redis based caches.
+///
+/// The hybrid cache system uses both the types of cache to ensure maximum availability.
+/// The set method sets the key, value pair in both the caches. Therefore in a case where redis
+/// cache becomes unavailable, the backend will retrieve the value from in-memory cache.
+#[cfg(all(feature = "memory-cache", feature = "redis-cache"))]
+pub struct HybridCache {
+    /// The in-memory backend cache which stores data.
+    memory_cache: InMemoryCache,
+    /// The redis backend cache which stores data.
+    redis_cache: RedisCache,
+}
+
+#[cfg(all(feature = "memory-cache", feature = "redis-cache"))]
+#[async_trait::async_trait]
+impl Cacher for HybridCache {
+    async fn build(config: &Config) -> Self {
+        log::info!("Initialising hybrid cache");
+        HybridCache {
+            memory_cache: InMemoryCache::build(config).await,
+            redis_cache: RedisCache::build(config).await,
+        }
+    }
+
+    async fn cached_results(&mut self, url: &str) -> Result<SearchResults, Report<CacheError>> {
+        match self.redis_cache.cached_results(url).await {
+            Ok(res) => Ok(res),
+            Err(_) => self.memory_cache.cached_results(url).await,
+        }
+    }
+
+    async fn cache_results(
+        &mut self,
+        search_results: &SearchResults,
+        url: &str,
+    ) -> Result<(), Report<CacheError>> {
+        self.redis_cache.cache_results(search_results, url).await?;
+        self.memory_cache.cache_results(search_results, url).await?;
+
+        Ok(())
+    }
+}
+
+/// Dummy cache backend
+pub struct DisabledCache;
+
+#[async_trait::async_trait]
+impl Cacher for DisabledCache {
+    async fn build(_config: &Config) -> Self {
+        log::info!("Caching is disabled");
+        DisabledCache
+    }
+
+    async fn cached_results(&mut self, _url: &str) -> Result<SearchResults, Report<CacheError>> {
+        Err(Report::new(CacheError::MissingValue))
+    }
+
+    async fn cache_results(
         &mut self,
         _search_results: &SearchResults,
         _url: &str,
     ) -> Result<(), Report<CacheError>> {
-        match self {
-            Cache::Disabled => Ok(()),
-            #[cfg(all(feature = "redis-cache", not(feature = "memory-cache")))]
-            Cache::Redis(redis_cache) => {
-                let json = serde_json::to_string(_search_results)
-                    .map_err(|_| CacheError::SerializationError)?;
-                redis_cache.cache_results(&json, _url).await
-            }
-            #[cfg(all(feature = "memory-cache", not(feature = "redis-cache")))]
-            Cache::InMemory(cache) => {
-                cache.insert(_url.to_string(), _search_results.clone());
-                Ok(())
-            }
-            #[cfg(all(feature = "memory-cache", feature = "redis-cache"))]
-            Cache::Hybrid(redis_cache, cache) => {
-                let json = serde_json::to_string(_search_results)
-                    .map_err(|_| CacheError::SerializationError)?;
-                match redis_cache.cache_results(&json, _url).await {
-                    Ok(_) => Ok(()),
-                    Err(_) => {
-                        cache.insert(_url.to_string(), _search_results.clone());
-                        Ok(())
-                    }
-                }
-            }
-        }
+        Ok(())
     }
 }
 /// A structure to efficiently share the cache between threads - as it is protected by a Mutex.
 pub struct SharedCache {
     /// The internal cache protected from concurrent access by a mutex
-    cache: Mutex<Cache>,
+    cache: Mutex<Box<dyn Cacher>>,
 }
 
 impl SharedCache {
@@ -220,9 +222,9 @@ impl SharedCache {
     /// * `cache` - It takes the `Cache` enum variant as an argument with the preferred cache type.
     ///
     /// Returns a newly constructed `SharedCache` struct.
-    pub fn new(cache: Cache) -> Self {
+    pub fn new(cache: impl Cacher + 'static) -> Self {
         Self {
-            cache: Mutex::new(cache),
+            cache: Mutex::new(Box::new(cache)),
         }
     }
 
@@ -237,9 +239,9 @@ impl SharedCache {
     ///
     /// Returns a `SearchResults` struct containing the search results from the cache if nothing
     /// goes wrong otherwise returns a `CacheError`.
-    pub async fn cached_json(&self, url: &str) -> Result<SearchResults, Report<CacheError>> {
+    pub async fn cached_results(&self, url: &str) -> Result<SearchResults, Report<CacheError>> {
         let mut mut_cache = self.cache.lock().await;
-        mut_cache.cached_json(url).await
+        mut_cache.cached_results(url).await
     }
 
     /// A setter function which caches the results by using the `url` as the key and
@@ -265,3 +267,18 @@ impl SharedCache {
         mut_cache.cache_results(search_results, url).await
     }
 }
+
+/// A function to initialise the cache backend.
+pub async fn create_cache(config: &Config) -> impl Cacher {
+    #[cfg(all(feature = "redis-cache", feature = "memory-cache"))]
+    return HybridCache::build(config).await;
+
+    #[cfg(all(feature = "memory-cache", not(feature = "redis-cache")))]
+    return InMemoryCache::build(config).await;
+
+    #[cfg(all(feature = "redis-cache", not(feature = "memory-cache")))]
+    return RedisCache::build(config).await;
+
+    #[cfg(not(any(feature = "memory-cache", feature = "redis-cache")))]
+    return DisabledCache::build(config).await;
+}
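With the `Cache` enum gone, any type implementing `Cacher` can now be handed to `SharedCache::new` or `run`. A minimal sketch of what a third-party backend could look like (hypothetical, not part of this PR; the module paths and the `Clone` impl on `SearchResults` are assumptions inferred from the imports visible in this diff):

```rust
use std::collections::HashMap;

use error_stack::Report;
use websurfx::cache::cacher::Cacher;
use websurfx::cache::error::CacheError;
use websurfx::config::parser::Config;
use websurfx::models::aggregation_models::SearchResults;

/// A toy backend that keeps results in a plain HashMap (no TTL, no eviction).
pub struct HashMapCache {
    store: HashMap<String, SearchResults>,
}

#[async_trait::async_trait]
impl Cacher for HashMapCache {
    async fn build(_config: &Config) -> Self {
        HashMapCache {
            store: HashMap::new(),
        }
    }

    async fn cached_results(&mut self, url: &str) -> Result<SearchResults, Report<CacheError>> {
        // The default `hash_url` helper keys entries by the blake3 hash of the URL.
        let key = self.hash_url(url);
        self.store
            .get(&key)
            .cloned()
            .ok_or_else(|| Report::new(CacheError::MissingValue))
    }

    async fn cache_results(
        &mut self,
        search_results: &SearchResults,
        url: &str,
    ) -> Result<(), Report<CacheError>> {
        let key = self.hash_url(url);
        self.store.insert(key, search_results.clone());
        Ok(())
    }
}
```

Such a backend can then be passed straight to `SharedCache::new(HashMapCache::build(&config).await)`, exactly like the built-in variants returned by `create_cache`.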
diff --git a/src/cache/redis_cacher.rs b/src/cache/redis_cacher.rs
index 0e8ec38..7d3295a 100644
--- a/src/cache/redis_cacher.rs
+++ b/src/cache/redis_cacher.rs
@@ -1,7 +1,6 @@
 //! This module provides the functionality to cache the aggregated results fetched and aggregated
 //! from the upstream search engines in a json format.
 
-use blake3::hash;
 use error_stack::Report;
 use futures::future::try_join_all;
 use redis::{aio::ConnectionManager, AsyncCommands, Client, RedisError};
@@ -19,6 +18,8 @@ pub struct RedisCache {
     pool_size: u8,
     /// It stores the index of which connection is being used at the moment.
     current_connection: u8,
+    /// It stores the max TTL for keys.
+    cache_ttl: u16,
 }
 
 impl RedisCache {
@@ -37,6 +38,7 @@ impl RedisCache {
     pub async fn new(
         redis_connection_url: &str,
         pool_size: u8,
+        cache_ttl: u16,
     ) -> Result<Self, Report<CacheError>> {
         let client = Client::open(redis_connection_url)?;
         let mut tasks: Vec<_> = Vec::new();
@@ -49,36 +51,27 @@ impl RedisCache {
             connection_pool: try_join_all(tasks).await?,
             pool_size,
             current_connection: Default::default(),
+            cache_ttl,
         };
         Ok(redis_cache)
     }
 
-    /// A helper function which computes the hash of the url and formats and returns it as string.
+    /// A function which fetches the cached json as json string from the redis server.
     ///
     /// # Arguments
    ///
-    /// * `url` - It takes an url as string.
-    fn hash_url(&self, url: &str) -> String {
-        format!("{:?}", blake3::hash(url.as_bytes()))
-    }
-
-    /// A function which fetches the cached json results as json string from the redis server.
-    ///
-    /// # Arguments
-    ///
-    /// * `url` - It takes an url as a string.
+    /// * `key` - It takes a string as key.
     ///
     /// # Error
     ///
-    /// Returns the results as a String from the cache on success otherwise returns a `CacheError`
+    /// Returns the json as a String from the cache on success otherwise returns a `CacheError`
     /// on a failure.
-    pub async fn cached_json(&mut self, url: &str) -> Result<String, Report<CacheError>> {
+    pub async fn cached_json(&mut self, key: &str) -> Result<String, Report<CacheError>> {
         self.current_connection = Default::default();
-        let hashed_url_string: &str = &self.hash_url(url);
 
         let mut result: Result<String, RedisError> = self.connection_pool
             [self.current_connection as usize]
-            .get(hashed_url_string)
+            .get(key)
             .await;
 
         // Code to check whether the current connection being used is dropped with connection error
@@ -99,7 +92,7 @@ impl RedisCache {
                 ));
             }
             result = self.connection_pool[self.current_connection as usize]
-                .get(hashed_url_string)
+                .get(key)
                 .await;
             continue;
         }
@@ -110,30 +103,29 @@ impl RedisCache {
         }
     }
 
-    /// A function which caches the results by using the hashed `url` as the key and
+    /// A function which caches the json by using the key and
     /// `json results` as the value and stores it in redis server with ttl(time to live)
     /// set to 60 seconds.
     ///
     /// # Arguments
     ///
     /// * `json_results` - It takes the json results string as an argument.
-    /// * `url` - It takes the url as a String.
+    /// * `key` - It takes the key as a String.
     ///
     /// # Error
     ///
     /// Returns a unit type if the results are cached successfully otherwise returns a `CacheError`
     /// on a failure.
-    pub async fn cache_results(
+    pub async fn cache_json(
         &mut self,
         json_results: &str,
-        url: &str,
+        key: &str,
     ) -> Result<(), Report<CacheError>> {
         self.current_connection = Default::default();
-        let hashed_url_string: &str = &self.hash_url(url);
 
         let mut result: Result<(), RedisError> = self.connection_pool
             [self.current_connection as usize]
-            .set_ex(hashed_url_string, json_results, 60)
+            .set_ex(key, json_results, self.cache_ttl.into())
             .await;
 
         // Code to check whether the current connection being used is dropped with connection error
@@ -154,7 +146,7 @@ impl RedisCache {
                 ));
             }
             result = self.connection_pool[self.current_connection as usize]
-                .set_ex(hashed_url_string, json_results, 60)
+                .set_ex(key, json_results, self.cache_ttl.into())
                 .await;
             continue;
         }
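`RedisCache::new` now threads the configured TTL through to the `SET_EX` calls instead of the previously hard-coded 60 seconds (note that the retry path is kept consistent with `self.cache_ttl` as well). A hypothetical smoke test of the new three-argument signature, assuming the `cache::redis_cacher` module is publicly reachable and a Redis server is listening on the configured address:

```rust
use websurfx::cache::redis_cacher::RedisCache;

#[tokio::main]
async fn main() {
    // Pool of 4 managed connections; entries expire after 600 seconds.
    let mut cache = RedisCache::new("redis://127.0.0.1:8082", 4, 600)
        .await
        .expect("failed to connect to the redis server");

    // `cache_json` takes the json payload first and the (already hashed) key second.
    cache
        .cache_json("{\"results\":[]}", "example-key")
        .await
        .expect("failed to write through the pool");

    // The value should be readable back under the same key until the TTL lapses.
    assert!(cache.cached_json("example-key").await.is_ok());
}
```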
diff --git a/src/config/parser.rs b/src/config/parser.rs
index 1b8ba16..b33936a 100644
--- a/src/config/parser.rs
+++ b/src/config/parser.rs
@@ -1,7 +1,7 @@
 //! This module provides the functionality to parse the lua config and convert the config options
 //! into rust readable form.
 
-use crate::handler::paths::{file_path, FileType};
+use crate::handler::{file_path, FileType};
 use crate::models::parser_models::{AggregatorConfig, RateLimiter, Style};
 use log::LevelFilter;
@@ -21,6 +21,9 @@ pub struct Config {
     /// It stores the redis connection url address on which the redis
     /// client should connect.
     pub redis_url: String,
+    #[cfg(any(feature = "redis-cache", feature = "memory-cache"))]
+    /// It stores the max TTL for search results in cache.
+    pub cache_expiry_time: u16,
     /// It stores the option to whether enable or disable production use.
     pub aggregator: AggregatorConfig,
     /// It stores the option to whether enable or disable logs.
@@ -93,6 +96,19 @@ impl Config {
             }
         };
 
+        #[cfg(any(feature = "redis-cache", feature = "memory-cache"))]
+        let parsed_cet = globals.get::<_, u16>("cache_expiry_time")?;
+        let cache_expiry_time = match parsed_cet {
+            0..=59 => {
+                log::error!(
+                    "Config Error: The value of `cache_expiry_time` must be greater than or equal to 60"
+                );
+                log::error!("Falling back to using the value `60` for the option");
+                60
+            }
+            _ => parsed_cet,
+        };
+
         Ok(Config {
             port: globals.get::<_, u16>("port")?,
             binding_ip: globals.get::<_, String>("binding_ip")?,
@@ -116,6 +132,8 @@ impl Config {
                 time_limit: rate_limiter["time_limit"],
             },
             safe_search,
+            #[cfg(any(feature = "redis-cache", feature = "memory-cache"))]
+            cache_expiry_time,
         })
     }
 }
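The guard above only rejects values below 60 seconds; anything at or above that passes through unchanged. Restated as a standalone function with its edge cases (hypothetical helper written for illustration, not part of the PR):

```rust
/// Mirrors the `cache_expiry_time` validation in `Config::parse`: values below
/// 60 seconds fall back to 60, everything else is accepted as-is.
fn clamp_cache_expiry_time(parsed_cet: u16) -> u16 {
    match parsed_cet {
        0..=59 => {
            log::error!(
                "Config Error: The value of `cache_expiry_time` must be greater than or equal to 60"
            );
            log::error!("Falling back to using the value `60` for the option");
            60
        }
        _ => parsed_cet,
    }
}

fn main() {
    assert_eq!(clamp_cache_expiry_time(0), 60);
    assert_eq!(clamp_cache_expiry_time(59), 60);
    assert_eq!(clamp_cache_expiry_time(60), 60);
    assert_eq!(clamp_cache_expiry_time(600), 600);
}
```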
diff --git a/src/handler/mod.rs b/src/handler/mod.rs
index 188767d..f6cf2d7 100644
--- a/src/handler/mod.rs
+++ b/src/handler/mod.rs
@@ -1,5 +1,115 @@
-//! This module provides modules which provide the functionality to handle paths for different
-//! files present on different paths and provide one appropriate path on which it is present and
-//! can be used.
+//! This module provides the functionality to handle theme folder present on different paths and
+//! provide one appropriate path on which it is present and can be used.
 
-pub mod paths;
+use std::collections::HashMap;
+use std::io::Error;
+use std::path::Path;
+use std::sync::OnceLock;
+
+// ------- Constants --------
+/// The constant holding the name of the theme folder.
+const PUBLIC_DIRECTORY_NAME: &str = "public";
+/// The constant holding the name of the common folder.
+const COMMON_DIRECTORY_NAME: &str = "websurfx";
+/// The constant holding the name of the config file.
+const CONFIG_FILE_NAME: &str = "config.lua";
+/// The constant holding the name of the AllowList text file.
+const ALLOWLIST_FILE_NAME: &str = "allowlist.txt";
+/// The constant holding the name of the BlockList text file.
+const BLOCKLIST_FILE_NAME: &str = "blocklist.txt";
+
+/// An enum type which provides different variants to handle paths for various files/folders.
+#[derive(Hash, PartialEq, Eq, Debug)]
+pub enum FileType {
+    /// This variant handles all the paths associated with the config file.
+    Config,
+    /// This variant handles all the paths associated with the Allowlist text file.
+    AllowList,
+    /// This variant handles all the paths associated with the BlockList text file.
+    BlockList,
+    /// This variant handles all the paths associated with the public folder (Theme folder).
+    Theme,
+}
+
+/// A static variable which stores the different filesystem paths for various file/folder types.
+static FILE_PATHS_FOR_DIFF_FILE_TYPES: OnceLock<HashMap<FileType, Vec<String>>> = OnceLock::new();
+
+/// A function which returns an appropriate path for the provided file type by checking if the path
+/// for the given file type exists on that path.
+///
+/// # Error
+///
+/// Returns a `<file_name> folder/file not found!!` error if the given file_type folder/file is not
+/// present on the path on which it is being tested.
+///
+/// # Example
+///
+/// If this function is given the file_type of Theme variant then the theme folder is checked by the
+/// following steps:
+///
+/// 1. `/opt/websurfx` if it is not present here then it falls back to the next one (2)
+/// 2. Under project folder ( or codebase in other words) if it is not present
+/// here then it returns an error as mentioned above.
+pub fn file_path(file_type: FileType) -> Result<&'static str, Error> {
+    let home = env!("HOME");
+
+    let file_path: &Vec<String> = FILE_PATHS_FOR_DIFF_FILE_TYPES
+        .get_or_init(|| {
+            HashMap::from([
+                (
+                    FileType::Config,
+                    vec![
+                        format!(
+                            "{}/.config/{}/{}",
+                            home, COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME
+                        ),
+                        format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME),
+                        format!("./{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME),
+                    ],
+                ),
+                (
+                    FileType::Theme,
+                    vec![
+                        format!("/opt/websurfx/{}/", PUBLIC_DIRECTORY_NAME),
+                        format!("./{}/", PUBLIC_DIRECTORY_NAME),
+                    ],
+                ),
+                (
+                    FileType::AllowList,
+                    vec![
+                        format!(
+                            "{}/.config/{}/{}",
+                            home, COMMON_DIRECTORY_NAME, ALLOWLIST_FILE_NAME
+                        ),
+                        format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, ALLOWLIST_FILE_NAME),
+                        format!("./{}/{}", COMMON_DIRECTORY_NAME, ALLOWLIST_FILE_NAME),
+                    ],
+                ),
+                (
+                    FileType::BlockList,
+                    vec![
+                        format!(
+                            "{}/.config/{}/{}",
+                            home, COMMON_DIRECTORY_NAME, BLOCKLIST_FILE_NAME
+                        ),
+                        format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, BLOCKLIST_FILE_NAME),
+                        format!("./{}/{}", COMMON_DIRECTORY_NAME, BLOCKLIST_FILE_NAME),
+                    ],
+                ),
+            ])
+        })
+        .get(&file_type)
+        .unwrap();
+
+    for path in file_path.iter() {
+        if Path::new(path).exists() {
+            return Ok(path);
+        }
+    }
+
+    // if none of the configs above exist, return an error
+    Err(Error::new(
+        std::io::ErrorKind::NotFound,
+        format!("{:?} file/folder not found!!", file_type),
+    ))
+}
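Callers resolve files exactly as before; only the import path changes from `crate::handler::paths` to `crate::handler`. A hypothetical caller, assuming `handler` is a public module of the `websurfx` crate:

```rust
use websurfx::handler::{file_path, FileType};

// Resolve the config file by probing `~/.config/websurfx/`, `/etc/xdg/websurfx/`,
// and `./websurfx/` in that order; the first existing path wins.
fn main() -> Result<(), std::io::Error> {
    let config_path: &'static str = file_path(FileType::Config)?;
    println!("loading config from {config_path}");
    Ok(())
}
```

One behavioral difference worth noting: `env!("HOME")` is expanded at compile time, unlike the `std::env::var("HOME")` call in the deleted module below, so the `~/.config` candidate paths are baked in from the build machine's environment rather than read at runtime.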
diff --git a/src/handler/paths.rs b/src/handler/paths.rs
deleted file mode 100644
index 9ea5fff..0000000
--- a/src/handler/paths.rs
+++ /dev/null
@@ -1,119 +0,0 @@
-//! This module provides the functionality to handle theme folder present on different paths and
-//! provide one appropriate path on which it is present and can be used.
-
-use std::collections::HashMap;
-use std::io::Error;
-use std::path::Path;
-use std::sync::OnceLock;
-
-// ------- Constants --------
-/// The constant holding the name of the theme folder.
-const PUBLIC_DIRECTORY_NAME: &str = "public";
-/// The constant holding the name of the common folder.
-const COMMON_DIRECTORY_NAME: &str = "websurfx";
-/// The constant holding the name of the config file.
-const CONFIG_FILE_NAME: &str = "config.lua";
-/// The constant holding the name of the AllowList text file.
-const ALLOWLIST_FILE_NAME: &str = "allowlist.txt";
-/// The constant holding the name of the BlockList text file.
-const BLOCKLIST_FILE_NAME: &str = "blocklist.txt";
-
-/// An enum type which provides different variants to handle paths for various files/folders.
-#[derive(Hash, PartialEq, Eq, Debug)]
-pub enum FileType {
-    /// This variant handles all the paths associated with the config file.
-    Config,
-    /// This variant handles all the paths associated with the Allowlist text file.
-    AllowList,
-    /// This variant handles all the paths associated with the BlockList text file.
-    BlockList,
-    /// This variant handles all the paths associated with the public folder (Theme folder).
-    Theme,
-}
-
-/// A static variable which stores the different filesystem paths for various file/folder types.
-static FILE_PATHS_FOR_DIFF_FILE_TYPES: OnceLock<HashMap<FileType, Vec<String>>> = OnceLock::new();
-
-/// A function which returns an appropriate path for thr provided file type by checking if the path
-/// for the given file type exists on that path.
-///
-/// # Error
-///
-/// Returns a `<file_name> folder/file not found!!` error if the give file_type folder/file is not
-/// present on the path on which it is being tested.
-///
-/// # Example
-///
-/// If this function is give the file_type of Theme variant then the theme folder is checked by the
-/// following steps:
-///
-/// 1. `/opt/websurfx` if it not present here then it fallbacks to the next one (2)
-/// 2. Under project folder ( or codebase in other words) if it is not present
-/// here then it returns an error as mentioned above.
-pub fn file_path(file_type: FileType) -> Result<&'static str, Error> {
-    let file_path: &Vec<String> = FILE_PATHS_FOR_DIFF_FILE_TYPES
-        .get_or_init(|| {
-            HashMap::from([
-                (
-                    FileType::Config,
-                    vec![
-                        format!(
-                            "{}/.config/{}/{}",
-                            std::env::var("HOME").unwrap(),
-                            COMMON_DIRECTORY_NAME,
-                            CONFIG_FILE_NAME
-                        ),
-                        format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME),
-                        format!("./{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME),
-                    ],
-                ),
-                (
-                    FileType::Theme,
-                    vec![
-                        format!("/opt/websurfx/{}/", PUBLIC_DIRECTORY_NAME),
-                        format!("./{}/", PUBLIC_DIRECTORY_NAME),
-                    ],
-                ),
-                (
-                    FileType::AllowList,
-                    vec![
-                        format!(
-                            "{}/.config/{}/{}",
-                            std::env::var("HOME").unwrap(),
-                            COMMON_DIRECTORY_NAME,
-                            ALLOWLIST_FILE_NAME
-                        ),
-                        format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, ALLOWLIST_FILE_NAME),
-                        format!("./{}/{}", COMMON_DIRECTORY_NAME, ALLOWLIST_FILE_NAME),
-                    ],
-                ),
-                (
-                    FileType::BlockList,
-                    vec![
-                        format!(
-                            "{}/.config/{}/{}",
-                            std::env::var("HOME").unwrap(),
-                            COMMON_DIRECTORY_NAME,
-                            BLOCKLIST_FILE_NAME
-                        ),
-                        format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, BLOCKLIST_FILE_NAME),
-                        format!("./{}/{}", COMMON_DIRECTORY_NAME, BLOCKLIST_FILE_NAME),
-                    ],
-                ),
-            ])
-        })
-        .get(&file_type)
-        .unwrap();
-
-    for (idx, _) in file_path.iter().enumerate() {
-        if Path::new(file_path[idx].as_str()).exists() {
-            return Ok(std::mem::take(&mut &*file_path[idx]));
-        }
-    }
-
-    // if no of the configs above exist, return error
-    Err(Error::new(
-        std::io::ErrorKind::NotFound,
-        format!("{:?} file/folder not found!!", file_type),
-    ))
-}
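The deleted implementation returned the borrowed path through a `std::mem::take(&mut &*...)` detour; the rewrite above simply returns `Ok(path)`. That works because the paths live in a `static OnceLock`, so borrows from it already have the `'static` lifetime. A minimal illustration of the principle (assumed, not from this PR):

```rust
use std::sync::OnceLock;

// Anything stored in a `static OnceLock` is never dropped, so a plain `&str`
// borrowed from it can be handed out as `&'static str` with no tricks.
static PATHS: OnceLock<Vec<String>> = OnceLock::new();

fn first_path() -> &'static str {
    PATHS
        .get_or_init(|| vec!["./websurfx/config.lua".to_owned()])
        .first()
        .map(|p| p.as_str())
        .expect("the initializer always inserts one entry")
}

fn main() {
    assert_eq!(first_path(), "./websurfx/config.lua");
}
```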
diff --git a/src/lib.rs b/src/lib.rs
index 1d245fb..0d8f49d 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -22,9 +22,9 @@
 use actix_cors::Cors;
 use actix_files as fs;
 use actix_governor::{Governor, GovernorConfigBuilder};
 use actix_web::{dev::Server, http::header, middleware::Logger, web, App, HttpServer};
-use cache::cacher::{Cache, SharedCache};
+use cache::cacher::{Cacher, SharedCache};
 use config::parser::Config;
-use handler::paths::{file_path, FileType};
+use handler::{file_path, FileType};
 
 /// Runs the web server on the provided TCP listener and returns a `Server` instance.
 ///
@@ -40,14 +40,21 @@ use handler::paths::{file_path, FileType};
 ///
 /// # Example
 ///
 /// ```rust
 /// use std::net::TcpListener;
-/// use websurfx::{config::parser::Config, run, cache::cacher::Cache};
+/// use websurfx::{config::parser::Config, run, cache::cacher::create_cache};
 ///
-/// let config = Config::parse(true).unwrap();
-/// let listener = TcpListener::bind("127.0.0.1:8080").expect("Failed to bind address");
-/// let cache = Cache::new_in_memory();
-/// let server = run(listener,config,cache).expect("Failed to start server");
+/// #[tokio::main]
+/// async fn main(){
+///     let config = Config::parse(true).unwrap();
+///     let listener = TcpListener::bind("127.0.0.1:8080").expect("Failed to bind address");
+///     let cache = create_cache(&config).await;
+///     let server = run(listener,config,cache).expect("Failed to start server");
+/// }
 /// ```
-pub fn run(listener: TcpListener, config: Config, cache: Cache) -> std::io::Result<Server> {
+pub fn run(
+    listener: TcpListener,
+    config: Config,
+    cache: impl Cacher + 'static,
+) -> std::io::Result<Server> {
     let public_folder_path: &str = file_path(FileType::Theme)?;
     let cloned_config_threads_opt: u8 = config.threads;
diff --git a/src/results/aggregator.rs b/src/results/aggregator.rs
index 5c53864..e7fc090 100644
--- a/src/results/aggregator.rs
+++ b/src/results/aggregator.rs
@@ -2,7 +2,7 @@
 //! search engines and then removes duplicate results.
 
 use super::user_agent::random_user_agent;
-use crate::handler::paths::{file_path, FileType};
+use crate::handler::{file_path, FileType};
 use crate::models::{
     aggregation_models::{EngineErrorInfo, SearchResult, SearchResults},
     engine_models::{EngineError, EngineHandler},
diff --git a/src/server/router.rs b/src/server/router.rs
index 31b7e6a..ea8387b 100644
--- a/src/server/router.rs
+++ b/src/server/router.rs
@@ -4,7 +4,7 @@
 use crate::{
     config::parser::Config,
-    handler::paths::{file_path, FileType},
+    handler::{file_path, FileType},
 };
 use actix_web::{get, web, HttpRequest, HttpResponse};
 use std::fs::read_to_string;
diff --git a/src/server/routes/search.rs b/src/server/routes/search.rs
index 35c9cc4..670ee60 100644
--- a/src/server/routes/search.rs
+++ b/src/server/routes/search.rs
@@ -3,7 +3,7 @@
 use crate::{
     cache::cacher::SharedCache,
     config::parser::Config,
-    handler::paths::{file_path, FileType},
+    handler::{file_path, FileType},
     models::{
         aggregation_models::SearchResults,
         engine_models::{EngineError, EngineHandler},
@@ -47,54 +47,25 @@ pub async fn search(
             .insert_header(("location", "/"))
             .finish());
    }
-    let page = match &params.page {
-        Some(page) => *page,
-        None => 1,
-    };
-    let (_, results, _) = join!(
+    let get_results = |page| {
         results(
-            format!(
-                "http://{}:{}/search?q={}&page={}&safesearch=",
-                config.binding_ip,
-                config.port,
-                query,
-                page - 1,
-            ),
-            &config,
-            &cache,
-            query,
-            page - 1,
-            req.clone(),
-            &params.safesearch
-        ),
-        results(
-            format!(
-                "http://{}:{}/search?q={}&page={}&safesearch=",
-                config.binding_ip, config.port, query, page
-            ),
             &config,
             &cache,
             query,
             page,
            req.clone(),
-            &params.safesearch
-        ),
-        results(
-            format!(
-                "http://{}:{}/search?q={}&page={}&safesearch=",
-                config.binding_ip,
-                config.port,
-                query,
-                page + 1,
-            ),
-            &config,
-            &cache,
-            query,
-            page + 1,
-            req.clone(),
-            &params.safesearch
+            &params.safesearch,
         )
+    };
+
+    // .max(1) makes sure that the page > 0.
+    let page = params.page.unwrap_or(1).max(1);
+
+    let (_, results, _) = join!(
+        get_results(page - 1),
+        get_results(page),
+        get_results(page + 1)
+    );
 
     Ok(HttpResponse::Ok().body(
@@ -129,7 +100,6 @@ pub async fn search(
 /// It returns the `SearchResults` struct if the search results could be successfully fetched from
 /// the cache or from the upstream search engines otherwise it returns an appropriate error.
 async fn results(
-    url: String,
     config: &Config,
     cache: &web::Data<SharedCache>,
     query: &str,
@@ -137,33 +107,40 @@ async fn results(
     req: HttpRequest,
     safe_search: &Option<u8>,
 ) -> Result<SearchResults, Box<dyn std::error::Error>> {
+    // eagerly parse cookie value to evaluate safe search level
+    let cookie_value = req.cookie("appCookie");
+
+    let cookie_value: Option<Cookie<'_>> = cookie_value
+        .as_ref()
+        .and_then(|cv| serde_json::from_str(cv.name_value().1).ok());
+
+    let safe_search_level = get_safesearch_level(
+        safe_search,
+        &cookie_value.as_ref().map(|cv| cv.safe_search_level),
+        config.safe_search,
+    );
+
+    let cache_key = format!(
+        "http://{}:{}/search?q={}&page={}&safesearch={}",
+        config.binding_ip, config.port, query, page, safe_search_level
+    );
+
     // fetch the cached results json.
-    let cached_results = cache.cached_json(&url).await;
+    let cached_results = cache.cached_results(&cache_key).await;
     // check if fetched cache results was indeed fetched or it was an error and if so
     // handle the data accordingly.
     match cached_results {
         Ok(results) => Ok(results),
         Err(_) => {
-            let mut safe_search_level: u8 = match config.safe_search {
-                3..=4 => config.safe_search,
-                _ => match safe_search {
-                    Some(safesearch) => match safesearch {
-                        0..=2 => *safesearch,
-                        _ => config.safe_search,
-                    },
-                    None => config.safe_search,
-                },
-            };
-
             if safe_search_level == 4 {
                 let mut results: SearchResults = SearchResults::default();
-                let mut _flag: bool =
-                    is_match_from_filter_list(file_path(FileType::BlockList)?, query)?;
-                _flag = !is_match_from_filter_list(file_path(FileType::AllowList)?, query)?;
-                if _flag {
+                let flag: bool =
+                    is_match_from_filter_list(file_path(FileType::BlockList)?, query)?;
+                // Return early when the query contains disallowed words.
+                if flag {
                     results.set_disallowed();
-                    cache.cache_results(&results, &url).await?;
+                    cache.cache_results(&results, &cache_key).await?;
                     results.set_safe_search_level(safe_search_level);
                     return Ok(results);
                 }
             }
 
             // default selected upstream search engines from the config file otherwise
             // parse the non-empty cookie and grab the user selected engines from the
             // UI and use that.
-            let mut results: SearchResults = match req.cookie("appCookie") {
+            let mut results: SearchResults = match cookie_value {
                 Some(cookie_value) => {
-                    let cookie_value: Cookie<'_> =
-                        serde_json::from_str(cookie_value.name_value().1)?;
-
                     let engines: Vec<EngineHandler> = cookie_value
                         .engines
                         .iter()
                         .filter_map(|name| EngineHandler::new(name).ok())
                         .collect();
 
-                    safe_search_level = match config.safe_search {
-                        3..=4 => config.safe_search,
-                        _ => match safe_search {
-                            Some(safesearch) => match safesearch {
-                                0..=2 => *safesearch,
-                                _ => config.safe_search,
-                            },
-                            None => cookie_value.safe_search_level,
-                        },
-                    };
-
                     match engines.is_empty() {
                         false => {
                             aggregate(
@@ -239,9 +202,7 @@ async fn results(
             {
                 results.set_filtered();
             }
-            cache
-                .cache_results(&results, &(format!("{url}{safe_search_level}")))
-                .await?;
+            cache.cache_results(&results, &cache_key).await?;
             results.set_safe_search_level(safe_search_level);
             Ok(results)
         }
@@ -264,14 +225,34 @@ fn is_match_from_filter_list(
     file_path: &str,
     query: &str,
 ) -> Result<bool, Box<dyn std::error::Error>> {
-    let mut flag = false;
     let mut reader = BufReader::new(File::open(file_path)?);
     for line in reader.by_ref().lines() {
         let re = Regex::new(&line?)?;
         if re.is_match(query) {
-            flag = true;
-            break;
+            return Ok(true);
         }
     }
-    Ok(flag)
+
+    Ok(false)
+}
+
+/// A helper function which returns the safe search level based on the url params
+/// and cookie value.
+///
+/// # Arguments
+///
+/// * `safe_search` - Safe search level from the url.
+/// * `cookie` - User's cookie
+/// * `default` - Safe search level to fall back to
+fn get_safesearch_level(safe_search: &Option<u8>, cookie: &Option<u8>, default: u8) -> u8 {
+    match safe_search {
+        Some(ss) => {
+            if *ss >= 3 {
+                default
+            } else {
+                *ss
+            }
+        }
+        None => cookie.unwrap_or(default),
+    }
+}
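`get_safesearch_level` collapses the two nested `match` blocks deleted above into one precedence rule: a URL parameter in the 0..=2 range wins, an out-of-range parameter falls back to the default, and with no parameter the cookie value (if any) is consulted before the config default. Because the resolved level is now baked into `cache_key`, the same query cached under different levels no longer collides. The logic restated with a hypothetical test:

```rust
// Copied logic from the helper above, exercised standalone.
fn get_safesearch_level(safe_search: &Option<u8>, cookie: &Option<u8>, default: u8) -> u8 {
    match safe_search {
        Some(ss) => {
            if *ss >= 3 {
                default
            } else {
                *ss
            }
        }
        None => cookie.unwrap_or(default),
    }
}

fn main() {
    assert_eq!(get_safesearch_level(&Some(1), &Some(2), 0), 1); // url param wins
    assert_eq!(get_safesearch_level(&Some(4), &Some(2), 0), 0); // out of range, use default
    assert_eq!(get_safesearch_level(&None, &Some(2), 0), 2); // cookie consulted next
    assert_eq!(get_safesearch_level(&None, &None, 3), 3); // config default last
}
```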
diff --git a/src/templates/partials/footer.rs b/src/templates/partials/footer.rs
index beaa8ca..2ced457 100644
--- a/src/templates/partials/footer.rs
+++ b/src/templates/partials/footer.rs
@@ -12,7 +12,7 @@ pub fn footer() -> Markup {
     html!(
         footer{
             div{
-                span{"Powered By "b{"Websurfx"}}span{"-"}span{"a lightening fast, privacy respecting, secure meta
+                span{"Powered By "b{"Websurfx"}}span{"-"}span{"a lightning-fast, privacy respecting, secure meta
                     search engine"}
             }
             div{
diff --git a/src/templates/partials/settings_tabs/user_interface.rs b/src/templates/partials/settings_tabs/user_interface.rs
index 90e1ce9..c978c3e 100644
--- a/src/templates/partials/settings_tabs/user_interface.rs
+++ b/src/templates/partials/settings_tabs/user_interface.rs
@@ -1,6 +1,6 @@
 //! A module that handles the user interface tab for setting page view in the `websurfx` frontend.
 
-use crate::handler::paths::{file_path, FileType};
+use crate::handler::{file_path, FileType};
 use maud::{html, Markup};
 use std::fs::read_dir;
diff --git a/src/templates/views/search.rs b/src/templates/views/search.rs
index 440d585..9fef7f0 100644
--- a/src/templates/views/search.rs
+++ b/src/templates/views/search.rs
@@ -93,7 +93,7 @@ pub fn search(
                     img src="./images/no_selection.png" alt="Image of a white cross inside a red circle";
                 }
             }
-            @else{
+            @else {
                 .result_not_found {
                     p{"Your search - "{(query)}" - did not match any documents."}
                     p class="suggestions"{"Suggestions:"}
diff --git a/tests/index.rs b/tests/index.rs
index 3bd7381..892e0b9 100644
--- a/tests/index.rs
+++ b/tests/index.rs
@@ -3,18 +3,13 @@
 use std::net::TcpListener;
 use websurfx::{config::parser::Config, run, templates::views};
 
 // Starts a new instance of the HTTP server, bound to a random available port
-fn spawn_app() -> String {
+async fn spawn_app() -> String {
     // Binding to port 0 will trigger the OS to assign a port for us.
     let listener = TcpListener::bind("127.0.0.1:0").expect("Failed to bind random port");
     let port = listener.local_addr().unwrap().port();
     let config = Config::parse(false).unwrap();
-    let server = run(
-        listener,
-        config,
-        #[cfg(all(feature = "memory-cache", not(feature = "redis-cache")))]
-        websurfx::cache::cacher::Cache::new_in_memory(),
-    )
-    .expect("Failed to bind address");
+    let cache = websurfx::cache::cacher::create_cache(&config).await;
+    let server = run(listener, config, cache).expect("Failed to bind address");
 
     tokio::spawn(server);
     format!("http://127.0.0.1:{}/", port)
@@ -22,7 +17,7 @@
 
 #[tokio::test]
 async fn test_index() {
-    let address = spawn_app();
+    let address = spawn_app().await;
 
     let client = reqwest::Client::new();
     let res = client.get(address).send().await.unwrap();
diff --git a/websurfx/config.lua b/websurfx/config.lua
index eeeb638..57d552c 100644
--- a/websurfx/config.lua
+++ b/websurfx/config.lua
@@ -47,7 +47,7 @@ theme = "simple" -- the theme name which should be used for the website
 
 -- ### Caching ###
 redis_url = "redis://127.0.0.1:8082" -- redis connection url address on which the client should connect on.
-
+cache_expiry_time = 600 -- This option takes the expiry time of the search results (value in seconds and the value should be greater than or equal to 60 seconds).
 -- ### Search Engines ###
 upstream_search_engines = {
 DuckDuckGo = true,