From 440216871de346746494799709886b4b013a326e Mon Sep 17 00:00:00 2001 From: Milim <81323548+MilimTheTrueOne@users.noreply.github.com> Date: Mon, 3 Jul 2023 19:30:25 +0200 Subject: [PATCH 1/2] Rename Things, refactor some code BREAKING: renames `binding_ip_addr` to `binding_ip` and `redis_connection_url` to `redis_url`. Renames a lot of internals as well, but there are too many to mention. --- src/bin/websurfx.rs | 4 +- src/cache/cacher.rs | 8 +- src/{config_parser => config}/mod.rs | 0 src/{config_parser => config}/parser.rs | 12 +-- .../parser_models.rs | 0 src/engines/duckduckgo.rs | 2 +- src/engines/searx.rs | 2 +- src/handler/mod.rs | 2 +- ...public_path_handler.rs => public_paths.rs} | 20 +++-- src/lib.rs | 10 +-- .../aggregation_models.rs | 2 +- .../aggregator.rs | 0 .../mod.rs | 0 .../user_agent.rs | 0 src/server/routes.rs | 84 ++++++++++--------- tests/index.rs | 4 +- websurfx/config.lua | 4 +- 17 files changed, 80 insertions(+), 74 deletions(-) rename src/{config_parser => config}/mod.rs (100%) rename src/{config_parser => config}/parser.rs (90%) rename src/{config_parser => config}/parser_models.rs (100%) rename src/handler/{public_path_handler.rs => public_paths.rs} (68%) rename src/{search_results_handler => results}/aggregation_models.rs (99%) rename src/{search_results_handler => results}/aggregator.rs (100%) rename src/{search_results_handler => results}/mod.rs (100%) rename src/{search_results_handler => results}/user_agent.rs (100%) diff --git a/src/bin/websurfx.rs b/src/bin/websurfx.rs index 8661725..d8d7f5c 100644 --- a/src/bin/websurfx.rs +++ b/src/bin/websurfx.rs @@ -5,7 +5,7 @@ use std::net::TcpListener; -use websurfx::{config_parser::parser::Config, run}; +use websurfx::{config::parser::Config, run}; /// The function that launches the main server and registers all the routes of the website. 
/// @@ -26,7 +26,7 @@ async fn main() -> std::io::Result<()> { log::info!("started server on port {}", config.port); - let listener = TcpListener::bind((config.binding_ip_addr.clone(), config.port))?; + let listener = TcpListener::bind((config.binding_ip.clone(), config.port))?; run(listener, config)?.await } diff --git a/src/cache/cacher.rs b/src/cache/cacher.rs index 87a6c6d..31494c4 100644 --- a/src/cache/cacher.rs +++ b/src/cache/cacher.rs @@ -32,7 +32,7 @@ impl RedisCache { /// # Arguments /// /// * `url` - It takes an url as string. - fn compute_url_hash(url: &str) -> String { + fn hash_url(url: &str) -> String { format!("{:?}", compute(url)) } @@ -41,8 +41,8 @@ impl RedisCache { /// # Arguments /// /// * `url` - It takes an url as a string. - pub fn cached_results_json(&mut self, url: &str) -> Result> { - let hashed_url_string = Self::compute_url_hash(url); + pub fn get_cached_json(&mut self, url: &str) -> Result> { + let hashed_url_string = Self::hash_url(url); Ok(self.connection.get(hashed_url_string)?) } @@ -59,7 +59,7 @@ impl RedisCache { json_results: String, url: &str, ) -> Result<(), Box> { - let hashed_url_string = Self::compute_url_hash(url); + let hashed_url_string = Self::hash_url(url); // put results_json into cache self.connection.set(&hashed_url_string, json_results)?; diff --git a/src/config_parser/mod.rs b/src/config/mod.rs similarity index 100% rename from src/config_parser/mod.rs rename to src/config/mod.rs diff --git a/src/config_parser/parser.rs b/src/config/parser.rs similarity index 90% rename from src/config_parser/parser.rs rename to src/config/parser.rs index 5b4c2d8..f760e7d 100644 --- a/src/config_parser/parser.rs +++ b/src/config/parser.rs @@ -14,16 +14,16 @@ static CONFIG_FILE_NAME: &str = "config.lua"; /// # Fields // /// * `port` - It stores the parsed port number option on which the server should launch. 
-/// * `binding_ip_addr` - It stores the parsed ip address option on which the server should launch +/// * `binding_ip` - It stores the parsed ip address option on which the server should launch /// * `style` - It stores the theming options for the website. -/// * `redis_connection_url` - It stores the redis connection url address on which the redis +/// * `redis_url` - It stores the redis connection url address on which the redis /// client should connect. #[derive(Clone)] pub struct Config { pub port: u16, - pub binding_ip_addr: String, + pub binding_ip: String, pub style: Style, - pub redis_connection_url: String, + pub redis_url: String, pub aggregator: AggregatorConfig, pub logging: bool, pub debug: bool, @@ -55,12 +55,12 @@ impl Config { Ok(Config { port: globals.get::<_, u16>("port")?, - binding_ip_addr: globals.get::<_, String>("binding_ip_addr")?, + binding_ip: globals.get::<_, String>("binding_ip")?, style: Style::new( globals.get::<_, String>("theme")?, globals.get::<_, String>("colorscheme")?, ), - redis_connection_url: globals.get::<_, String>("redis_connection_url")?, + redis_url: globals.get::<_, String>("redis_url")?, aggregator: AggregatorConfig { random_delay: globals.get::<_, bool>("production_use")?, }, diff --git a/src/config_parser/parser_models.rs b/src/config/parser_models.rs similarity index 100% rename from src/config_parser/parser_models.rs rename to src/config/parser_models.rs diff --git a/src/engines/duckduckgo.rs b/src/engines/duckduckgo.rs index 44d7b0d..21202de 100644 --- a/src/engines/duckduckgo.rs +++ b/src/engines/duckduckgo.rs @@ -7,7 +7,7 @@ use std::{collections::HashMap, time::Duration}; use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT}; use scraper::{Html, Selector}; -use crate::search_results_handler::aggregation_models::RawSearchResult; +use crate::results::aggregation_models::RawSearchResult; use super::engine_models::EngineError; diff --git a/src/engines/searx.rs b/src/engines/searx.rs index 
bc68608..2a59901 100644 --- a/src/engines/searx.rs +++ b/src/engines/searx.rs @@ -6,7 +6,7 @@ use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT}; use scraper::{Html, Selector}; use std::collections::HashMap; -use crate::search_results_handler::aggregation_models::RawSearchResult; +use crate::results::aggregation_models::RawSearchResult; use super::engine_models::EngineError; use error_stack::{IntoReport, Report, Result, ResultExt}; diff --git a/src/handler/mod.rs b/src/handler/mod.rs index daa5212..0d07e51 100644 --- a/src/handler/mod.rs +++ b/src/handler/mod.rs @@ -1 +1 @@ -pub mod public_path_handler; +pub mod public_paths; diff --git a/src/handler/public_path_handler.rs b/src/handler/public_paths.rs similarity index 68% rename from src/handler/public_path_handler.rs rename to src/handler/public_paths.rs index b99283e..7f0924b 100644 --- a/src/handler/public_path_handler.rs +++ b/src/handler/public_paths.rs @@ -17,15 +17,17 @@ static PUBLIC_DIRECTORY_NAME: &str = "public"; /// 1. `/opt/websurfx` if it not present here then it fallbacks to the next one (2) /// 2. Under project folder ( or codebase in other words) if it is not present /// here then it returns an error as mentioned above. 
-pub fn handle_different_public_path() -> Result { +pub fn get_public_path() -> Result { if Path::new(format!("/opt/websurfx/{}/", PUBLIC_DIRECTORY_NAME).as_str()).exists() { - Ok(format!("/opt/websurfx/{}", PUBLIC_DIRECTORY_NAME)) - } else if Path::new(format!("./{}/", PUBLIC_DIRECTORY_NAME).as_str()).exists() { - Ok(format!("./{}", PUBLIC_DIRECTORY_NAME)) - } else { - Err(Error::new( - std::io::ErrorKind::NotFound, - "Themes (public) folder not found!!", - )) + return Ok(format!("/opt/websurfx/{}", PUBLIC_DIRECTORY_NAME)); } + + if Path::new(format!("./{}/", PUBLIC_DIRECTORY_NAME).as_str()).exists() { + return Ok(format!("./{}", PUBLIC_DIRECTORY_NAME)); + } + + Err(Error::new( + std::io::ErrorKind::NotFound, + "Themes (public) folder not found!!", + )) } diff --git a/src/lib.rs b/src/lib.rs index 6b6d4fe..2dd7198 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,10 +2,10 @@ //! and register all the routes for the `websurfx` meta search engine website. pub mod cache; -pub mod config_parser; +pub mod config; pub mod engines; pub mod handler; -pub mod search_results_handler; +pub mod results; pub mod server; use std::net::TcpListener; @@ -14,9 +14,9 @@ use crate::server::routes; use actix_files as fs; use actix_web::{dev::Server, middleware::Logger, web, App, HttpServer}; -use config_parser::parser::Config; +use config::parser::Config; use handlebars::Handlebars; -use handler::public_path_handler::handle_different_public_path; +use handler::public_paths::get_public_path; /// Runs the web server on the provided TCP listener and returns a `Server` instance. 
/// @@ -41,7 +41,7 @@ use handler::public_path_handler::handle_different_public_path; pub fn run(listener: TcpListener, config: Config) -> std::io::Result { let mut handlebars: Handlebars = Handlebars::new(); - let public_folder_path: String = handle_different_public_path()?; + let public_folder_path: String = get_public_path()?; handlebars .register_templates_directory(".html", format!("{}/templates", public_folder_path)) diff --git a/src/search_results_handler/aggregation_models.rs b/src/results/aggregation_models.rs similarity index 99% rename from src/search_results_handler/aggregation_models.rs rename to src/results/aggregation_models.rs index b6e6b81..86559a7 100644 --- a/src/search_results_handler/aggregation_models.rs +++ b/src/results/aggregation_models.rs @@ -3,7 +3,7 @@ use serde::{Deserialize, Serialize}; -use crate::config_parser::parser_models::Style; +use crate::config::parser_models::Style; /// A named struct to store, serialize and deserializes the individual search result from all the /// scraped and aggregated search results from the upstream search engines. 
diff --git a/src/search_results_handler/aggregator.rs b/src/results/aggregator.rs similarity index 100% rename from src/search_results_handler/aggregator.rs rename to src/results/aggregator.rs diff --git a/src/search_results_handler/mod.rs b/src/results/mod.rs similarity index 100% rename from src/search_results_handler/mod.rs rename to src/results/mod.rs diff --git a/src/search_results_handler/user_agent.rs b/src/results/user_agent.rs similarity index 100% rename from src/search_results_handler/user_agent.rs rename to src/results/user_agent.rs diff --git a/src/server/routes.rs b/src/server/routes.rs index 60d3007..cecdcdd 100644 --- a/src/server/routes.rs +++ b/src/server/routes.rs @@ -6,9 +6,9 @@ use std::fs::read_to_string; use crate::{ cache::cacher::RedisCache, - config_parser::parser::Config, - handler::public_path_handler::handle_different_public_path, - search_results_handler::{aggregation_models::SearchResults, aggregator::aggregate}, + config::parser::Config, + handler::public_paths::get_public_path, + results::{aggregation_models::SearchResults, aggregator::aggregate}, }; use actix_web::{get, web, HttpRequest, HttpResponse}; use handlebars::Handlebars; @@ -73,46 +73,25 @@ pub async fn search( ) -> Result> { let params = web::Query::::from_query(req.query_string())?; - //Initialize redis cache connection struct - let mut redis_cache = RedisCache::new(config.redis_connection_url.clone())?; match ¶ms.q { Some(query) => { if query.trim().is_empty() { - Ok(HttpResponse::Found() + return Ok(HttpResponse::Found() .insert_header(("location", "/")) - .finish()) - } else { - let page = match ¶ms.page { - Some(page) => *page, - None => 0, - }; - - let page_url = format!( - "http://{}:{}/search?q={}&page={}", - config.binding_ip_addr, config.port, query, page - ); - - // fetch the cached results json. 
- let cached_results_json = redis_cache.cached_results_json(&page_url); - // check if fetched results was indeed fetched or it was an error and if so - // handle the data accordingly. - match cached_results_json { - Ok(results_json) => { - let new_results_json: SearchResults = serde_json::from_str(&results_json)?; - let page_content: String = hbs.render("search", &new_results_json)?; - Ok(HttpResponse::Ok().body(page_content)) - } - Err(_) => { - let mut results_json: crate::search_results_handler::aggregation_models::SearchResults = - aggregate(query, page, config.aggregator.random_delay, config.debug).await?; - results_json.add_style(config.style.clone()); - redis_cache - .cache_results(serde_json::to_string(&results_json)?, &page_url)?; - let page_content: String = hbs.render("search", &results_json)?; - Ok(HttpResponse::Ok().body(page_content)) - } - } + .finish()); } + let page = match ¶ms.page { + Some(page) => *page, + None => 0, + }; + + let url = format!( + "http://{}:{}/search?q={}&page={}", + config.binding_ip, config.port, query, page + ); + let results_json = get_results(url, &config, query, page).await?; + let page_content: String = hbs.render("search", &results_json)?; + Ok(HttpResponse::Ok().body(page_content)) } None => Ok(HttpResponse::Found() .insert_header(("location", "/")) @@ -120,11 +99,36 @@ pub async fn search( } } +/// Fetches the results for a query and page. +/// First checks the redis cache, if that fails it gets proper results +async fn get_results( + url: String, + config: &Config, + query: &str, + page: u32, +) -> Result> { + //Initialize redis cache connection struct + let mut redis_cache = RedisCache::new(config.redis_url.clone())?; + // fetch the cached results json. + let cached_results_json = redis_cache.get_cached_json(&url); + // check if fetched results was indeed fetched or it was an error and if so + // handle the data accordingly. 
+ match cached_results_json { + Ok(results_json) => Ok(serde_json::from_str::(&results_json).unwrap()), + Err(_) => { + let mut results_json: crate::results::aggregation_models::SearchResults = + aggregate(query, page, config.aggregator.random_delay, config.debug).await?; + results_json.add_style(config.style.clone()); + redis_cache.cache_results(serde_json::to_string(&results_json)?, &url)?; + Ok(results_json) + } + } +} + /// Handles the route of robots.txt page of the `websurfx` meta search engine website. #[get("/robots.txt")] pub async fn robots_data(_req: HttpRequest) -> Result> { - let page_content: String = - read_to_string(format!("{}/robots.txt", handle_different_public_path()?))?; + let page_content: String = read_to_string(format!("{}/robots.txt", get_public_path()?))?; Ok(HttpResponse::Ok() .content_type("text/plain; charset=ascii") .body(page_content)) diff --git a/tests/index.rs b/tests/index.rs index e3059bf..657a466 100644 --- a/tests/index.rs +++ b/tests/index.rs @@ -1,7 +1,7 @@ use std::net::TcpListener; use handlebars::Handlebars; -use websurfx::{config_parser::parser::Config, run}; +use websurfx::{config::parser::Config, run}; // Starts a new instance of the HTTP server, bound to a random available port fn spawn_app() -> String { @@ -41,5 +41,5 @@ async fn test_index() { assert_eq!(res.text().await.unwrap(), template); } -// TODO: Write tests for tesing parameters for search function that if provided with something +// TODO: Write tests for testing parameters for search function that if provided with something // other than u32 like alphabets and special characters than it should panic diff --git a/websurfx/config.lua b/websurfx/config.lua index 3daaa91..3e2167a 100644 --- a/websurfx/config.lua +++ b/websurfx/config.lua @@ -4,7 +4,7 @@ debug = false -- an option to enable or disable debug mode. 
-- ### Server ### port = "8080" -- port on which server should be launched -binding_ip_addr = "127.0.0.1" --ip address on the which server should be launched. +binding_ip = "127.0.0.1" --ip address on the which server should be launched. production_use = false -- whether to use production mode or not (in other words this option should be used if it is to be used to host it on the server to provide a service to a large number of users) -- if production_use is set to true -- There will be a random delay before sending the request to the search engines, this is to prevent DDoSing the upstream search engines from a large number of simultaneous requests. @@ -25,4 +25,4 @@ colorscheme = "catppuccin-mocha" -- the colorscheme name which should be used fo theme = "simple" -- the theme name which should be used for the website -- ### Caching ### -redis_connection_url = "redis://127.0.0.1:8082" -- redis connection url address on which the client should connect on. +redis_url = "redis://127.0.0.1:8082" -- redis connection url address on which the client should connect on. From 4cb38b1b54b8e1b4d7abe07ee3409feb9087ca62 Mon Sep 17 00:00:00 2001 From: Milim <81323548+MilimTheTrueOne@users.noreply.github.com> Date: Mon, 3 Jul 2023 19:38:17 +0200 Subject: [PATCH 2/2] Fix docs --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 2dd7198..30ebe797 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -32,7 +32,7 @@ use handler::public_paths::get_public_path; /// /// ```rust /// use std::net::TcpListener; -/// use websurfx::{config_parser::parser::Config, run}; +/// use websurfx::{config::parser::Config, run}; /// /// let config = Config::parse().unwrap(); /// let listener = TcpListener::bind("127.0.0.1:8080").expect("Failed to bind address");