mirror of
https://github.com/neon-mmd/websurfx.git
synced 2024-11-25 07:28:22 -05:00
Merge branch 'rolling' into improving-gitpod
This commit is contained in:
commit
29b1376cde
@ -5,7 +5,7 @@
|
|||||||
|
|
||||||
use std::net::TcpListener;
|
use std::net::TcpListener;
|
||||||
|
|
||||||
use websurfx::{config_parser::parser::Config, run};
|
use websurfx::{config::parser::Config, run};
|
||||||
|
|
||||||
/// The function that launches the main server and registers all the routes of the website.
|
/// The function that launches the main server and registers all the routes of the website.
|
||||||
///
|
///
|
||||||
@ -26,7 +26,7 @@ async fn main() -> std::io::Result<()> {
|
|||||||
|
|
||||||
log::info!("started server on port {}", config.port);
|
log::info!("started server on port {}", config.port);
|
||||||
|
|
||||||
let listener = TcpListener::bind((config.binding_ip_addr.clone(), config.port))?;
|
let listener = TcpListener::bind((config.binding_ip.clone(), config.port))?;
|
||||||
|
|
||||||
run(listener, config)?.await
|
run(listener, config)?.await
|
||||||
}
|
}
|
||||||
|
8
src/cache/cacher.rs
vendored
8
src/cache/cacher.rs
vendored
@ -32,7 +32,7 @@ impl RedisCache {
|
|||||||
/// # Arguments
|
/// # Arguments
|
||||||
///
|
///
|
||||||
/// * `url` - It takes an url as string.
|
/// * `url` - It takes an url as string.
|
||||||
fn compute_url_hash(url: &str) -> String {
|
fn hash_url(url: &str) -> String {
|
||||||
format!("{:?}", compute(url))
|
format!("{:?}", compute(url))
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -41,8 +41,8 @@ impl RedisCache {
|
|||||||
/// # Arguments
|
/// # Arguments
|
||||||
///
|
///
|
||||||
/// * `url` - It takes an url as a string.
|
/// * `url` - It takes an url as a string.
|
||||||
pub fn cached_results_json(&mut self, url: &str) -> Result<String, Box<dyn std::error::Error>> {
|
pub fn get_cached_json(&mut self, url: &str) -> Result<String, Box<dyn std::error::Error>> {
|
||||||
let hashed_url_string = Self::compute_url_hash(url);
|
let hashed_url_string = Self::hash_url(url);
|
||||||
Ok(self.connection.get(hashed_url_string)?)
|
Ok(self.connection.get(hashed_url_string)?)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -59,7 +59,7 @@ impl RedisCache {
|
|||||||
json_results: String,
|
json_results: String,
|
||||||
url: &str,
|
url: &str,
|
||||||
) -> Result<(), Box<dyn std::error::Error>> {
|
) -> Result<(), Box<dyn std::error::Error>> {
|
||||||
let hashed_url_string = Self::compute_url_hash(url);
|
let hashed_url_string = Self::hash_url(url);
|
||||||
|
|
||||||
// put results_json into cache
|
// put results_json into cache
|
||||||
self.connection.set(&hashed_url_string, json_results)?;
|
self.connection.set(&hashed_url_string, json_results)?;
|
||||||
|
@ -14,16 +14,16 @@ static CONFIG_FILE_NAME: &str = "config.lua";
|
|||||||
/// # Fields
|
/// # Fields
|
||||||
//
|
//
|
||||||
/// * `port` - It stores the parsed port number option on which the server should launch.
|
/// * `port` - It stores the parsed port number option on which the server should launch.
|
||||||
/// * `binding_ip_addr` - It stores the parsed ip address option on which the server should launch
|
/// * `binding_ip` - It stores the parsed ip address option on which the server should launch
|
||||||
/// * `style` - It stores the theming options for the website.
|
/// * `style` - It stores the theming options for the website.
|
||||||
/// * `redis_connection_url` - It stores the redis connection url address on which the redis
|
/// * `redis_url` - It stores the redis connection url address on which the redis
|
||||||
/// client should connect.
|
/// client should connect.
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct Config {
|
pub struct Config {
|
||||||
pub port: u16,
|
pub port: u16,
|
||||||
pub binding_ip_addr: String,
|
pub binding_ip: String,
|
||||||
pub style: Style,
|
pub style: Style,
|
||||||
pub redis_connection_url: String,
|
pub redis_url: String,
|
||||||
pub aggregator: AggregatorConfig,
|
pub aggregator: AggregatorConfig,
|
||||||
pub logging: bool,
|
pub logging: bool,
|
||||||
pub debug: bool,
|
pub debug: bool,
|
||||||
@ -55,12 +55,12 @@ impl Config {
|
|||||||
|
|
||||||
Ok(Config {
|
Ok(Config {
|
||||||
port: globals.get::<_, u16>("port")?,
|
port: globals.get::<_, u16>("port")?,
|
||||||
binding_ip_addr: globals.get::<_, String>("binding_ip_addr")?,
|
binding_ip: globals.get::<_, String>("binding_ip")?,
|
||||||
style: Style::new(
|
style: Style::new(
|
||||||
globals.get::<_, String>("theme")?,
|
globals.get::<_, String>("theme")?,
|
||||||
globals.get::<_, String>("colorscheme")?,
|
globals.get::<_, String>("colorscheme")?,
|
||||||
),
|
),
|
||||||
redis_connection_url: globals.get::<_, String>("redis_connection_url")?,
|
redis_url: globals.get::<_, String>("redis_url")?,
|
||||||
aggregator: AggregatorConfig {
|
aggregator: AggregatorConfig {
|
||||||
random_delay: globals.get::<_, bool>("production_use")?,
|
random_delay: globals.get::<_, bool>("production_use")?,
|
||||||
},
|
},
|
@ -7,7 +7,7 @@ use std::{collections::HashMap, time::Duration};
|
|||||||
use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
|
use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
|
||||||
use scraper::{Html, Selector};
|
use scraper::{Html, Selector};
|
||||||
|
|
||||||
use crate::search_results_handler::aggregation_models::RawSearchResult;
|
use crate::results::aggregation_models::RawSearchResult;
|
||||||
|
|
||||||
use super::engine_models::EngineError;
|
use super::engine_models::EngineError;
|
||||||
|
|
||||||
|
@ -6,7 +6,7 @@ use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
|
|||||||
use scraper::{Html, Selector};
|
use scraper::{Html, Selector};
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
|
||||||
use crate::search_results_handler::aggregation_models::RawSearchResult;
|
use crate::results::aggregation_models::RawSearchResult;
|
||||||
|
|
||||||
use super::engine_models::EngineError;
|
use super::engine_models::EngineError;
|
||||||
use error_stack::{IntoReport, Report, Result, ResultExt};
|
use error_stack::{IntoReport, Report, Result, ResultExt};
|
||||||
|
@ -1 +1 @@
|
|||||||
pub mod public_path_handler;
|
pub mod public_paths;
|
||||||
|
@ -17,15 +17,17 @@ static PUBLIC_DIRECTORY_NAME: &str = "public";
|
|||||||
/// 1. `/opt/websurfx` if it is not present here then it falls back to the next one (2)
|
/// 1. `/opt/websurfx` if it is not present here then it falls back to the next one (2)
|
||||||
/// 2. Under project folder ( or codebase in other words) if it is not present
|
/// 2. Under project folder ( or codebase in other words) if it is not present
|
||||||
/// here then it returns an error as mentioned above.
|
/// here then it returns an error as mentioned above.
|
||||||
pub fn handle_different_public_path() -> Result<String, Error> {
|
pub fn get_public_path() -> Result<String, Error> {
|
||||||
if Path::new(format!("/opt/websurfx/{}/", PUBLIC_DIRECTORY_NAME).as_str()).exists() {
|
if Path::new(format!("/opt/websurfx/{}/", PUBLIC_DIRECTORY_NAME).as_str()).exists() {
|
||||||
Ok(format!("/opt/websurfx/{}", PUBLIC_DIRECTORY_NAME))
|
return Ok(format!("/opt/websurfx/{}", PUBLIC_DIRECTORY_NAME));
|
||||||
} else if Path::new(format!("./{}/", PUBLIC_DIRECTORY_NAME).as_str()).exists() {
|
}
|
||||||
Ok(format!("./{}", PUBLIC_DIRECTORY_NAME))
|
|
||||||
} else {
|
if Path::new(format!("./{}/", PUBLIC_DIRECTORY_NAME).as_str()).exists() {
|
||||||
|
return Ok(format!("./{}", PUBLIC_DIRECTORY_NAME));
|
||||||
|
}
|
||||||
|
|
||||||
Err(Error::new(
|
Err(Error::new(
|
||||||
std::io::ErrorKind::NotFound,
|
std::io::ErrorKind::NotFound,
|
||||||
"Themes (public) folder not found!!",
|
"Themes (public) folder not found!!",
|
||||||
))
|
))
|
||||||
}
|
|
||||||
}
|
}
|
12
src/lib.rs
12
src/lib.rs
@ -2,10 +2,10 @@
|
|||||||
//! and register all the routes for the `websurfx` meta search engine website.
|
//! and register all the routes for the `websurfx` meta search engine website.
|
||||||
|
|
||||||
pub mod cache;
|
pub mod cache;
|
||||||
pub mod config_parser;
|
pub mod config;
|
||||||
pub mod engines;
|
pub mod engines;
|
||||||
pub mod handler;
|
pub mod handler;
|
||||||
pub mod search_results_handler;
|
pub mod results;
|
||||||
pub mod server;
|
pub mod server;
|
||||||
|
|
||||||
use std::net::TcpListener;
|
use std::net::TcpListener;
|
||||||
@ -14,9 +14,9 @@ use crate::server::routes;
|
|||||||
|
|
||||||
use actix_files as fs;
|
use actix_files as fs;
|
||||||
use actix_web::{dev::Server, middleware::Logger, web, App, HttpServer};
|
use actix_web::{dev::Server, middleware::Logger, web, App, HttpServer};
|
||||||
use config_parser::parser::Config;
|
use config::parser::Config;
|
||||||
use handlebars::Handlebars;
|
use handlebars::Handlebars;
|
||||||
use handler::public_path_handler::handle_different_public_path;
|
use handler::public_paths::get_public_path;
|
||||||
|
|
||||||
/// Runs the web server on the provided TCP listener and returns a `Server` instance.
|
/// Runs the web server on the provided TCP listener and returns a `Server` instance.
|
||||||
///
|
///
|
||||||
@ -32,7 +32,7 @@ use handler::public_path_handler::handle_different_public_path;
|
|||||||
///
|
///
|
||||||
/// ```rust
|
/// ```rust
|
||||||
/// use std::net::TcpListener;
|
/// use std::net::TcpListener;
|
||||||
/// use websurfx::{config_parser::parser::Config, run};
|
/// use websurfx::{config::parser::Config, run};
|
||||||
///
|
///
|
||||||
/// let config = Config::parse().unwrap();
|
/// let config = Config::parse().unwrap();
|
||||||
/// let listener = TcpListener::bind("127.0.0.1:8080").expect("Failed to bind address");
|
/// let listener = TcpListener::bind("127.0.0.1:8080").expect("Failed to bind address");
|
||||||
@ -41,7 +41,7 @@ use handler::public_path_handler::handle_different_public_path;
|
|||||||
pub fn run(listener: TcpListener, config: Config) -> std::io::Result<Server> {
|
pub fn run(listener: TcpListener, config: Config) -> std::io::Result<Server> {
|
||||||
let mut handlebars: Handlebars = Handlebars::new();
|
let mut handlebars: Handlebars = Handlebars::new();
|
||||||
|
|
||||||
let public_folder_path: String = handle_different_public_path()?;
|
let public_folder_path: String = get_public_path()?;
|
||||||
|
|
||||||
handlebars
|
handlebars
|
||||||
.register_templates_directory(".html", format!("{}/templates", public_folder_path))
|
.register_templates_directory(".html", format!("{}/templates", public_folder_path))
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
|
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use crate::config_parser::parser_models::Style;
|
use crate::config::parser_models::Style;
|
||||||
|
|
||||||
/// A named struct to store, serialize and deserializes the individual search result from all the
|
/// A named struct to store, serialize and deserializes the individual search result from all the
|
||||||
/// scraped and aggregated search results from the upstream search engines.
|
/// scraped and aggregated search results from the upstream search engines.
|
@ -6,9 +6,9 @@ use std::fs::read_to_string;
|
|||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
cache::cacher::RedisCache,
|
cache::cacher::RedisCache,
|
||||||
config_parser::parser::Config,
|
config::parser::Config,
|
||||||
handler::public_path_handler::handle_different_public_path,
|
handler::public_paths::get_public_path,
|
||||||
search_results_handler::{aggregation_models::SearchResults, aggregator::aggregate},
|
results::{aggregation_models::SearchResults, aggregator::aggregate},
|
||||||
};
|
};
|
||||||
use actix_web::{get, web, HttpRequest, HttpResponse};
|
use actix_web::{get, web, HttpRequest, HttpResponse};
|
||||||
use handlebars::Handlebars;
|
use handlebars::Handlebars;
|
||||||
@ -73,58 +73,62 @@ pub async fn search(
|
|||||||
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
|
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
|
||||||
let params = web::Query::<SearchParams>::from_query(req.query_string())?;
|
let params = web::Query::<SearchParams>::from_query(req.query_string())?;
|
||||||
|
|
||||||
//Initialize redis cache connection struct
|
|
||||||
let mut redis_cache = RedisCache::new(config.redis_connection_url.clone())?;
|
|
||||||
match ¶ms.q {
|
match ¶ms.q {
|
||||||
Some(query) => {
|
Some(query) => {
|
||||||
if query.trim().is_empty() {
|
if query.trim().is_empty() {
|
||||||
Ok(HttpResponse::Found()
|
return Ok(HttpResponse::Found()
|
||||||
.insert_header(("location", "/"))
|
.insert_header(("location", "/"))
|
||||||
.finish())
|
.finish());
|
||||||
} else {
|
}
|
||||||
let page = match ¶ms.page {
|
let page = match ¶ms.page {
|
||||||
Some(page) => *page,
|
Some(page) => *page,
|
||||||
None => 0,
|
None => 0,
|
||||||
};
|
};
|
||||||
|
|
||||||
let page_url = format!(
|
let url = format!(
|
||||||
"http://{}:{}/search?q={}&page={}",
|
"http://{}:{}/search?q={}&page={}",
|
||||||
config.binding_ip_addr, config.port, query, page
|
config.binding_ip, config.port, query, page
|
||||||
);
|
);
|
||||||
|
let results_json = get_results(url, &config, query, page).await?;
|
||||||
// fetch the cached results json.
|
|
||||||
let cached_results_json = redis_cache.cached_results_json(&page_url);
|
|
||||||
// check if fetched results was indeed fetched or it was an error and if so
|
|
||||||
// handle the data accordingly.
|
|
||||||
match cached_results_json {
|
|
||||||
Ok(results_json) => {
|
|
||||||
let new_results_json: SearchResults = serde_json::from_str(&results_json)?;
|
|
||||||
let page_content: String = hbs.render("search", &new_results_json)?;
|
|
||||||
Ok(HttpResponse::Ok().body(page_content))
|
|
||||||
}
|
|
||||||
Err(_) => {
|
|
||||||
let mut results_json: crate::search_results_handler::aggregation_models::SearchResults =
|
|
||||||
aggregate(query, page, config.aggregator.random_delay, config.debug).await?;
|
|
||||||
results_json.add_style(config.style.clone());
|
|
||||||
redis_cache
|
|
||||||
.cache_results(serde_json::to_string(&results_json)?, &page_url)?;
|
|
||||||
let page_content: String = hbs.render("search", &results_json)?;
|
let page_content: String = hbs.render("search", &results_json)?;
|
||||||
Ok(HttpResponse::Ok().body(page_content))
|
Ok(HttpResponse::Ok().body(page_content))
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
None => Ok(HttpResponse::Found()
|
None => Ok(HttpResponse::Found()
|
||||||
.insert_header(("location", "/"))
|
.insert_header(("location", "/"))
|
||||||
.finish()),
|
.finish()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Fetches the results for a query and page.
|
||||||
|
/// First checks the redis cache, if that fails it gets proper results
|
||||||
|
async fn get_results(
|
||||||
|
url: String,
|
||||||
|
config: &Config,
|
||||||
|
query: &str,
|
||||||
|
page: u32,
|
||||||
|
) -> Result<SearchResults, Box<dyn std::error::Error>> {
|
||||||
|
//Initialize redis cache connection struct
|
||||||
|
let mut redis_cache = RedisCache::new(config.redis_url.clone())?;
|
||||||
|
// fetch the cached results json.
|
||||||
|
let cached_results_json = redis_cache.get_cached_json(&url);
|
||||||
|
// check if fetched results was indeed fetched or it was an error and if so
|
||||||
|
// handle the data accordingly.
|
||||||
|
match cached_results_json {
|
||||||
|
Ok(results_json) => Ok(serde_json::from_str::<SearchResults>(&results_json).unwrap()),
|
||||||
|
Err(_) => {
|
||||||
|
let mut results_json: crate::results::aggregation_models::SearchResults =
|
||||||
|
aggregate(query, page, config.aggregator.random_delay, config.debug).await?;
|
||||||
|
results_json.add_style(config.style.clone());
|
||||||
|
redis_cache.cache_results(serde_json::to_string(&results_json)?, &url)?;
|
||||||
|
Ok(results_json)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Handles the route of robots.txt page of the `websurfx` meta search engine website.
|
/// Handles the route of robots.txt page of the `websurfx` meta search engine website.
|
||||||
#[get("/robots.txt")]
|
#[get("/robots.txt")]
|
||||||
pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
|
pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
|
||||||
let page_content: String =
|
let page_content: String = read_to_string(format!("{}/robots.txt", get_public_path()?))?;
|
||||||
read_to_string(format!("{}/robots.txt", handle_different_public_path()?))?;
|
|
||||||
Ok(HttpResponse::Ok()
|
Ok(HttpResponse::Ok()
|
||||||
.content_type("text/plain; charset=ascii")
|
.content_type("text/plain; charset=ascii")
|
||||||
.body(page_content))
|
.body(page_content))
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
use std::net::TcpListener;
|
use std::net::TcpListener;
|
||||||
|
|
||||||
use handlebars::Handlebars;
|
use handlebars::Handlebars;
|
||||||
use websurfx::{config_parser::parser::Config, run};
|
use websurfx::{config::parser::Config, run};
|
||||||
|
|
||||||
// Starts a new instance of the HTTP server, bound to a random available port
|
// Starts a new instance of the HTTP server, bound to a random available port
|
||||||
fn spawn_app() -> String {
|
fn spawn_app() -> String {
|
||||||
@ -41,5 +41,5 @@ async fn test_index() {
|
|||||||
assert_eq!(res.text().await.unwrap(), template);
|
assert_eq!(res.text().await.unwrap(), template);
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Write tests for tesing parameters for search function that if provided with something
|
// TODO: Write tests for testing parameters for search function that if provided with something
|
||||||
// other than u32 like alphabets and special characters then it should panic
|
// other than u32 like alphabets and special characters then it should panic
|
||||||
|
@ -4,7 +4,7 @@ debug = false -- an option to enable or disable debug mode.
|
|||||||
|
|
||||||
-- ### Server ###
|
-- ### Server ###
|
||||||
port = "8080" -- port on which server should be launched
|
port = "8080" -- port on which server should be launched
|
||||||
binding_ip_addr = "127.0.0.1" -- ip address on which the server should be launched.
|
binding_ip = "127.0.0.1" -- ip address on which the server should be launched.
|
||||||
production_use = false -- whether to use production mode or not (in other words this option should be used if it is to be used to host it on the server to provide a service to a large number of users)
|
production_use = false -- whether to use production mode or not (in other words this option should be used if it is to be used to host it on the server to provide a service to a large number of users)
|
||||||
-- if production_use is set to true
|
-- if production_use is set to true
|
||||||
-- There will be a random delay before sending the request to the search engines, this is to prevent DDoSing the upstream search engines from a large number of simultaneous requests.
|
-- There will be a random delay before sending the request to the search engines, this is to prevent DDoSing the upstream search engines from a large number of simultaneous requests.
|
||||||
@ -25,4 +25,4 @@ colorscheme = "catppuccin-mocha" -- the colorscheme name which should be used fo
|
|||||||
theme = "simple" -- the theme name which should be used for the website
|
theme = "simple" -- the theme name which should be used for the website
|
||||||
|
|
||||||
-- ### Caching ###
|
-- ### Caching ###
|
||||||
redis_connection_url = "redis://127.0.0.1:8082" -- redis connection url address to which the client should connect.
|
redis_url = "redis://127.0.0.1:8082" -- redis connection url address to which the client should connect.
|
||||||
|
Loading…
Reference in New Issue
Block a user