0
0
mirror of https://github.com/neon-mmd/websurfx.git synced 2024-12-22 20:38:22 -05:00

Merge branch 'rolling' into feat-error-box-for-engine-errors

This commit is contained in:
zhou fan 2023-08-24 08:16:32 +08:00 committed by GitHub
commit 2f1fa00f87
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 204 additions and 127 deletions

52
Cargo.lock generated
View File

@ -77,7 +77,7 @@ dependencies = [
"encoding_rs",
"flate2",
"futures-core",
"h2 0.3.20",
"h2 0.3.21",
"http 0.2.9",
"httparse",
"httpdate",
@ -475,9 +475,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
[[package]]
name = "cc"
version = "1.0.82"
version = "1.0.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "305fe645edc1442a0fa8b6726ba61d422798d37a52e12eaecf4b022ebbb88f01"
checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0"
dependencies = [
"jobserver",
"libc",
@ -816,9 +816,9 @@ dependencies = [
[[package]]
name = "deranged"
version = "0.3.7"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7684a49fb1af197853ef7b2ee694bc1f5b4179556f1e5710e1760c5db6f5e929"
checksum = "f2696e8a945f658fd14dc3b87242e6b80cd0f36ff04ea560fa39082368847946"
[[package]]
name = "derive_more"
@ -1176,9 +1176,9 @@ dependencies = [
[[package]]
name = "h2"
version = "0.3.20"
version = "0.3.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97ec8491ebaf99c8eaa73058b045fe58073cd6be7f596ac993ced0b0a0c01049"
checksum = "91fc23aa11be92976ef4729127f1a74adf36d8436f7816b185d18df956790833"
dependencies = [
"bytes 1.4.0",
"fnv",
@ -1363,7 +1363,7 @@ dependencies = [
"futures-channel",
"futures-core",
"futures-util",
"h2 0.3.20",
"h2 0.3.21",
"http 0.2.9",
"http-body 0.4.5",
"httparse",
@ -2454,16 +2454,16 @@ dependencies = [
[[package]]
name = "reqwest"
version = "0.11.18"
version = "0.11.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cde824a14b7c14f85caff81225f411faacc04a2013f41670f41443742b1c1c55"
checksum = "20b9b67e2ca7dd9e9f9285b759de30ff538aab981abaaf7bc9bd90b84a0126c3"
dependencies = [
"base64 0.21.2",
"bytes 1.4.0",
"encoding_rs",
"futures-core",
"futures-util",
"h2 0.3.20",
"h2 0.3.21",
"http 0.2.9",
"http-body 0.4.5",
"hyper 0.14.27",
@ -2486,7 +2486,7 @@ dependencies = [
"wasm-bindgen",
"wasm-bindgen-futures",
"web-sys",
"winreg 0.10.1",
"winreg 0.50.0",
]
[[package]]
@ -2684,18 +2684,18 @@ checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
[[package]]
name = "serde"
version = "1.0.183"
version = "1.0.185"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32ac8da02677876d532745a130fc9d8e6edfa81a269b107c5b00829b91d8eb3c"
checksum = "be9b6f69f1dfd54c3b568ffa45c310d6973a5e5148fd40cf515acaf38cf5bc31"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.183"
version = "1.0.185"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aafe972d60b0b9bee71a91b92fee2d4fb3c9d7e8f6b179aa99f27203d99a4816"
checksum = "dc59dfdcbad1437773485e0367fea4b090a2e0a16d9ffc46af47764536a298ec"
dependencies = [
"proc-macro2 1.0.66",
"quote 1.0.33",
@ -2797,9 +2797,9 @@ checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de"
[[package]]
name = "slab"
version = "0.4.8"
version = "0.4.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6528351c9bc8ab22353f9d776db39a20288e8d6c37ef8cfe3317cf875eecfc2d"
checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67"
dependencies = [
"autocfg 1.1.0",
]
@ -3328,9 +3328,9 @@ checksum = "ed646292ffc8188ef8ea4d1e0e0150fb15a5c2e12ad9b8fc191ae7a8a7f3c4b9"
[[package]]
name = "unicase"
version = "2.6.0"
version = "2.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "50f37be617794602aabbeee0be4f259dc1778fabe05e2d67ee8f79326d5cb4f6"
checksum = "f7d2d4dafb69621809a81864c9c1b864479e1235c0dd4e199924b9742439ed89"
dependencies = [
"version_check",
]
@ -3543,7 +3543,7 @@ dependencies = [
[[package]]
name = "websurfx"
version = "0.17.0"
version = "0.18.0"
dependencies = [
"actix-cors",
"actix-files",
@ -3559,7 +3559,8 @@ dependencies = [
"once_cell",
"rand 0.8.5",
"redis",
"reqwest 0.11.18",
"regex",
"reqwest 0.11.19",
"rlua",
"rusty-hook",
"scraper",
@ -3688,11 +3689,12 @@ dependencies = [
[[package]]
name = "winreg"
version = "0.10.1"
version = "0.50.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "80d0f4e272c85def139476380b12f9ac60926689dd2e01d4923222f40580869d"
checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1"
dependencies = [
"winapi 0.3.9",
"cfg-if 1.0.0",
"windows-sys",
]
[[package]]

View File

@ -1,15 +1,15 @@
[package]
name = "websurfx"
version = "0.17.0"
version = "0.18.0"
edition = "2021"
description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
repository = "https://github.com/neon-mmd/websurfx"
license = "AGPL-3.0"
[dependencies]
reqwest = {version="0.11.18",features=["json"]}
reqwest = {version="0.11.19",features=["json"]}
tokio = {version="1.32.0",features=["full"]}
serde = {version="1.0.183",features=["derive"]}
serde = {version="1.0.185",features=["derive"]}
handlebars = { version = "4.3.7", features = ["dir_source"] }
scraper = {version="0.17.1"}
actix-web = {version="4.3.1", features = ["cookies"]}
@ -26,6 +26,7 @@ rand={version="0.8.5"}
once_cell = {version="1.18.0"}
error-stack = {version="0.3.1"}
async-trait = {version="0.1.73"}
regex = {version="1.9.3", features=["perf"]}
[dev-dependencies]
rusty-hook = "^0.11.2"
@ -50,7 +51,7 @@ split-debuginfo = '...'
debug-assertions = false
overflow-checks = false
lto = 'thin'
panic = 'unwind'
panic = 'abort'
incremental = false
codegen-units = 16
rpath = false

View File

@ -1,14 +1,12 @@
//! This module provides the functionality to parse the lua config and convert the config options
//! into rust readable form.
use crate::handler::paths::{file_path, FileType};
use super::parser_models::Style;
use log::LevelFilter;
use rlua::Lua;
use std::{collections::HashMap, format, fs, path::Path, thread::available_parallelism};
// ------- Constants --------
static COMMON_DIRECTORY_NAME: &str = "websurfx";
static CONFIG_FILE_NAME: &str = "config.lua";
use std::{collections::HashMap, fs, thread::available_parallelism};
/// A named struct which stores the parsed config file options.
///
@ -69,7 +67,7 @@ impl Config {
let globals = context.globals();
context
.load(&fs::read_to_string(Config::config_path()?)?)
.load(&fs::read_to_string(file_path(FileType::Config)?)?)
.exec()?;
let parsed_threads: u8 = globals.get::<_, u8>("threads")?;
@ -114,52 +112,6 @@ impl Config {
})
})
}
/// A helper function which returns an appropriate config file path checking if the config
/// file exists on that path.
///
/// The candidate locations are probed in order and the first one that exists is returned:
/// 1. `$HOME/.config/websurfx/config.lua` (skipped when the `HOME` environment variable is
///    not set or is not valid unicode, instead of panicking).
/// 2. `/etc/xdg/websurfx/config.lua`
/// 3. `./websurfx/config.lua` (under project folder ( or codebase in other words)).
///
/// # Error
///
/// Returns a `Config file not found!!` error if the config file is not present under any of
/// the paths listed above.
fn config_path() -> Result<String, Box<dyn std::error::Error>> {
    // The user level config only makes sense when a home directory is known; a missing `HOME`
    // must not bring the whole application down, so that candidate is simply dropped.
    let user_config = std::env::var("HOME").ok().map(|home| {
        format!(
            "{}/.config/{}/{}",
            home, COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME
        )
    });

    user_config
        .into_iter()
        // system wide config in /etc/xdg, then the dev config shipped with the codebase
        .chain([
            format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME),
            format!("./{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME),
        ])
        .find(|candidate| Path::new(candidate).exists())
        // if none of the configs above exist, return error
        .ok_or_else(|| "Config file not found!!".to_string().into())
}
}
/// a helper function that sets the proper logging level

View File

@ -1 +1 @@
pub mod public_paths;
pub mod paths;

111
src/handler/paths.rs Normal file
View File

@ -0,0 +1,111 @@
//! This module provides the functionality to locate the config file, the allowlist and blocklist
//! files and the theme (public) folder across the different paths on which they may be present,
//! and to provide the first appropriate path on which each one is found.
use std::collections::HashMap;
use std::io::Error;
use std::path::Path;
// ------- Constants --------
/// Directory name appended to the search locations when resolving the theme (public) folder.
static PUBLIC_DIRECTORY_NAME: &str = "public";
/// Directory name shared by every location searched for the config, allowlist and blocklist files.
static COMMON_DIRECTORY_NAME: &str = "websurfx";
/// File name of the lua config file.
static CONFIG_FILE_NAME: &str = "config.lua";
/// File name of the allowlist file.
static ALLOWLIST_FILE_NAME: &str = "allowlist.txt";
/// File name of the blocklist file.
static BLOCKLIST_FILE_NAME: &str = "blocklist.txt";
/// The kinds of files/folders whose location on disk can be looked up. Each variant is used as a
/// key into the table of candidate paths for that kind.
#[derive(Hash, PartialEq, Eq, Debug)]
pub enum FileType {
    /// The lua config file (`config.lua`).
    Config,
    /// The allowlist file (`allowlist.txt`).
    AllowList,
    /// The blocklist file (`blocklist.txt`).
    BlockList,
    /// The theme (`public`) folder.
    Theme,
}
/// Lazily built table which maps every [`FileType`] to the list of candidate paths on which it
/// may be present. The candidates of each entry are ordered by priority (first one wins).
///
/// The `$HOME/.config/websurfx/` candidates are only added when the `HOME` environment variable
/// is set and valid unicode, so a missing `HOME` no longer panics while the table is initialized;
/// the lookup then simply falls through to the `/etc/xdg` and project folder locations.
static FILE_PATHS_FOR_DIFF_FILE_TYPES: once_cell::sync::Lazy<HashMap<FileType, Vec<String>>> =
    once_cell::sync::Lazy::new(|| {
        // Read `HOME` once for all the entries instead of unwrapping it per entry.
        let home = std::env::var("HOME").ok();

        // Builds the `user config dir -> /etc/xdg -> project folder` candidate list which is
        // shared by the config, allowlist and blocklist files.
        let config_dir_candidates = |file_name: &str| -> Vec<String> {
            home.iter()
                .map(|home| format!("{}/.config/{}/{}", home, COMMON_DIRECTORY_NAME, file_name))
                .chain([
                    format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, file_name),
                    format!("./{}/{}", COMMON_DIRECTORY_NAME, file_name),
                ])
                .collect()
        };

        HashMap::from([
            (FileType::Config, config_dir_candidates(CONFIG_FILE_NAME)),
            (
                FileType::Theme,
                vec![
                    format!("/opt/websurfx/{}/", PUBLIC_DIRECTORY_NAME),
                    format!("./{}/", PUBLIC_DIRECTORY_NAME),
                ],
            ),
            (
                FileType::AllowList,
                config_dir_candidates(ALLOWLIST_FILE_NAME),
            ),
            (
                FileType::BlockList,
                config_dir_candidates(BLOCKLIST_FILE_NAME),
            ),
        ])
    });
/// A function which returns an appropriate path for the requested [`FileType`] by checking the
/// candidate paths registered for it, in order, and returning the first one that exists.
///
/// The candidate paths are:
/// * `FileType::Config`, `FileType::AllowList` and `FileType::BlockList`:
///   1. `$HOME/.config/websurfx/<file name>` if it is not present here then it fallbacks to the
///      next one (2)
///   2. `/etc/xdg/websurfx/<file name>` if it is not present here then it fallbacks to the next
///      one (3).
///   3. `./websurfx/<file name>` (under project folder ( or codebase in other words)).
/// * `FileType::Theme`:
///   1. `/opt/websurfx/public/` if it is not present here then it fallbacks to the next one (2)
///   2. `./public/` (under project folder ( or codebase in other words)).
///
/// # Arguments
///
/// * `file_type` - The kind of file/folder whose path should be looked up.
///
/// # Error
///
/// Returns a `<FileType> file not found!!` error of kind `NotFound` if none of the candidate
/// paths for the given file type exists (or if no candidates are registered for it).
pub fn file_path(file_type: FileType) -> Result<String, Error> {
    FILE_PATHS_FOR_DIFF_FILE_TYPES
        .get(&file_type)
        .into_iter()
        .flatten()
        .find(|candidate| Path::new(candidate.as_str()).exists())
        .cloned()
        // if none of the candidates above exist, return error
        .ok_or_else(|| {
            Error::new(
                std::io::ErrorKind::NotFound,
                format!("{:?} file not found!!", file_type),
            )
        })
}

View File

@ -1,33 +0,0 @@
//! This module provides the functionality to handle theme folder present on different paths and
//! provide one appropriate path on which it is present and can be used.
use std::io::Error;
use std::path::Path;
// ------- Constants --------
static PUBLIC_DIRECTORY_NAME: &str = "public";

/// Resolves the theme (public) directory by probing the known parent locations in order and
/// handing back the first directory that exists.
///
/// # Error
///
/// Returns a `Themes (public) folder not found!!` error if the theme folder is present under
/// neither of the following locations:
/// 1. `/opt/websurfx`, which is tried first.
/// 2. The project folder (the current working directory), which is the fallback.
pub fn public_path() -> Result<String, Error> {
    for parent in ["/opt/websurfx", "."] {
        let directory = format!("{}/{}", parent, PUBLIC_DIRECTORY_NAME);

        // The existence probe uses the trailing-slash form, the returned path does not.
        let probe = format!("{}/", directory);
        if Path::new(probe.as_str()).exists() {
            return Ok(directory);
        }
    }

    Err(Error::new(
        std::io::ErrorKind::NotFound,
        "Themes (public) folder not found!!",
    ))
}

View File

@ -17,7 +17,7 @@ use actix_files as fs;
use actix_web::{dev::Server, http::header, middleware::Logger, web, App, HttpServer};
use config::parser::Config;
use handlebars::Handlebars;
use handler::public_paths::public_path;
use handler::paths::{file_path, FileType};
/// Runs the web server on the provided TCP listener and returns a `Server` instance.
///
@ -42,7 +42,7 @@ use handler::public_paths::public_path;
pub fn run(listener: TcpListener, config: Config) -> std::io::Result<Server> {
let mut handlebars: Handlebars = Handlebars::new();
let public_folder_path: String = public_path()?;
let public_folder_path: String = file_path(FileType::Theme)?;
handlebars
.register_templates_directory(".html", format!("{}/templates", public_folder_path))

View File

@ -1,18 +1,26 @@
//! This module provides the functionality to scrape and gathers all the results from the upstream
//! search engines and then removes duplicate results.
use std::{collections::HashMap, time::Duration};
use error_stack::Report;
use rand::Rng;
use tokio::task::JoinHandle;
use std::{
collections::HashMap,
io::{BufReader, Read},
time::Duration,
};
use super::{
aggregation_models::{EngineErrorInfo, SearchResult, SearchResults},
user_agent::random_user_agent,
};
use error_stack::Report;
use rand::Rng;
use regex::Regex;
use std::{fs::File, io::BufRead};
use tokio::task::JoinHandle;
use crate::engines::engine_models::{EngineError, EngineHandler};
use crate::{
engines::engine_models::{EngineError, EngineHandler},
handler::paths::{file_path, FileType},
};
/// Aliases for long type annotations
type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<EngineError>>>>;
@ -106,7 +114,7 @@ pub async fn aggregate(
log::error!("Engine Error: {:?}", error);
engine_errors_info.push(EngineErrorInfo::new(
error.downcast_ref::<EngineError>().unwrap(),
engine_name.to_string(),
engine_name,
));
};
@ -143,7 +151,22 @@ pub async fn aggregate(
}
}
let results = result_map.into_values().collect();
let mut blacklist_map: HashMap<String, SearchResult> = HashMap::new();
filter_with_lists(
&mut result_map,
&mut blacklist_map,
&file_path(FileType::BlockList)?,
)?;
filter_with_lists(
&mut blacklist_map,
&mut result_map,
&file_path(FileType::AllowList)?,
)?;
drop(blacklist_map);
let results: Vec<SearchResult> = result_map.into_values().collect();
Ok(SearchResults::new(
results,
@ -151,3 +174,23 @@ pub async fn aggregate(
engine_errors_info,
))
}
/// Moves every search result that matches a pattern from the given list file out of
/// `map_to_be_filtered` and into `resultant_map`.
///
/// Each non-blank line of the file is compiled as a regular expression and is matched against
/// the lowercased url, title and description of every result. Blank (empty or whitespace only)
/// lines are skipped, because an empty pattern matches everything and a single stray empty line
/// would otherwise move all of the results.
///
/// # Arguments
///
/// * `map_to_be_filtered` - The results (keyed by url) from which matching entries are removed.
/// * `resultant_map` - The map into which the removed entries are inserted.
/// * `file_path` - Path of the list file which contains one regex pattern per line.
///
/// # Error
///
/// Returns an error if the file cannot be opened or read, or if a line is not a valid regex.
fn filter_with_lists(
    map_to_be_filtered: &mut HashMap<String, SearchResult>,
    resultant_map: &mut HashMap<String, SearchResult>,
    file_path: &str,
) -> Result<(), Box<dyn std::error::Error>> {
    let mut reader = BufReader::new(File::open(file_path)?);

    for line in reader.by_ref().lines() {
        let pattern = line?;
        if pattern.trim().is_empty() {
            continue;
        }
        let re = Regex::new(&pattern)?;

        // Only the keys of the matching entries are collected, so the map does not have to be
        // cloned for every pattern; the entries themselves are moved, not copied.
        let matched_urls: Vec<String> = map_to_be_filtered
            .iter()
            .filter(|(url, search_result)| {
                re.is_match(&url.to_lowercase())
                    || re.is_match(&search_result.title.to_lowercase())
                    || re.is_match(&search_result.description.to_lowercase())
            })
            .map(|(url, _)| url.clone())
            .collect();

        for url in matched_urls {
            if let Some(search_result) = map_to_be_filtered.remove(&url) {
                resultant_map.insert(url, search_result);
            }
        }
    }

    Ok(())
}

View File

@ -8,7 +8,7 @@ use crate::{
cache::cacher::RedisCache,
config::parser::Config,
engines::engine_models::EngineHandler,
handler::public_paths::public_path,
handler::paths::{file_path, FileType},
results::{aggregation_models::SearchResults, aggregator::aggregate},
};
use actix_web::{get, web, HttpRequest, HttpResponse};
@ -215,7 +215,8 @@ async fn results(
/// Handles the route of robots.txt page of the `websurfx` meta search engine website.
#[get("/robots.txt")]
pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
let page_content: String = read_to_string(format!("{}/robots.txt", public_path()?))?;
let page_content: String =
read_to_string(format!("{}/robots.txt", file_path(FileType::Theme)?))?;
Ok(HttpResponse::Ok()
.content_type("text/plain; charset=ascii")
.body(page_content))

0
websurfx/allowlist.txt Normal file
View File

0
websurfx/blocklist.txt Normal file
View File