mirror of https://github.com/neon-mmd/websurfx.git (synced 2024-11-24 23:18:22 -05:00)

Merge branch 'rolling' into doc

This commit is contained in: commit 079dcbaccc
1  .gitignore (vendored)

@@ -4,3 +4,4 @@ package-lock.json
 dump.rdb
 .vscode
 megalinter-reports/
+dhat-heap.json
2  .gitpod.Dockerfile (vendored)

@@ -1,3 +1,3 @@
 FROM gitpod/workspace-rust
 
-RUN sudo install-packages redis-server nodejs npm
+RUN sudo install-packages redis-server nodejs npm liblua5.4-dev liblua5.3-dev liblua5.2-dev liblua5.1-0-dev libluajit-5.1-dev
528  Cargo.lock (generated)

File diff suppressed because it is too large.
26  Cargo.toml

@@ -1,6 +1,6 @@
 [package]
 name = "websurfx"
-version = "0.18.6"
+version = "0.20.7"
 edition = "2021"
 description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
 repository = "https://github.com/neon-mmd/websurfx"

@@ -8,9 +8,9 @@ license = "AGPL-3.0"
 
 [dependencies]
 reqwest = {version="0.11.20",features=["json"]}
-tokio = {version="1.32.0",features=["full"]}
+tokio = {version="1.32.0",features=["rt-multi-thread","macros"]}
 serde = {version="1.0.188",features=["derive"]}
-handlebars = { version = "4.3.7", features = ["dir_source"] }
+handlebars = { version = "4.4.0", features = ["dir_source"] }
 scraper = {version="0.17.1"}
 actix-web = {version="4.4.0", features = ["cookies"]}
 actix-files = {version="0.6.2"}

@@ -19,14 +19,20 @@ serde_json = {version="1.0.105"}
 fake-useragent = {version="0.1.3"}
 env_logger = {version="0.10.0"}
 log = {version="0.4.20"}
-rlua = {version="0.19.7"}
+mlua = {version="0.8.10", features=["luajit"]}
-redis = {version="0.23.2"}
+redis = {version="0.23.3", features=["tokio-comp","connection-manager"]}
 md5 = {version="0.7.0"}
 rand={version="0.8.5"}
 once_cell = {version="1.18.0"}
 error-stack = {version="0.4.0"}
 async-trait = {version="0.1.73"}
 regex = {version="1.9.4", features=["perf"]}
+smallvec = {version="1.11.0", features=["union", "serde"]}
+futures = {version="0.3.28"}
+dhat = {version="0.3.2", optional = true}
+mimalloc = { version = "0.1.38", default-features = false }
+async-once-cell = {version="0.5.3"}
+actix-governor = {version="0.4.1"}
 
 [dev-dependencies]
 rusty-hook = "^0.11.2"

@@ -47,13 +53,17 @@ rpath = false
 
 [profile.release]
 opt-level = 3
-debug = false
+debug = false # This should only be commented when testing with dhat profiler
+# debug = 1 # This should only be uncommented when testing with dhat profiler
 split-debuginfo = '...'
 debug-assertions = false
 overflow-checks = false
-lto = 'thin'
+lto = true
 panic = 'abort'
 incremental = false
-codegen-units = 16
+codegen-units = 1
 rpath = false
 strip = "debuginfo"
 
+[features]
+dhat-heap = ["dep:dhat"]
@@ -51,7 +51,7 @@
 - **Getting Started**
 - [🔭 Preview](#preview-)
 - [🚀 Features](#features-)
-- [🛠️ Installation and Testing](#installation-and-testing-)
+- [🛠️ Installation and Testing](#installation-and-testing-%EF%B8%8F)
 - [🔧 Configuration](#configuration-)
 - **Feature Overview**
 - [🎨 Theming](#theming-)
@@ -109,7 +109,7 @@ colorscheme = "catppuccin-mocha" -- the colorscheme name which should be used fo
 theme = "simple" -- the theme name which should be used for the website
 
 -- ### Caching ###
-redis_connection_url = "redis://redis:6379" -- redis connection url address on which the client should connect on.
+redis_url = "redis://redis:6379" -- redis connection url address on which the client should connect on.
 
 -- ### Search Engines ###
 upstream_search_engines = { DuckDuckGo = true, Searx = false } -- select the upstream search engines from which the results should be fetched.
BIN  public/images/barricade.png (normal file)
Binary file not shown. (Size after: 892 KiB)

BIN  public/images/filter.png (normal file)
Binary file not shown. (Size after: 102 KiB)
@@ -132,6 +132,35 @@ body {
   width: 1.2rem;
   height: 1.2rem;
 }
 
+.results .result_disallowed,
+.results .result_filtered {
+  display: flex;
+  justify-content: center;
+  align-items: center;
+  gap: 10rem;
+  font-size: 2rem;
+  color: var(--foreground-color);
+  margin: 0rem 7rem;
+}
+
+.results .result_disallowed .user_query,
+.results .result_filtered .user_query {
+  color: var(--background-color);
+  font-weight: 300;
+}
+
+.results .result_disallowed img,
+.results .result_filtered img {
+  width: 30rem;
+}
+
+.results .result_disallowed div,
+.results .result_filtered div {
+  display: flex;
+  flex-direction: column;
+  gap: 1rem;
+  line-break: strict;
+}
+
 /* styles for the footer and header */
 
@@ -1,37 +1,69 @@
 {{>header this.style}}
 <main class="results">
 {{>search_bar this}}
 <div class="results_aggregated">
 {{#if results}} {{#each results}}
 <div class="result">
 <h1><a href="{{{this.url}}}">{{{this.title}}}</a></h1>
 <small>{{{this.url}}}</small>
 <p>{{{this.description}}}</p>
 <div class="upstream_engines">
 {{#each engine}}
 <span>{{{this}}}</span>
 {{/each}}
 </div>
+</div>
+{{/each}} {{else}} {{#if disallowed}}
+<div class="result_disallowed">
+<div class="description">
+<p>
+Your search - <span class="user_query">{{{this.pageQuery}}}</span> -
+has been disallowed.
+</p>
+<p class="description_paragraph">Dear user,</p>
+<p class="description_paragraph">
+The query - <span class="user_query">{{{this.pageQuery}}}</span> - has
+been blacklisted via server configuration and hence disallowed by the
+server. Henceforth no results could be displayed for your query.
+</p>
+</div>
+<img src="./images/barricade.png" alt="Image of a Barricade" />
+</div>
+{{else}} {{#if filtered}}
+<div class="result_filtered">
+<div class="description">
+<p>
+Your search - <span class="user_query">{{{this.pageQuery}}}</span> -
+has been filtered.
+</p>
+<p class="description_paragraph">Dear user,</p>
+<p class="description_paragraph">
+All the search results contain results that has been configured to be
+filtered out via server configuration and henceforth has been
+completely filtered out.
+</p>
+</div>
+<img src="./images/filter.png" alt="Image of a paper inside a funnel" />
+</div>
+{{else}}
+<div class="result_not_found">
+<p>Your search - {{{this.pageQuery}}} - did not match any documents.</p>
+<p class="suggestions">Suggestions:</p>
+<ul>
+<li>Make sure that all words are spelled correctly.</li>
+<li>Try different keywords.</li>
+<li>Try more general keywords.</li>
+</ul>
+<img src="./images/no_results.gif" alt="Man fishing gif" />
+</div>
+{{/if}} {{/if}} {{/if}}
 </div>
-{{/each}} {{else}}
-<div class="result_not_found">
-<p>Your search - {{{this.pageQuery}}} - did not match any documents.</p>
-<p class="suggestions">Suggestions:</p>
-<ul>
-<li>Make sure that all words are spelled correctly.</li>
-<li>Try different keywords.</li>
-<li>Try more general keywords.</li>
-</ul>
-<img src="./images/no_results.gif" alt="Man fishing gif" />
-</div>
-{{/if}}
-</div>
-<div class="page_navigation">
-<button type="button" onclick="navigate_backward()">
-← previous
-</button>
-<button type="button" onclick="navigate_forward()">next →</button>
-</div>
+<div class="page_navigation">
+<button type="button" onclick="navigate_backward()">
+← previous
+</button>
+<button type="button" onclick="navigate_forward()">next →</button>
+</div>
 </main>
 <script src="static/index.js"></script>
 <script src="static/pagination.js"></script>
@@ -3,9 +3,19 @@
 //! This module contains the main function which handles the logging of the application to the
 //! stdout and handles the command line arguments provided and launches the `websurfx` server.
 
+use mimalloc::MiMalloc;
 use std::net::TcpListener;
 use websurfx::{config::parser::Config, run};
 
+/// A dhat heap memory profiler
+#[cfg(feature = "dhat-heap")]
+#[global_allocator]
+static ALLOC: dhat::Alloc = dhat::Alloc;
+
+#[cfg(not(feature = "dhat-heap"))]
+#[global_allocator]
+static GLOBAL: MiMalloc = MiMalloc;
+
 /// The function that launches the main server and registers all the routes of the website.
 ///
 /// # Error

@@ -14,6 +24,10 @@ use websurfx::{config::parser::Config, run};
 /// available for being used for other applications.
 #[actix_web::main]
 async fn main() -> std::io::Result<()> {
+    // A dhat heap profiler initialization.
+    #[cfg(feature = "dhat-heap")]
+    let _profiler = dhat::Profiler::new_heap();
+
     // Initialize the parsed config file.
     let config = Config::parse(false).unwrap();
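Note on the hunk above: the two `#[global_allocator]` declarations are mutually exclusive at compile time, so enabling the `dhat-heap` cargo feature added in Cargo.toml swaps the process-wide allocator from mimalloc to dhat's instrumenting allocator and compiles in the profiler. A minimal sketch of the same gating pattern, written outside the websurfx codebase (the names and the program body here are illustrative, not part of the commit):

// Illustrative sketch only. Built normally, mimalloc is the allocator; built with
// `cargo run --features dhat-heap`, the dhat allocator and profiler are compiled in
// and dropping the profiler writes a dhat-heap.json report.
#[cfg(feature = "dhat-heap")]
#[global_allocator]
static ALLOC: dhat::Alloc = dhat::Alloc;

#[cfg(not(feature = "dhat-heap"))]
#[global_allocator]
static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;

fn main() {
    // Only exists when the feature is enabled; otherwise this line is compiled out.
    #[cfg(feature = "dhat-heap")]
    let _profiler = dhat::Profiler::new_heap();

    // ... application code ...
}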
130  src/cache/cacher.rs (vendored)

@@ -1,17 +1,24 @@
 //! This module provides the functionality to cache the aggregated results fetched and aggregated
 //! from the upstream search engines in a json format.
 
+use error_stack::Report;
+use futures::future::try_join_all;
 use md5::compute;
-use redis::{Client, Commands, Connection};
+use redis::{aio::ConnectionManager, AsyncCommands, Client, RedisError};
+
+use super::error::PoolError;
 
 /// A named struct which stores the redis Connection url address to which the client will
 /// connect to.
-///
-/// # Fields
-///
-/// * `redis_connection_url` - It stores the redis Connection url address.
+#[derive(Clone)]
 pub struct RedisCache {
-    connection: Connection,
+    /// It stores a pool of connections ready to be used.
+    connection_pool: Vec<ConnectionManager>,
+    /// It stores the size of the connection pool (in other words the number of
+    /// connections that should be stored in the pool).
+    pool_size: u8,
+    /// It stores the index of which connection is being used at the moment.
+    current_connection: u8,
 }
 
 impl RedisCache {

@@ -19,11 +26,25 @@ impl RedisCache {
     ///
     /// # Arguments
     ///
-    /// * `redis_connection_url` - It stores the redis Connection url address.
-    pub fn new(redis_connection_url: String) -> Result<Self, Box<dyn std::error::Error>> {
+    /// * `redis_connection_url` - It takes the redis Connection url address.
+    /// * `pool_size` - It takes the size of the connection pool (in other words the number of
+    /// connections that should be stored in the pool).
+    pub async fn new(
+        redis_connection_url: &str,
+        pool_size: u8,
+    ) -> Result<Self, Box<dyn std::error::Error>> {
         let client = Client::open(redis_connection_url)?;
-        let connection = client.get_connection()?;
-        let redis_cache = RedisCache { connection };
+        let mut tasks: Vec<_> = Vec::new();
+
+        for _ in 0..pool_size {
+            tasks.push(client.get_tokio_connection_manager());
+        }
+
+        let redis_cache = RedisCache {
+            connection_pool: try_join_all(tasks).await?,
+            pool_size,
+            current_connection: Default::default(),
+        };
         Ok(redis_cache)
     }
 

@@ -32,7 +53,7 @@ impl RedisCache {
     /// # Arguments
     ///
     /// * `url` - It takes an url as string.
-    fn hash_url(url: &str) -> String {
+    fn hash_url(&self, url: &str) -> String {
         format!("{:?}", compute(url))
     }
 

@@ -41,9 +62,42 @@ impl RedisCache {
     /// # Arguments
     ///
     /// * `url` - It takes an url as a string.
-    pub fn cached_json(&mut self, url: &str) -> Result<String, Box<dyn std::error::Error>> {
-        let hashed_url_string = Self::hash_url(url);
-        Ok(self.connection.get(hashed_url_string)?)
+    pub async fn cached_json(&mut self, url: &str) -> Result<String, Report<PoolError>> {
+        self.current_connection = Default::default();
+        let hashed_url_string: &str = &self.hash_url(url);
+
+        let mut result: Result<String, RedisError> = self.connection_pool
+            [self.current_connection as usize]
+            .get(hashed_url_string)
+            .await;
+
+        // Code to check whether the current connection being used is dropped with connection error
+        // or not. if it drops with the connection error then the current connection is replaced
+        // with a new connection from the pool which is then used to run the redis command then
+        // that connection is also checked whether it is dropped or not if it is not then the
+        // result is passed as a `Result` or else the same process repeats again and if all of the
+        // connections in the pool result in connection drop error then a custom pool error is
+        // returned.
+        loop {
+            match result {
+                Err(error) => match error.is_connection_dropped() {
+                    true => {
+                        self.current_connection += 1;
+                        if self.current_connection == self.pool_size {
+                            return Err(Report::new(
+                                PoolError::PoolExhaustionWithConnectionDropError,
+                            ));
+                        }
+                        result = self.connection_pool[self.current_connection as usize]
+                            .get(hashed_url_string)
+                            .await;
+                        continue;
+                    }
+                    false => return Err(Report::new(PoolError::RedisError(error))),
+                },
+                Ok(res) => return Ok(res),
+            }
+        }
     }
 
     /// A function which caches the results by using the hashed `url` as the key and

@@ -54,21 +108,45 @@ impl RedisCache {
     ///
     /// * `json_results` - It takes the json results string as an argument.
     /// * `url` - It takes the url as a String.
-    pub fn cache_results(
+    pub async fn cache_results(
         &mut self,
-        json_results: String,
+        json_results: &str,
         url: &str,
-    ) -> Result<(), Box<dyn std::error::Error>> {
-        let hashed_url_string = Self::hash_url(url);
+    ) -> Result<(), Report<PoolError>> {
+        self.current_connection = Default::default();
+        let hashed_url_string: &str = &self.hash_url(url);
 
-        // put results_json into cache
-        self.connection.set(&hashed_url_string, json_results)?;
+        let mut result: Result<(), RedisError> = self.connection_pool
+            [self.current_connection as usize]
+            .set_ex(hashed_url_string, json_results, 60)
+            .await;
 
-        // Set the TTL for the key to 60 seconds
-        self.connection
-            .expire::<String, u32>(hashed_url_string, 60)
-            .unwrap();
-
-        Ok(())
+        // Code to check whether the current connection being used is dropped with connection error
+        // or not. if it drops with the connection error then the current connection is replaced
+        // with a new connection from the pool which is then used to run the redis command then
+        // that connection is also checked whether it is dropped or not if it is not then the
+        // result is passed as a `Result` or else the same process repeats again and if all of the
+        // connections in the pool result in connection drop error then a custom pool error is
+        // returned.
+        loop {
+            match result {
+                Err(error) => match error.is_connection_dropped() {
+                    true => {
+                        self.current_connection += 1;
+                        if self.current_connection == self.pool_size {
+                            return Err(Report::new(
+                                PoolError::PoolExhaustionWithConnectionDropError,
+                            ));
+                        }
+                        result = self.connection_pool[self.current_connection as usize]
+                            .set_ex(hashed_url_string, json_results, 60)
+                            .await;
+                        continue;
+                    }
+                    false => return Err(Report::new(PoolError::RedisError(error))),
+                },
+                Ok(_) => return Ok(()),
+            }
+        }
     }
 }
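Note on the hunk above: `cached_json` and `cache_results` repeat the same failover loop, advancing the pool index whenever a redis command fails with a dropped connection and returning `PoolError::PoolExhaustionWithConnectionDropError` once every pooled connection has been tried. A condensed sketch of that retry pattern, assuming the `PoolError` type from the new src/cache/error.rs below; the helper name and generic shape are illustrative, not part of the commit:

use error_stack::Report;
use redis::RedisError;

// Illustrative sketch only: run a redis command against connection index 0, 1, 2, ...
// retrying on `is_connection_dropped()` and failing once the pool is exhausted.
async fn with_failover<T, F, Fut>(pool_size: u8, mut run_on: F) -> Result<T, Report<PoolError>>
where
    F: FnMut(u8) -> Fut,
    Fut: std::future::Future<Output = Result<T, RedisError>>,
{
    let mut current: u8 = 0;
    loop {
        match run_on(current).await {
            Ok(value) => return Ok(value),
            Err(error) if error.is_connection_dropped() => {
                current += 1;
                if current == pool_size {
                    return Err(Report::new(PoolError::PoolExhaustionWithConnectionDropError));
                }
            }
            Err(error) => return Err(Report::new(PoolError::RedisError(error))),
        }
    }
}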
37  src/cache/error.rs (vendored, normal file)

@@ -0,0 +1,37 @@
+//! This module provides the error enum to handle different errors associated while requesting data from
+//! the redis server using an async connection pool.
+use std::fmt;
+
+use redis::RedisError;
+
+/// A custom error type used for handling redis async pool associated errors.
+#[derive(Debug)]
+pub enum PoolError {
+    /// This variant handles all errors related to `RedisError`,
+    RedisError(RedisError),
+    /// This variant handles the errors which occurs when all the connections
+    /// in the connection pool return a connection dropped redis error.
+    PoolExhaustionWithConnectionDropError,
+}
+
+impl fmt::Display for PoolError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            PoolError::RedisError(redis_error) => {
+                if let Some(detail) = redis_error.detail() {
+                    write!(f, "{}", detail)
+                } else {
+                    write!(f, "")
+                }
+            }
+            PoolError::PoolExhaustionWithConnectionDropError => {
+                write!(
+                    f,
+                    "Error all connections from the pool dropped with connection error"
+                )
+            }
+        }
+    }
+}
+
+impl error_stack::Context for PoolError {}
4  src/cache/mod.rs (vendored)

@@ -1 +1,5 @@
+//! This module provides the modules which provide the functionality to cache the aggregated
+//! results fetched and aggregated from the upstream search engines in a json format.
+
 pub mod cacher;
+pub mod error;
@@ -1,2 +1,4 @@
+//! This module provides the modules which handles the functionality to parse the lua config
+//! and convert the config options into rust readable form.
+
 pub mod parser;
-pub mod parser_models;
@@ -3,49 +3,40 @@
 
 use crate::handler::paths::{file_path, FileType};
 
-use super::parser_models::Style;
+use crate::models::parser_models::{AggregatorConfig, RateLimiter, Style};
 use log::LevelFilter;
-use rlua::Lua;
+use mlua::Lua;
 use std::{collections::HashMap, fs, thread::available_parallelism};
 
 /// A named struct which stores the parsed config file options.
-///
-/// # Fields
-//
-/// * `port` - It stores the parsed port number option on which the server should launch.
-/// * `binding_ip` - It stores the parsed ip address option on which the server should launch
-/// * `style` - It stores the theming options for the website.
-/// * `redis_url` - It stores the redis connection url address on which the redis
-/// client should connect.
-/// * `aggregator` - It stores the option to whether enable or disable production use.
-/// * `logging` - It stores the option to whether enable or disable logs.
-/// * `debug` - It stores the option to whether enable or disable debug mode.
-/// * `upstream_search_engines` - It stores all the engine names that were enabled by the user.
-/// * `request_timeout` - It stores the time (secs) which controls the server request timeout.
-/// * `threads` - It stores the number of threads which controls the app will use to run.
 #[derive(Clone)]
 pub struct Config {
+    /// It stores the parsed port number option on which the server should launch.
     pub port: u16,
+    /// It stores the parsed ip address option on which the server should launch
     pub binding_ip: String,
+    /// It stores the theming options for the website.
     pub style: Style,
+    /// It stores the redis connection url address on which the redis
+    /// client should connect.
     pub redis_url: String,
+    /// It stores the option to whether enable or disable production use.
     pub aggregator: AggregatorConfig,
+    /// It stores the option to whether enable or disable logs.
     pub logging: bool,
+    /// It stores the option to whether enable or disable debug mode.
     pub debug: bool,
-    pub upstream_search_engines: Vec<crate::engines::engine_models::EngineHandler>,
+    /// It stores all the engine names that were enabled by the user.
+    pub upstream_search_engines: Vec<crate::models::engine_models::EngineHandler>,
+    /// It stores the time (secs) which controls the server request timeout.
     pub request_timeout: u8,
+    /// It stores the number of threads which controls the app will use to run.
     pub threads: u8,
-}
-
-/// Configuration options for the aggregator.
-///
-/// # Fields
-///
-/// * `random_delay` - It stores the option to whether enable or disable random delays between
-/// requests.
-#[derive(Clone)]
-pub struct AggregatorConfig {
-    pub random_delay: bool,
+    /// It stores configuration options for the ratelimiting middleware.
+    pub rate_limiter: RateLimiter,
+    /// It stores the level of safe search to be used for restricting content in the
+    /// search results.
+    pub safe_search: u8,
 }
 
 impl Config {

@@ -63,58 +54,80 @@ impl Config {
     /// or io error if the config.lua file doesn't exists otherwise it returns a newly constructed
     /// Config struct with all the parsed config options from the parsed config file.
     pub fn parse(logging_initialized: bool) -> Result<Self, Box<dyn std::error::Error>> {
-        Lua::new().context(|context| -> Result<Self, Box<dyn std::error::Error>> {
-            let globals = context.globals();
-
-            context
-                .load(&fs::read_to_string(file_path(FileType::Config)?)?)
-                .exec()?;
-
-            let parsed_threads: u8 = globals.get::<_, u8>("threads")?;
-
-            let debug: bool = globals.get::<_, bool>("debug")?;
-            let logging:bool= globals.get::<_, bool>("logging")?;
-
-            if !logging_initialized {
-                set_logging_level(debug, logging);
-            }
-
-            let threads: u8 = if parsed_threads == 0 {
-                let total_num_of_threads: usize = available_parallelism()?.get() / 2;
-                log::error!("Config Error: The value of `threads` option should be a non zero positive integer");
-                log::error!("Falling back to using {} threads", total_num_of_threads);
-                total_num_of_threads as u8
-            } else {
-                parsed_threads
-            };
-
-            Ok(Config {
-                port: globals.get::<_, u16>("port")?,
-                binding_ip: globals.get::<_, String>("binding_ip")?,
-                style: Style::new(
-                    globals.get::<_, String>("theme")?,
-                    globals.get::<_, String>("colorscheme")?,
-                ),
-                redis_url: globals.get::<_, String>("redis_url")?,
-                aggregator: AggregatorConfig {
-                    random_delay: globals.get::<_, bool>("production_use")?,
-                },
-                logging,
-                debug,
-                upstream_search_engines: globals
-                    .get::<_, HashMap<String, bool>>("upstream_search_engines")?
-                    .into_iter()
-                    .filter_map(|(key, value)| value.then_some(key))
-                    .filter_map(|engine| crate::engines::engine_models::EngineHandler::new(&engine))
-                    .collect(),
-                request_timeout: globals.get::<_, u8>("request_timeout")?,
-                threads,
-            })
-        })
+        let lua = Lua::new();
+        let globals = lua.globals();
+
+        lua.load(&fs::read_to_string(file_path(FileType::Config)?)?)
+            .exec()?;
+
+        let parsed_threads: u8 = globals.get::<_, u8>("threads")?;
+
+        let debug: bool = globals.get::<_, bool>("debug")?;
+        let logging: bool = globals.get::<_, bool>("logging")?;
+
+        if !logging_initialized {
+            set_logging_level(debug, logging);
+        }
+
+        let threads: u8 = if parsed_threads == 0 {
+            let total_num_of_threads: usize = available_parallelism()?.get() / 2;
+            log::error!(
+                "Config Error: The value of `threads` option should be a non zero positive integer"
+            );
+            log::error!("Falling back to using {} threads", total_num_of_threads);
+            total_num_of_threads as u8
+        } else {
+            parsed_threads
+        };
+
+        let rate_limiter = globals.get::<_, HashMap<String, u8>>("rate_limiter")?;
+
+        let parsed_safe_search: u8 = globals.get::<_, u8>("safe_search")?;
+        let safe_search: u8 = match parsed_safe_search {
+            0..=4 => parsed_safe_search,
+            _ => {
+                log::error!("Config Error: The value of `safe_search` option should be a non zero positive integer from 0 to 4.");
+                log::error!("Falling back to using the value `1` for the option");
+                1
+            }
+        };
+
+        Ok(Config {
+            port: globals.get::<_, u16>("port")?,
+            binding_ip: globals.get::<_, String>("binding_ip")?,
+            style: Style::new(
+                globals.get::<_, String>("theme")?,
+                globals.get::<_, String>("colorscheme")?,
+            ),
+            redis_url: globals.get::<_, String>("redis_url")?,
+            aggregator: AggregatorConfig {
+                random_delay: globals.get::<_, bool>("production_use")?,
+            },
+            logging,
+            debug,
+            upstream_search_engines: globals
+                .get::<_, HashMap<String, bool>>("upstream_search_engines")?
+                .into_iter()
+                .filter_map(|(key, value)| value.then_some(key))
+                .filter_map(|engine| crate::models::engine_models::EngineHandler::new(&engine))
+                .collect(),
+            request_timeout: globals.get::<_, u8>("request_timeout")?,
+            threads,
+            rate_limiter: RateLimiter {
+                number_of_requests: rate_limiter["number_of_requests"],
+                time_limit: rate_limiter["time_limit"],
+            },
+            safe_search,
+        })
     }
 }
 
 /// a helper function that sets the proper logging level
+///
+/// # Arguments
+///
+/// * `debug` - It takes the option to whether enable or disable debug mode.
+/// * `logging` - It takes the option to whether enable or disable logs.
 fn set_logging_level(debug: bool, logging: bool) {
     if let Ok(pkg_env_var) = std::env::var("PKG_ENV") {
         if pkg_env_var.to_lowercase() == "dev" {
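Note on the parser hunk above: the rlua to mlua switch removes the `Lua::context(|ctx| ...)` closure, so the `Lua` value is used directly and globals are read after `load(...).exec()`. A stripped-down sketch of that read pattern with a couple of the options the new parser expects (illustrative, not the full websurfx parser):

use mlua::Lua;
use std::collections::HashMap;

// Illustrative sketch only: load a Lua config string and read typed globals the mlua way.
fn read_config(source: &str) -> Result<(u16, HashMap<String, u8>), Box<dyn std::error::Error>> {
    let lua = Lua::new();
    lua.load(source).exec()?;
    let globals = lua.globals();

    let port: u16 = globals.get::<_, u16>("port")?;
    // `rate_limiter` is read as a table of u8 values, mirroring the parser above.
    let rate_limiter: HashMap<String, u8> = globals.get::<_, HashMap<String, u8>>("rate_limiter")?;
    Ok((port, rate_limiter))
}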
@@ -4,14 +4,14 @@
 
 use std::collections::HashMap;
 
-use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
+use reqwest::header::HeaderMap;
 use scraper::{Html, Selector};
 
-use crate::results::aggregation_models::SearchResult;
+use crate::models::aggregation_models::SearchResult;
 
-use super::engine_models::{EngineError, SearchEngine};
+use crate::models::engine_models::{EngineError, SearchEngine};
 
-use error_stack::{IntoReport, Report, Result, ResultExt};
+use error_stack::{Report, Result, ResultExt};
 
 /// A new DuckDuckGo engine type defined in-order to implement the `SearchEngine` trait which allows to
 /// reduce code duplication as well as allows to create vector of different search engines easily.

@@ -19,30 +19,13 @@ pub struct DuckDuckGo;
 
 #[async_trait::async_trait]
 impl SearchEngine for DuckDuckGo {
-    /// This function scrapes results from the upstream engine duckduckgo and puts all the scraped
-    /// results like title, visiting_url (href in html),engine (from which engine it was fetched from)
-    /// and description in a RawSearchResult and then adds that to HashMap whose keys are url and
-    /// values are RawSearchResult struct and then returns it within a Result enum.
-    ///
-    /// # Arguments
-    ///
-    /// * `query` - Takes the user provided query to query to the upstream search engine with.
-    /// * `page` - Takes an u32 as an argument.
-    /// * `user_agent` - Takes a random user agent string as an argument.
-    /// * `request_timeout` - Takes a time (secs) as a value which controls the server request timeout.
-    ///
-    /// # Errors
-    ///
-    /// Returns an `EngineErrorKind` if the user is not connected to the internet or if their is failure to
-    /// reach the above `upstream search engine` page or if the `upstream search engine` is unable to
-    /// provide results for the requested search query and also returns error if the scraping selector
-    /// or HeaderMap fails to initialize.
     async fn results(
         &self,
-        query: String,
+        query: &str,
         page: u32,
-        user_agent: String,
+        user_agent: &str,
         request_timeout: u8,
+        _safe_search: u8,
     ) -> Result<HashMap<String, SearchResult>, EngineError> {
         // Page number can be missing or empty string and so appropriate handling is required
         // so that upstream server recieves valid page number.

@@ -61,38 +44,19 @@ impl SearchEngine for DuckDuckGo {
         };
 
         // initializing HeaderMap and adding appropriate headers.
-        let mut header_map = HeaderMap::new();
-        header_map.insert(
-            USER_AGENT,
-            user_agent
-                .parse()
-                .into_report()
-                .change_context(EngineError::UnexpectedError)?,
-        );
-        header_map.insert(
-            REFERER,
-            "https://google.com/"
-                .parse()
-                .into_report()
-                .change_context(EngineError::UnexpectedError)?,
-        );
-        header_map.insert(
-            CONTENT_TYPE,
-            "application/x-www-form-urlencoded"
-                .parse()
-                .into_report()
-                .change_context(EngineError::UnexpectedError)?,
-        );
-        header_map.insert(
-            COOKIE,
-            "kl=wt-wt"
-                .parse()
-                .into_report()
-                .change_context(EngineError::UnexpectedError)?,
-        );
+        let header_map = HeaderMap::try_from(&HashMap::from([
+            ("USER_AGENT".to_string(), user_agent.to_string()),
+            ("REFERER".to_string(), "https://google.com/".to_string()),
+            (
+                "CONTENT_TYPE".to_string(),
+                "application/x-www-form-urlencoded".to_string(),
+            ),
+            ("COOKIE".to_string(), "kl=wt-wt".to_string()),
+        ]))
+        .change_context(EngineError::UnexpectedError)?;
 
         let document: Html = Html::parse_document(
-            &DuckDuckGo::fetch_html_from_upstream(self, url, header_map, request_timeout).await?,
+            &DuckDuckGo::fetch_html_from_upstream(self, &url, header_map, request_timeout).await?,
         );
 
         let no_result: Selector = Selector::parse(".no-results")

@@ -126,8 +90,7 @@ impl SearchEngine for DuckDuckGo {
                     .next()
                     .unwrap()
                     .inner_html()
-                    .trim()
-                    .to_string(),
+                    .trim(),
                 format!(
                     "https://{}",
                     result

@@ -136,15 +99,15 @@ impl SearchEngine for DuckDuckGo {
                         .unwrap()
                         .inner_html()
                         .trim()
-                ),
+                )
+                .as_str(),
                 result
                     .select(&result_desc)
                     .next()
                     .unwrap()
                     .inner_html()
-                    .trim()
-                    .to_string(),
-                vec!["duckduckgo".to_string()],
+                    .trim(),
+                &["duckduckgo"],
             )
         })
         .map(|search_result| (search_result.url.clone(), search_result))
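Note on the header construction above: it now relies on `HeaderMap` implementing `TryFrom<&HashMap<String, String>>`, which replaces the repeated `insert` / `parse` / `into_report` chains with a single fallible conversion. A small standalone sketch of that conversion (the header values here are illustrative, not the ones used by the engine):

use reqwest::header::HeaderMap;
use std::collections::HashMap;

// Illustrative sketch only: build a HeaderMap from owned strings in one fallible step.
fn example_headers() -> Result<HeaderMap, Box<dyn std::error::Error>> {
    let headers = HeaderMap::try_from(&HashMap::from([
        ("USER_AGENT".to_string(), "example-agent/1.0".to_string()),
        ("REFERER".to_string(), "https://google.com/".to_string()),
    ]))?;
    Ok(headers)
}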
@@ -1,109 +0,0 @@
-//! This module provides the error enum to handle different errors associated while requesting data from
-//! the upstream search engines with the search query provided by the user.
-
-use crate::results::aggregation_models::SearchResult;
-use error_stack::{IntoReport, Result, ResultExt};
-use std::{collections::HashMap, fmt, time::Duration};
-
-/// A custom error type used for handle engine associated errors.
-///
-/// This enum provides variants three different categories of errors:
-/// * `RequestError` - This variant handles all request related errors like forbidden, not found,
-/// etc.
-/// * `EmptyResultSet` - This variant handles the not results found error provide by the upstream
-/// search engines.
-/// * `UnexpectedError` - This variant handles all the errors which are unexpected or occur rarely
-/// and are errors mostly related to failure in initialization of HeaderMap, Selector errors and
-/// all other errors occurring within the code handling the `upstream search engines`.
-#[derive(Debug)]
-pub enum EngineError {
-    EmptyResultSet,
-    RequestError,
-    UnexpectedError,
-}
-
-impl fmt::Display for EngineError {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        match self {
-            EngineError::EmptyResultSet => {
-                write!(f, "The upstream search engine returned an empty result set")
-            }
-            EngineError::RequestError => {
-                write!(
-                    f,
-                    "Error occurred while requesting data from upstream search engine"
-                )
-            }
-            EngineError::UnexpectedError => {
-                write!(f, "An unexpected error occurred while processing the data")
-            }
-        }
-    }
-}
-
-impl error_stack::Context for EngineError {}
-
-/// A trait to define common behavior for all search engines.
-#[async_trait::async_trait]
-pub trait SearchEngine: Sync + Send {
-    async fn fetch_html_from_upstream(
-        &self,
-        url: String,
-        header_map: reqwest::header::HeaderMap,
-        request_timeout: u8,
-    ) -> Result<String, EngineError> {
-        // fetch the html from upstream search engine
-        Ok(reqwest::Client::new()
-            .get(url)
-            .timeout(Duration::from_secs(request_timeout as u64)) // Add timeout to request to avoid DDOSing the server
-            .headers(header_map) // add spoofed headers to emulate human behavior
-            .send()
-            .await
-            .into_report()
-            .change_context(EngineError::RequestError)?
-            .text()
-            .await
-            .into_report()
-            .change_context(EngineError::RequestError)?)
-    }
-
-    async fn results(
-        &self,
-        query: String,
-        page: u32,
-        user_agent: String,
-        request_timeout: u8,
-    ) -> Result<HashMap<String, SearchResult>, EngineError>;
-}
-
-pub struct EngineHandler {
-    engine: Box<dyn SearchEngine>,
-    name: &'static str,
-}
-
-impl Clone for EngineHandler {
-    fn clone(&self) -> Self {
-        Self::new(self.name).unwrap()
-    }
-}
-
-impl EngineHandler {
-    /// parses an engine name into an engine handler, returns none if the engine is unknown
-    pub fn new(engine_name: &str) -> Option<Self> {
-        let engine: (&'static str, Box<dyn SearchEngine>) =
-            match engine_name.to_lowercase().as_str() {
-                "duckduckgo" => ("duckduckgo", Box::new(super::duckduckgo::DuckDuckGo)),
-                "searx" => ("searx", Box::new(super::searx::Searx)),
-                _ => return None,
-            };
-
-        Some(Self {
-            engine: engine.1,
-            name: engine.0,
-        })
-    }
-
-    pub fn into_name_engine(self) -> (&'static str, Box<dyn SearchEngine>) {
-        (self.name, self.engine)
-    }
-}
@@ -1,3 +1,7 @@
+//! This module provides different modules which handles the functionlity to fetch results from the
+//! upstream search engines based on user requested queries. Also provides different models to
+//! provide a standard functions to be implemented for all the upstream search engine handling
+//! code. Moreover, it also provides a custom error for the upstream search engine handling code.
+
 pub mod duckduckgo;
-pub mod engine_models;
 pub mod searx;
|
|||||||
//! by querying the upstream searx search engine instance with user provided query and with a page
|
//! by querying the upstream searx search engine instance with user provided query and with a page
|
||||||
//! number if provided.
|
//! number if provided.
|
||||||
|
|
||||||
use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
|
use reqwest::header::HeaderMap;
|
||||||
use scraper::{Html, Selector};
|
use scraper::{Html, Selector};
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
|
||||||
use crate::results::aggregation_models::SearchResult;
|
use crate::models::aggregation_models::SearchResult;
|
||||||
|
use crate::models::engine_models::{EngineError, SearchEngine};
|
||||||
use super::engine_models::{EngineError, SearchEngine};
|
use error_stack::{Report, Result, ResultExt};
|
||||||
use error_stack::{IntoReport, Report, Result, ResultExt};
|
|
||||||
|
|
||||||
/// A new Searx engine type defined in-order to implement the `SearchEngine` trait which allows to
|
/// A new Searx engine type defined in-order to implement the `SearchEngine` trait which allows to
|
||||||
/// reduce code duplication as well as allows to create vector of different search engines easily.
|
/// reduce code duplication as well as allows to create vector of different search engines easily.
|
||||||
@ -17,66 +16,40 @@ pub struct Searx;
|
|||||||
|
|
||||||
#[async_trait::async_trait]
|
#[async_trait::async_trait]
|
||||||
impl SearchEngine for Searx {
|
impl SearchEngine for Searx {
|
||||||
/// This function scrapes results from the upstream engine duckduckgo and puts all the scraped
|
|
||||||
/// results like title, visiting_url (href in html),engine (from which engine it was fetched from)
|
|
||||||
/// and description in a RawSearchResult and then adds that to HashMap whose keys are url and
|
|
||||||
/// values are RawSearchResult struct and then returns it within a Result enum.
|
|
||||||
///
|
|
||||||
/// # Arguments
|
|
||||||
///
|
|
||||||
/// * `query` - Takes the user provided query to query to the upstream search engine with.
|
|
||||||
/// * `page` - Takes an u32 as an argument.
|
|
||||||
/// * `user_agent` - Takes a random user agent string as an argument.
|
|
||||||
/// * `request_timeout` - Takes a time (secs) as a value which controls the server request timeout.
|
|
||||||
///
|
|
||||||
/// # Errors
|
|
||||||
///
|
|
||||||
/// Returns an `EngineErrorKind` if the user is not connected to the internet or if their is failure to
|
|
||||||
/// reach the above `upstream search engine` page or if the `upstream search engine` is unable to
|
|
||||||
/// provide results for the requested search query and also returns error if the scraping selector
|
|
||||||
/// or HeaderMap fails to initialize.
|
|
||||||
|
|
||||||
async fn results(
|
async fn results(
|
||||||
&self,
|
&self,
|
||||||
query: String,
|
query: &str,
|
||||||
page: u32,
|
page: u32,
|
||||||
user_agent: String,
|
user_agent: &str,
|
||||||
request_timeout: u8,
|
request_timeout: u8,
|
||||||
|
mut safe_search: u8,
|
||||||
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
||||||
// Page number can be missing or empty string and so appropriate handling is required
|
// Page number can be missing or empty string and so appropriate handling is required
|
||||||
// so that upstream server recieves valid page number.
|
// so that upstream server recieves valid page number.
|
||||||
|
if safe_search == 3 {
|
||||||
|
safe_search = 2;
|
||||||
|
};
|
||||||
|
|
||||||
let url: String = match page {
|
let url: String = match page {
|
||||||
0 | 1 => format!("https://searx.work/search?q={query}&pageno=1"),
|
0 | 1 => {
|
||||||
_ => format!("https://searx.work/search?q={query}&pageno={page}"),
|
format!("https://searx.work/search?q={query}&pageno=1&safesearch={safe_search}")
|
||||||
|
}
|
||||||
|
_ => format!(
|
||||||
|
"https://searx.work/search?q={query}&pageno={page}&safesearch={safe_search}"
|
||||||
|
),
|
||||||
};
|
};
|
||||||
|
|
||||||
// initializing headers and adding appropriate headers.
|
// initializing headers and adding appropriate headers.
|
||||||
let mut header_map = HeaderMap::new();
|
let header_map = HeaderMap::try_from(&HashMap::from([
|
||||||
header_map.insert(
|
("USER_AGENT".to_string(), user_agent.to_string()),
|
||||||
USER_AGENT,
|
("REFERER".to_string(), "https://google.com/".to_string()),
|
||||||
user_agent
|
("CONTENT_TYPE".to_string(), "application/x-www-form-urlencoded".to_string()),
|
||||||
.parse()
|
("COOKIE".to_string(), "categories=general; language=auto; locale=en; autocomplete=duckduckgo; image_proxy=1; method=POST; safesearch=2; theme=simple; results_on_new_tab=1; doi_resolver=oadoi.org; simple_style=auto; center_alignment=1; query_in_title=1; infinite_scroll=0; disabled_engines=; enabled_engines=\"archive is__general\\054yep__general\\054curlie__general\\054currency__general\\054ddg definitions__general\\054wikidata__general\\054duckduckgo__general\\054tineye__general\\054lingva__general\\054startpage__general\\054yahoo__general\\054wiby__general\\054marginalia__general\\054alexandria__general\\054wikibooks__general\\054wikiquote__general\\054wikisource__general\\054wikiversity__general\\054wikivoyage__general\\054dictzone__general\\054seznam__general\\054mojeek__general\\054naver__general\\054wikimini__general\\054brave__general\\054petalsearch__general\\054goo__general\"; disabled_plugins=; enabled_plugins=\"searx.plugins.hostname_replace\\054searx.plugins.oa_doi_rewrite\\054searx.plugins.vim_hotkeys\"; tokens=; maintab=on; enginetab=on".to_string())
|
||||||
.into_report()
|
]))
|
||||||
.change_context(EngineError::UnexpectedError)?,
|
.change_context(EngineError::UnexpectedError)?;
|
||||||
);
|
|
||||||
header_map.insert(
|
|
||||||
REFERER,
|
|
||||||
"https://google.com/"
|
|
||||||
.parse()
|
|
||||||
.into_report()
|
|
||||||
.change_context(EngineError::UnexpectedError)?,
|
|
||||||
);
|
|
||||||
header_map.insert(
|
|
||||||
CONTENT_TYPE,
|
|
||||||
"application/x-www-form-urlencoded"
|
|
||||||
.parse()
|
|
||||||
.into_report()
|
|
||||||
.change_context(EngineError::UnexpectedError)?,
|
|
||||||
);
|
|
||||||
header_map.insert(COOKIE, "categories=general; language=auto; locale=en; autocomplete=duckduckgo; image_proxy=1; method=POST; safesearch=2; theme=simple; results_on_new_tab=1; doi_resolver=oadoi.org; simple_style=auto; center_alignment=1; query_in_title=1; infinite_scroll=0; disabled_engines=; enabled_engines=\"archive is__general\\054yep__general\\054curlie__general\\054currency__general\\054ddg definitions__general\\054wikidata__general\\054duckduckgo__general\\054tineye__general\\054lingva__general\\054startpage__general\\054yahoo__general\\054wiby__general\\054marginalia__general\\054alexandria__general\\054wikibooks__general\\054wikiquote__general\\054wikisource__general\\054wikiversity__general\\054wikivoyage__general\\054dictzone__general\\054seznam__general\\054mojeek__general\\054naver__general\\054wikimini__general\\054brave__general\\054petalsearch__general\\054goo__general\"; disabled_plugins=; enabled_plugins=\"searx.plugins.hostname_replace\\054searx.plugins.oa_doi_rewrite\\054searx.plugins.vim_hotkeys\"; tokens=; maintab=on; enginetab=on".parse().into_report().change_context(EngineError::UnexpectedError)?);
|
|
||||||
|
|
||||||
let document: Html = Html::parse_document(
|
let document: Html = Html::parse_document(
|
||||||
&Searx::fetch_html_from_upstream(self, url, header_map, request_timeout).await?,
|
&Searx::fetch_html_from_upstream(self, &url, header_map, request_timeout).await?,
|
||||||
);
|
);
|
||||||
|
|
||||||
let no_result: Selector = Selector::parse("#urls>.dialog-error>p")
|
let no_result: Selector = Selector::parse("#urls>.dialog-error>p")
|
||||||
@ -117,24 +90,21 @@ impl SearchEngine for Searx {
|
|||||||
.next()
|
.next()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.inner_html()
|
.inner_html()
|
||||||
.trim()
|
.trim(),
|
||||||
.to_string(),
|
|
||||||
result
|
result
|
||||||
.select(&result_url)
|
.select(&result_url)
|
||||||
.next()
|
.next()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.value()
|
.value()
|
||||||
.attr("href")
|
.attr("href")
|
||||||
.unwrap()
|
.unwrap(),
|
||||||
.to_string(),
|
|
||||||
result
|
result
|
||||||
.select(&result_desc)
|
.select(&result_desc)
|
||||||
.next()
|
.next()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.inner_html()
|
.inner_html()
|
||||||
.trim()
|
.trim(),
|
||||||
.to_string(),
|
&["searx"],
|
||||||
vec!["searx".to_string()],
|
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
.map(|search_result| (search_result.url.clone(), search_result))
|
.map(|search_result| (search_result.url.clone(), search_result))
|
||||||
|
@@ -1 +1,5 @@
+//! This module provides modules which provide the functionality to handle paths for different
+//! files present on different paths and provide one appropriate path on which it is present and
+//! can be used.
+
 pub mod paths;
@ -4,108 +4,116 @@
|
|||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::io::Error;
|
use std::io::Error;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
use std::sync::OnceLock;
|
||||||
|
|
||||||
// ------- Constants --------
|
// ------- Constants --------
|
||||||
static PUBLIC_DIRECTORY_NAME: &str = "public";
|
/// The constant holding the name of the theme folder.
|
||||||
static COMMON_DIRECTORY_NAME: &str = "websurfx";
|
const PUBLIC_DIRECTORY_NAME: &str = "public";
|
||||||
static CONFIG_FILE_NAME: &str = "config.lua";
|
/// The constant holding the name of the common folder.
|
||||||
static ALLOWLIST_FILE_NAME: &str = "allowlist.txt";
|
const COMMON_DIRECTORY_NAME: &str = "websurfx";
|
||||||
static BLOCKLIST_FILE_NAME: &str = "blocklist.txt";
|
/// The constant holding the name of the config file.
|
||||||
|
const CONFIG_FILE_NAME: &str = "config.lua";
|
||||||
|
/// The constant holding the name of the AllowList text file.
|
||||||
|
const ALLOWLIST_FILE_NAME: &str = "allowlist.txt";
|
||||||
|
/// The constant holding the name of the BlockList text file.
|
||||||
|
const BLOCKLIST_FILE_NAME: &str = "blocklist.txt";
|
||||||
|
|
||||||
|
/// An enum type which provides different variants to handle paths for various files/folders.
|
||||||
#[derive(Hash, PartialEq, Eq, Debug)]
|
#[derive(Hash, PartialEq, Eq, Debug)]
|
||||||
pub enum FileType {
|
pub enum FileType {
|
||||||
|
/// This variant handles all the paths associated with the config file.
|
||||||
Config,
|
Config,
|
||||||
|
/// This variant handles all the paths associated with the Allowlist text file.
|
||||||
AllowList,
|
AllowList,
|
||||||
|
/// This variant handles all the paths associated with the BlockList text file.
|
||||||
BlockList,
|
BlockList,
|
||||||
|
/// This variant handles all the paths associated with the public folder (Theme folder).
|
||||||
Theme,
|
Theme,
|
||||||
}
|
}
|
||||||
|
|
||||||
static FILE_PATHS_FOR_DIFF_FILE_TYPES: once_cell::sync::Lazy<HashMap<FileType, Vec<String>>> =
|
/// A static variable which stores the different filesystem paths for various file/folder types.
|
||||||
once_cell::sync::Lazy::new(|| {
|
static FILE_PATHS_FOR_DIFF_FILE_TYPES: OnceLock<HashMap<FileType, Vec<String>>> = OnceLock::new();
|
||||||
HashMap::from([
|
|
||||||
(
|
|
||||||
FileType::Config,
|
|
||||||
vec![
|
|
||||||
format!(
|
|
||||||
"{}/.config/{}/{}",
|
|
||||||
std::env::var("HOME").unwrap(),
|
|
||||||
COMMON_DIRECTORY_NAME,
|
|
||||||
CONFIG_FILE_NAME
|
|
||||||
),
|
|
||||||
format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME),
|
|
||||||
format!("./{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME),
|
|
||||||
],
|
|
||||||
),
|
|
||||||
(
|
|
||||||
FileType::Theme,
|
|
||||||
vec![
|
|
||||||
format!("/opt/websurfx/{}/", PUBLIC_DIRECTORY_NAME),
|
|
||||||
format!("./{}/", PUBLIC_DIRECTORY_NAME),
|
|
||||||
],
|
|
||||||
),
|
|
||||||
(
|
|
||||||
FileType::AllowList,
|
|
||||||
vec![
|
|
||||||
format!(
|
|
||||||
"{}/.config/{}/{}",
|
|
||||||
std::env::var("HOME").unwrap(),
|
|
||||||
COMMON_DIRECTORY_NAME,
|
|
||||||
ALLOWLIST_FILE_NAME
|
|
||||||
),
|
|
||||||
format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, ALLOWLIST_FILE_NAME),
|
|
||||||
format!("./{}/{}", COMMON_DIRECTORY_NAME, ALLOWLIST_FILE_NAME),
|
|
||||||
],
|
|
||||||
),
|
|
||||||
(
|
|
||||||
FileType::BlockList,
|
|
||||||
vec![
|
|
||||||
format!(
|
|
||||||
"{}/.config/{}/{}",
|
|
||||||
std::env::var("HOME").unwrap(),
|
|
||||||
COMMON_DIRECTORY_NAME,
|
|
||||||
BLOCKLIST_FILE_NAME
|
|
||||||
),
|
|
||||||
format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, BLOCKLIST_FILE_NAME),
|
|
||||||
format!("./{}/{}", COMMON_DIRECTORY_NAME, BLOCKLIST_FILE_NAME),
|
|
||||||
],
|
|
||||||
),
|
|
||||||
])
|
|
||||||
});
|
|
||||||
|
|
||||||
-/// A helper function which returns an appropriate config file path checking if the config
-/// file exists on that path.
+/// A function which returns an appropriate path for thr provided file type by checking if the path
+/// for the given file type exists on that path.
 ///
 /// # Error
 ///
-/// Returns a `config file not found!!` error if the config file is not present under following
-/// paths which are:
-/// 1. `~/.config/websurfx/` if it not present here then it fallbacks to the next one (2)
-/// 2. `/etc/xdg/websurfx/config.lua` if it is not present here then it fallbacks to the next
-/// one (3).
-/// 3. `websurfx/` (under project folder ( or codebase in other words)) if it is not present
-/// here then it returns an error as mentioned above.
-
-/// A function which returns an appropriate theme directory path checking if the theme
-/// directory exists on that path.
+/// Returns a `<File Name> folder/file not found!!` error if the give file_type folder/file is not
+/// present on the path on which it is being tested.
 ///
-/// # Error
+/// # Example
+///
+/// If this function is give the file_type of Theme variant then the theme folder is checked by the
+/// following steps:
 ///
-/// Returns a `Theme (public) folder not found!!` error if the theme folder is not present under following
-/// paths which are:
 /// 1. `/opt/websurfx` if it not present here then it fallbacks to the next one (2)
 /// 2. Under project folder ( or codebase in other words) if it is not present
 /// here then it returns an error as mentioned above.
-pub fn file_path(file_type: FileType) -> Result<String, Error> {
-    let file_path = FILE_PATHS_FOR_DIFF_FILE_TYPES.get(&file_type).unwrap();
+pub fn file_path(file_type: FileType) -> Result<&'static str, Error> {
+    let file_path: &Vec<String> = FILE_PATHS_FOR_DIFF_FILE_TYPES
.get_or_init(|| {
|
||||||
|
HashMap::from([
|
||||||
|
(
|
||||||
|
FileType::Config,
|
||||||
|
vec![
|
||||||
|
format!(
|
||||||
|
"{}/.config/{}/{}",
|
||||||
|
std::env::var("HOME").unwrap(),
|
||||||
|
COMMON_DIRECTORY_NAME,
|
||||||
|
CONFIG_FILE_NAME
|
||||||
|
),
|
||||||
|
format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME),
|
||||||
|
format!("./{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
FileType::Theme,
|
||||||
|
vec![
|
||||||
|
format!("/opt/websurfx/{}/", PUBLIC_DIRECTORY_NAME),
|
||||||
|
format!("./{}/", PUBLIC_DIRECTORY_NAME),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
FileType::AllowList,
|
||||||
|
vec![
|
||||||
|
format!(
|
||||||
|
"{}/.config/{}/{}",
|
||||||
|
std::env::var("HOME").unwrap(),
|
||||||
|
COMMON_DIRECTORY_NAME,
|
||||||
|
ALLOWLIST_FILE_NAME
|
||||||
|
),
|
||||||
|
format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, ALLOWLIST_FILE_NAME),
|
||||||
|
format!("./{}/{}", COMMON_DIRECTORY_NAME, ALLOWLIST_FILE_NAME),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
FileType::BlockList,
|
||||||
|
vec![
|
||||||
|
format!(
|
||||||
|
"{}/.config/{}/{}",
|
||||||
|
std::env::var("HOME").unwrap(),
|
||||||
|
COMMON_DIRECTORY_NAME,
|
||||||
|
BLOCKLIST_FILE_NAME
|
||||||
|
),
|
||||||
|
format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, BLOCKLIST_FILE_NAME),
|
||||||
|
format!("./{}/{}", COMMON_DIRECTORY_NAME, BLOCKLIST_FILE_NAME),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
])
|
||||||
|
})
|
||||||
|
.get(&file_type)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
     for (idx, _) in file_path.iter().enumerate() {
         if Path::new(file_path[idx].as_str()).exists() {
-            return Ok(file_path[idx].clone());
+            return Ok(std::mem::take(&mut &*file_path[idx]));
         }
     }

     // if no of the configs above exist, return error
     Err(Error::new(
         std::io::ErrorKind::NotFound,
-        format!("{:?} file not found!!", file_type),
+        format!("{:?} file/folder not found!!", file_type),
     ))
 }
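A short usage sketch for the rewritten `file_path` helper (illustrative only, not part of the diff): the caller gets back the first candidate path that actually exists, or an `io::Error` with kind `NotFound`.

use websurfx::handler::paths::{file_path, FileType};

fn main() -> std::io::Result<()> {
    // Resolves e.g. ~/.config/websurfx/config.lua, /etc/xdg/websurfx/config.lua
    // or ./websurfx/config.lua, whichever exists first.
    let config_path: &'static str = file_path(FileType::Config)?;
    println!("using config at {config_path}");
    Ok(())
}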
35 src/lib.rs
@@ -1,19 +1,25 @@
 //! This main library module provides the functionality to provide and handle the Tcp server
 //! and register all the routes for the `websurfx` meta search engine website.

+#![forbid(unsafe_code, clippy::panic)]
+#![deny(missing_docs, clippy::missing_docs_in_private_items, clippy::perf)]
+#![warn(clippy::cognitive_complexity, rust_2018_idioms)]
+
 pub mod cache;
 pub mod config;
 pub mod engines;
 pub mod handler;
+pub mod models;
 pub mod results;
 pub mod server;

 use std::net::TcpListener;

-use crate::server::routes;
+use crate::server::router;

 use actix_cors::Cors;
 use actix_files as fs;
+use actix_governor::{Governor, GovernorConfigBuilder};
 use actix_web::{dev::Server, http::header, middleware::Logger, web, App, HttpServer};
 use config::parser::Config;
 use handlebars::Handlebars;
@@ -40,15 +46,15 @@ use handler::paths::{file_path, FileType};
 /// let server = run(listener,config).expect("Failed to start server");
 /// ```
 pub fn run(listener: TcpListener, config: Config) -> std::io::Result<Server> {
-    let mut handlebars: Handlebars = Handlebars::new();
+    let mut handlebars: Handlebars<'_> = Handlebars::new();

-    let public_folder_path: String = file_path(FileType::Theme)?;
+    let public_folder_path: &str = file_path(FileType::Theme)?;

     handlebars
         .register_templates_directory(".html", format!("{}/templates", public_folder_path))
         .unwrap();

-    let handlebars_ref: web::Data<Handlebars> = web::Data::new(handlebars);
+    let handlebars_ref: web::Data<Handlebars<'_>> = web::Data::new(handlebars);

     let cloned_config_threads_opt: u8 = config.threads;

@@ -64,10 +70,17 @@ pub fn run(listener: TcpListener, config: Config) -> std::io::Result<Server> {
         ]);

         App::new()
+            .wrap(Logger::default()) // added logging middleware for logging.
             .app_data(handlebars_ref.clone())
             .app_data(web::Data::new(config.clone()))
             .wrap(cors)
-            .wrap(Logger::default()) // added logging middleware for logging.
+            .wrap(Governor::new(
+                &GovernorConfigBuilder::default()
+                    .per_second(config.rate_limiter.time_limit as u64)
+                    .burst_size(config.rate_limiter.number_of_requests as u32)
+                    .finish()
+                    .unwrap(),
+            ))
             // Serve images and static files (css and js files).
             .service(
                 fs::Files::new("/static", format!("{}/static", public_folder_path))
@@ -77,12 +90,12 @@ pub fn run(listener: TcpListener, config: Config) -> std::io::Result<Server> {
                 fs::Files::new("/images", format!("{}/images", public_folder_path))
                     .show_files_listing(),
             )
-            .service(routes::robots_data) // robots.txt
-            .service(routes::index) // index page
-            .service(routes::search) // search page
-            .service(routes::about) // about page
-            .service(routes::settings) // settings page
-            .default_service(web::route().to(routes::not_found)) // error page
+            .service(router::robots_data) // robots.txt
+            .service(router::index) // index page
+            .service(server::routes::search::search) // search page
+            .service(router::about) // about page
+            .service(router::settings) // settings page
+            .default_service(web::route().to(router::not_found)) // error page
     })
     .workers(cloned_config_threads_opt as usize)
     // Start server on 127.0.0.1 with the user provided port number. for example 127.0.0.1:8080.
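As an aside, a minimal sketch of the rate-limiter settings wired in above; the `3` and `20` below are only illustrative stand-ins for `config.rate_limiter.time_limit` and `config.rate_limiter.number_of_requests`.

use actix_governor::{Governor, GovernorConfigBuilder};

fn main() {
    // Allow bursts of up to 20 requests per client, refilling one permit every 3 seconds.
    let limiter = GovernorConfigBuilder::default()
        .per_second(3)   // stands in for config.rate_limiter.time_limit
        .burst_size(20)  // stands in for config.rate_limiter.number_of_requests
        .finish()
        .unwrap();
    // In `run` above this becomes: App::new().wrap(Governor::new(&limiter))
    let _middleware = Governor::new(&limiter);
}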
181 src/models/aggregation_models.rs Normal file
@ -0,0 +1,181 @@
|
|||||||
|
//! This module provides public models for handling, storing and serializing of search results
|
||||||
|
//! data scraped from the upstream search engines.
|
||||||
|
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use smallvec::SmallVec;
|
||||||
|
|
||||||
|
use super::{engine_models::EngineError, parser_models::Style};
|
||||||
|
|
||||||
|
/// A named struct to store the raw scraped search results scraped search results from the
|
||||||
|
/// upstream search engines before aggregating it.It derives the Clone trait which is needed
|
||||||
|
/// to write idiomatic rust using `Iterators`.
|
||||||
|
/// (href url in html in simple words).
|
||||||
|
#[derive(Clone, Serialize, Deserialize)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct SearchResult {
|
||||||
|
/// The title of the search result.
|
||||||
|
pub title: String,
|
||||||
|
/// The url which is accessed when clicked on it
|
||||||
|
pub url: String,
|
||||||
|
/// The description of the search result.
|
||||||
|
pub description: String,
|
||||||
|
/// The names of the upstream engines from which this results were provided.
|
||||||
|
pub engine: SmallVec<[String; 0]>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SearchResult {
|
||||||
|
/// Constructs a new `RawSearchResult` with the given arguments needed for the struct.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
///
|
||||||
|
/// * `title` - The title of the search result.
|
||||||
|
/// * `url` - The url which is accessed when clicked on it
|
||||||
|
/// (href url in html in simple words).
|
||||||
|
/// * `description` - The description of the search result.
|
||||||
|
/// * `engine` - The names of the upstream engines from which this results were provided.
|
||||||
|
pub fn new(title: &str, url: &str, description: &str, engine: &[&str]) -> Self {
|
||||||
|
SearchResult {
|
||||||
|
title: title.to_owned(),
|
||||||
|
url: url.to_owned(),
|
||||||
|
description: description.to_owned(),
|
||||||
|
engine: engine.iter().map(|name| name.to_string()).collect(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A function which adds the engine name provided as a string into a vector of strings.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
///
|
||||||
|
/// * `engine` - Takes an engine name provided as a String.
|
||||||
|
pub fn add_engines(&mut self, engine: &str) {
|
||||||
|
self.engine.push(engine.to_owned())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A function which returns the engine name stored from the struct as a string.
|
||||||
|
///
|
||||||
|
/// # Returns
|
||||||
|
///
|
||||||
|
/// An engine name stored as a string from the struct.
|
||||||
|
pub fn engine(&mut self) -> String {
|
||||||
|
std::mem::take(&mut self.engine[0])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A named struct that stores the error info related to the upstream search engines.
|
||||||
|
#[derive(Serialize, Deserialize, Clone)]
|
||||||
|
pub struct EngineErrorInfo {
|
||||||
|
/// It stores the error type which occured while fetching the result from a particular search
|
||||||
|
/// engine.
|
||||||
|
pub error: String,
|
||||||
|
/// It stores the name of the engine that failed to provide the requested search results.
|
||||||
|
pub engine: String,
|
||||||
|
/// It stores the name of the color to indicate whether how severe the particular error is (In
|
||||||
|
/// other words it indicates the severity of the error/issue).
|
||||||
|
pub severity_color: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl EngineErrorInfo {
|
||||||
|
/// Constructs a new `SearchResult` with the given arguments needed for the struct.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
///
|
||||||
|
/// * `error` - It takes the error type which occured while fetching the result from a particular
|
||||||
|
/// search engine.
|
||||||
|
/// * `engine` - It takes the name of the engine that failed to provide the requested search results.
|
||||||
|
pub fn new(error: &EngineError, engine: &str) -> Self {
|
||||||
|
Self {
|
||||||
|
error: match error {
|
||||||
|
EngineError::RequestError => "RequestError".to_owned(),
|
||||||
|
EngineError::EmptyResultSet => "EmptyResultSet".to_owned(),
|
||||||
|
EngineError::UnexpectedError => "UnexpectedError".to_owned(),
|
||||||
|
},
|
||||||
|
engine: engine.to_owned(),
|
||||||
|
severity_color: match error {
|
||||||
|
EngineError::RequestError => "green".to_owned(),
|
||||||
|
EngineError::EmptyResultSet => "blue".to_owned(),
|
||||||
|
EngineError::UnexpectedError => "red".to_owned(),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A named struct to store, serialize, deserialize the all the search results scraped and
|
||||||
|
/// aggregated from the upstream search engines.
|
||||||
|
/// `SearchResult` structs.
|
||||||
|
#[derive(Serialize, Deserialize, Default)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct SearchResults {
|
||||||
|
/// Stores the individual serializable `SearchResult` struct into a vector of
|
||||||
|
pub results: Vec<SearchResult>,
|
||||||
|
/// Stores the current pages search query `q` provided in the search url.
|
||||||
|
pub page_query: String,
|
||||||
|
/// Stores the theming options for the website.
|
||||||
|
pub style: Style,
|
||||||
|
/// Stores the information on which engines failed with their engine name
|
||||||
|
/// and the type of error that caused it.
|
||||||
|
pub engine_errors_info: Vec<EngineErrorInfo>,
|
||||||
|
/// Stores the flag option which holds the check value that the following
|
||||||
|
/// search query was disallowed when the safe search level set to 4 and it
|
||||||
|
/// was present in the `Blocklist` file.
|
||||||
|
pub disallowed: bool,
|
||||||
|
/// Stores the flag option which holds the check value that the following
|
||||||
|
/// search query was filtered when the safe search level set to 3 and it
|
||||||
|
/// was present in the `Blocklist` file.
|
||||||
|
pub filtered: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SearchResults {
|
||||||
|
/// Constructs a new `SearchResult` with the given arguments needed for the struct.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
///
|
||||||
|
/// * `results` - Takes an argument of individual serializable `SearchResult` struct
|
||||||
|
/// and stores it into a vector of `SearchResult` structs.
|
||||||
|
/// * `page_query` - Takes an argument of current page`s search query `q` provided in
|
||||||
|
/// the search url.
|
||||||
|
/// * `engine_errors_info` - Takes an array of structs which contains information regarding
|
||||||
|
/// which engines failed with their names, reason and their severity color name.
|
||||||
|
pub fn new(
|
||||||
|
results: Vec<SearchResult>,
|
||||||
|
page_query: &str,
|
||||||
|
engine_errors_info: &[EngineErrorInfo],
|
||||||
|
) -> Self {
|
||||||
|
Self {
|
||||||
|
results,
|
||||||
|
page_query: page_query.to_owned(),
|
||||||
|
style: Style::default(),
|
||||||
|
engine_errors_info: engine_errors_info.to_owned(),
|
||||||
|
disallowed: Default::default(),
|
||||||
|
filtered: Default::default(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A setter function to add website style to the return search results.
|
||||||
|
pub fn add_style(&mut self, style: &Style) {
|
||||||
|
self.style = style.clone();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A setter function that sets disallowed to true.
|
||||||
|
pub fn set_disallowed(&mut self) {
|
||||||
|
self.disallowed = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A setter function to set the current page search query.
|
||||||
|
pub fn set_page_query(&mut self, page: &str) {
|
||||||
|
self.page_query = page.to_owned();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A setter function that sets the filtered to true.
|
||||||
|
pub fn set_filtered(&mut self) {
|
||||||
|
self.filtered = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A getter function that gets the value of `engine_errors_info`.
|
||||||
|
pub fn engine_errors_info(&mut self) -> Vec<EngineErrorInfo> {
|
||||||
|
std::mem::take(&mut self.engine_errors_info)
|
||||||
|
}
|
||||||
|
/// A getter function that gets the value of `results`.
|
||||||
|
pub fn results(&mut self) -> Vec<SearchResult> {
|
||||||
|
self.results.clone()
|
||||||
|
}
|
||||||
|
}
|
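A rough construction sketch for the models above, using the `new` signatures shown in this file (the titles, URLs and engine name are illustrative values, not taken from the commit):

use websurfx::models::aggregation_models::{SearchResult, SearchResults};

fn build_example() -> SearchResults {
    let result = SearchResult::new(
        "Example Domain",
        "https://www.example.com",
        "This domain is for use in illustrative examples in documents.",
        &["duckduckgo"],
    );
    // No engine errors in this sketch, hence the empty slice.
    SearchResults::new(vec![result], "example", &[])
}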
159 src/models/engine_models.rs Normal file
@ -0,0 +1,159 @@
|
|||||||
|
//! This module provides the error enum to handle different errors associated while requesting data from
|
||||||
|
//! the upstream search engines with the search query provided by the user.
|
||||||
|
|
||||||
|
use super::aggregation_models::SearchResult;
|
||||||
|
use error_stack::{Result, ResultExt};
|
||||||
|
use std::{collections::HashMap, fmt, time::Duration};
|
||||||
|
|
||||||
|
/// A custom error type used for handle engine associated errors.
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub enum EngineError {
|
||||||
|
/// This variant handles all request related errors like forbidden, not found,
|
||||||
|
/// etc.
|
||||||
|
EmptyResultSet,
|
||||||
|
/// This variant handles the not results found error provide by the upstream
|
||||||
|
/// search engines.
|
||||||
|
RequestError,
|
||||||
|
/// This variant handles all the errors which are unexpected or occur rarely
|
||||||
|
/// and are errors mostly related to failure in initialization of HeaderMap,
|
||||||
|
/// Selector errors and all other errors occurring within the code handling
|
||||||
|
/// the `upstream search engines`.
|
||||||
|
UnexpectedError,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for EngineError {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
match self {
|
||||||
|
EngineError::EmptyResultSet => {
|
||||||
|
write!(f, "The upstream search engine returned an empty result set")
|
||||||
|
}
|
||||||
|
EngineError::RequestError => {
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"Error occurred while requesting data from upstream search engine"
|
||||||
|
)
|
||||||
|
}
|
||||||
|
EngineError::UnexpectedError => {
|
||||||
|
write!(f, "An unexpected error occurred while processing the data")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl error_stack::Context for EngineError {}
|
||||||
|
|
||||||
|
/// A trait to define common behavior for all search engines.
|
||||||
|
#[async_trait::async_trait]
|
||||||
|
pub trait SearchEngine: Sync + Send {
|
||||||
|
/// This helper function fetches/requests the search results from the upstream search engine in
|
||||||
|
/// an html form.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
///
|
||||||
|
/// * `url` - It takes the url of the upstream search engine with the user requested search
|
||||||
|
/// query appended in the search parameters.
|
||||||
|
/// * `header_map` - It takes the http request headers to be sent to the upstream engine in
|
||||||
|
/// order to prevent being detected as a bot. It takes the header as a HeaderMap type.
|
||||||
|
/// * `request_timeout` - It takes the request timeout value as seconds which is used to limit
|
||||||
|
/// the amount of time for each request to remain connected when until the results can be provided
|
||||||
|
/// by the upstream engine.
|
||||||
|
///
|
||||||
|
/// # Error
|
||||||
|
///
|
||||||
|
/// It returns the html data as a string if the upstream engine provides the data as expected
|
||||||
|
/// otherwise it returns a custom `EngineError`.
|
||||||
|
async fn fetch_html_from_upstream(
|
||||||
|
&self,
|
||||||
|
url: &str,
|
||||||
|
header_map: reqwest::header::HeaderMap,
|
||||||
|
request_timeout: u8,
|
||||||
|
) -> Result<String, EngineError> {
|
||||||
|
// fetch the html from upstream search engine
|
||||||
|
Ok(reqwest::Client::new()
|
||||||
|
.get(url)
|
||||||
|
.timeout(Duration::from_secs(request_timeout as u64)) // Add timeout to request to avoid DDOSing the server
|
||||||
|
.headers(header_map) // add spoofed headers to emulate human behavior
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
.change_context(EngineError::RequestError)?
|
||||||
|
.text()
|
||||||
|
.await
|
||||||
|
.change_context(EngineError::RequestError)?)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// This function scrapes results from the upstream engine and puts all the scraped results like
|
||||||
|
/// title, visiting_url (href in html),engine (from which engine it was fetched from) and description
|
||||||
|
/// in a RawSearchResult and then adds that to HashMap whose keys are url and values are RawSearchResult
|
||||||
|
/// struct and then returns it within a Result enum.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
///
|
||||||
|
/// * `query` - Takes the user provided query to query to the upstream search engine with.
|
||||||
|
/// * `page` - Takes an u32 as an argument.
|
||||||
|
/// * `user_agent` - Takes a random user agent string as an argument.
|
||||||
|
/// * `request_timeout` - Takes a time (secs) as a value which controls the server request timeout.
|
||||||
|
///
|
||||||
|
/// # Errors
|
||||||
|
///
|
||||||
|
/// Returns an `EngineErrorKind` if the user is not connected to the internet or if their is failure to
|
||||||
|
/// reach the above `upstream search engine` page or if the `upstream search engine` is unable to
|
||||||
|
/// provide results for the requested search query and also returns error if the scraping selector
|
||||||
|
/// or HeaderMap fails to initialize.
|
||||||
|
async fn results(
|
||||||
|
&self,
|
||||||
|
query: &str,
|
||||||
|
page: u32,
|
||||||
|
user_agent: &str,
|
||||||
|
request_timeout: u8,
|
||||||
|
safe_search: u8,
|
||||||
|
) -> Result<HashMap<String, SearchResult>, EngineError>;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A named struct which stores the engine struct with the name of the associated engine.
|
||||||
|
pub struct EngineHandler {
|
||||||
|
/// It stores the engine struct wrapped in a box smart pointer as the engine struct implements
|
||||||
|
/// the `SearchEngine` trait.
|
||||||
|
engine: Box<dyn SearchEngine>,
|
||||||
|
/// It stores the name of the engine to which the struct is associated to.
|
||||||
|
name: &'static str,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Clone for EngineHandler {
|
||||||
|
fn clone(&self) -> Self {
|
||||||
|
Self::new(self.name).unwrap()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl EngineHandler {
|
||||||
|
/// Parses an engine name into an engine handler.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
///
|
||||||
|
/// * `engine_name` - It takes the name of the engine to which the struct was associated to.
|
||||||
|
///
|
||||||
|
/// # Returns
|
||||||
|
///
|
||||||
|
/// It returns an option either containing the value or a none if the engine is unknown
|
||||||
|
pub fn new(engine_name: &str) -> Option<Self> {
|
||||||
|
let engine: (&'static str, Box<dyn SearchEngine>) =
|
||||||
|
match engine_name.to_lowercase().as_str() {
|
||||||
|
"duckduckgo" => (
|
||||||
|
"duckduckgo",
|
||||||
|
Box::new(crate::engines::duckduckgo::DuckDuckGo),
|
||||||
|
),
|
||||||
|
"searx" => ("searx", Box::new(crate::engines::searx::Searx)),
|
||||||
|
_ => return None,
|
||||||
|
};
|
||||||
|
|
||||||
|
Some(Self {
|
||||||
|
engine: engine.1,
|
||||||
|
name: engine.0,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// This function converts the EngineHandler type into a tuple containing the engine name and
|
||||||
|
/// the associated engine struct.
|
||||||
|
pub fn into_name_engine(self) -> (&'static str, Box<dyn SearchEngine>) {
|
||||||
|
(self.name, self.engine)
|
||||||
|
}
|
||||||
|
}
|
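A small sketch of how the `EngineHandler` parser above can be used to turn user-selected engine names into handlers, silently skipping unknown names (illustrative, not part of the commit):

use websurfx::models::engine_models::EngineHandler;

fn selected_engines(names: &[&str]) -> Vec<EngineHandler> {
    // EngineHandler::new returns None for engines the code does not know about.
    names
        .iter()
        .filter_map(|name| EngineHandler::new(name))
        .collect()
}

// selected_engines(&["duckduckgo", "searx", "unknown"]) would yield two handlers.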
8 src/models/mod.rs Normal file
@ -0,0 +1,8 @@
|
|||||||
|
//! This module provides modules which in turn provides various models for aggregrating search
|
||||||
|
//! results, parsing config file, providing trait to standardize search engine handling code,
|
||||||
|
//! custom engine error for the search engine, etc.
|
||||||
|
|
||||||
|
pub mod aggregation_models;
|
||||||
|
pub mod engine_models;
|
||||||
|
pub mod parser_models;
|
||||||
|
pub mod server_models;
|
@@ -12,15 +12,12 @@ use serde::{Deserialize, Serialize};
 /// order to allow the deserializing the json back to struct in aggregate function in
 /// aggregator.rs and create a new struct out of it and then serialize it back to json and pass
 /// it to the template files.
-///
-/// # Fields
-//
-/// * `theme` - It stores the parsed theme option used to set a theme for the website.
-/// * `colorscheme` - It stores the parsed colorscheme option used to set a colorscheme for the
-/// theme being used.
-#[derive(Serialize, Deserialize, Clone)]
+#[derive(Serialize, Deserialize, Clone, Default)]
 pub struct Style {
+    /// It stores the parsed theme option used to set a theme for the website.
     pub theme: String,
+    /// It stores the parsed colorscheme option used to set a colorscheme for the
+    /// theme being used.
     pub colorscheme: String,
 }

@@ -36,3 +33,20 @@ impl Style {
         Style { theme, colorscheme }
     }
 }
+
+/// Configuration options for the aggregator.
+#[derive(Clone)]
+pub struct AggregatorConfig {
+    /// It stores the option to whether enable or disable random delays between
+    /// requests.
+    pub random_delay: bool,
+}
+
+/// Configuration options for the rate limiter middleware.
+#[derive(Clone)]
+pub struct RateLimiter {
+    /// The number of request that are allowed within a provided time limit.
+    pub number_of_requests: u8,
+    /// The time limit in which the quantity of requests that should be accepted.
+    pub time_limit: u8,
+}
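Illustrative construction of the structs above; the concrete values below are placeholders for what the Lua config parser would actually supply.

use websurfx::models::parser_models::{AggregatorConfig, RateLimiter, Style};

fn example_config_pieces() -> (Style, AggregatorConfig, RateLimiter) {
    (
        // Theme and colorscheme names are placeholders here.
        Style::new("simple".to_owned(), "catppuccin-mocha".to_owned()),
        AggregatorConfig { random_delay: true },
        RateLimiter { number_of_requests: 20, time_limit: 3 },
    )
}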
26 src/models/server_models.rs Normal file
@ -0,0 +1,26 @@
|
|||||||
|
//! This module provides the models to parse cookies and search parameters from the search
|
||||||
|
//! engine website.
|
||||||
|
use serde::Deserialize;
|
||||||
|
|
||||||
|
/// A named struct which deserializes all the user provided search parameters and stores them.
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
pub struct SearchParams {
|
||||||
|
/// It stores the search parameter option `q` (or query in simple words)
|
||||||
|
/// of the search url.
|
||||||
|
pub q: Option<String>,
|
||||||
|
/// It stores the search parameter `page` (or pageno in simple words)
|
||||||
|
/// of the search url.
|
||||||
|
pub page: Option<u32>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A named struct which is used to deserialize the cookies fetched from the client side.
|
||||||
|
#[allow(dead_code)]
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
pub struct Cookie {
|
||||||
|
/// It stores the theme name used in the website.
|
||||||
|
pub theme: String,
|
||||||
|
/// It stores the colorscheme name used for the website theme.
|
||||||
|
pub colorscheme: String,
|
||||||
|
/// It stores the user selected upstream search engines selected from the UI.
|
||||||
|
pub engines: Vec<String>,
|
||||||
|
}
|
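For orientation, a request such as `GET /search?q=rust&page=1` would deserialize into roughly the following (a sketch only; the actual extraction happens in the search route):

use websurfx::models::server_models::SearchParams;

fn example_params() -> SearchParams {
    SearchParams { q: Some("rust".to_owned()), page: Some(1) }
}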
@ -1,142 +0,0 @@
|
|||||||
//! This module provides public models for handling, storing and serializing of search results
|
|
||||||
//! data scraped from the upstream search engines.
|
|
||||||
|
|
||||||
use serde::{Deserialize, Serialize};
|
|
||||||
|
|
||||||
use crate::{config::parser_models::Style, engines::engine_models::EngineError};
|
|
||||||
|
|
||||||
/// A named struct to store the raw scraped search results scraped search results from the
|
|
||||||
/// upstream search engines before aggregating it.It derives the Clone trait which is needed
|
|
||||||
/// to write idiomatic rust using `Iterators`.
|
|
||||||
///
|
|
||||||
/// # Fields
|
|
||||||
///
|
|
||||||
/// * `title` - The title of the search result.
|
|
||||||
/// * `url` - The url which is accessed when clicked on it
|
|
||||||
/// (href url in html in simple words).
|
|
||||||
/// * `description` - The description of the search result.
|
|
||||||
/// * `engine` - The names of the upstream engines from which this results were provided.
|
|
||||||
#[derive(Clone, Serialize, Deserialize)]
|
|
||||||
#[serde(rename_all = "camelCase")]
|
|
||||||
pub struct SearchResult {
|
|
||||||
pub title: String,
|
|
||||||
pub url: String,
|
|
||||||
pub description: String,
|
|
||||||
pub engine: Vec<String>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl SearchResult {
|
|
||||||
/// Constructs a new `RawSearchResult` with the given arguments needed for the struct.
|
|
||||||
///
|
|
||||||
/// # Arguments
|
|
||||||
///
|
|
||||||
/// * `title` - The title of the search result.
|
|
||||||
/// * `url` - The url which is accessed when clicked on it
|
|
||||||
/// (href url in html in simple words).
|
|
||||||
/// * `description` - The description of the search result.
|
|
||||||
/// * `engine` - The names of the upstream engines from which this results were provided.
|
|
||||||
pub fn new(title: String, url: String, description: String, engine: Vec<String>) -> Self {
|
|
||||||
SearchResult {
|
|
||||||
title,
|
|
||||||
url,
|
|
||||||
description,
|
|
||||||
engine,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// A function which adds the engine name provided as a string into a vector of strings.
|
|
||||||
///
|
|
||||||
/// # Arguments
|
|
||||||
///
|
|
||||||
/// * `engine` - Takes an engine name provided as a String.
|
|
||||||
pub fn add_engines(&mut self, engine: String) {
|
|
||||||
self.engine.push(engine)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// A function which returns the engine name stored from the struct as a string.
|
|
||||||
///
|
|
||||||
/// # Returns
|
|
||||||
///
|
|
||||||
/// An engine name stored as a string from the struct.
|
|
||||||
pub fn engine(self) -> String {
|
|
||||||
self.engine.get(0).unwrap().to_string()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
///
|
|
||||||
#[derive(Serialize, Deserialize)]
|
|
||||||
pub struct EngineErrorInfo {
|
|
||||||
pub error: String,
|
|
||||||
pub engine: String,
|
|
||||||
pub severity_color: String,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl EngineErrorInfo {
|
|
||||||
pub fn new(error: &EngineError, engine: String) -> Self {
|
|
||||||
Self {
|
|
||||||
error: match error {
|
|
||||||
EngineError::RequestError => String::from("RequestError"),
|
|
||||||
EngineError::EmptyResultSet => String::from("EmptyResultSet"),
|
|
||||||
EngineError::UnexpectedError => String::from("UnexpectedError"),
|
|
||||||
},
|
|
||||||
engine,
|
|
||||||
severity_color: match error {
|
|
||||||
EngineError::RequestError => String::from("green"),
|
|
||||||
EngineError::EmptyResultSet => String::from("blue"),
|
|
||||||
EngineError::UnexpectedError => String::from("red"),
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// A named struct to store, serialize, deserialize the all the search results scraped and
|
|
||||||
/// aggregated from the upstream search engines.
|
|
||||||
///
|
|
||||||
/// # Fields
|
|
||||||
///
|
|
||||||
/// * `results` - Stores the individual serializable `SearchResult` struct into a vector of
|
|
||||||
/// `SearchResult` structs.
|
|
||||||
/// * `page_query` - Stores the current pages search query `q` provided in the search url.
|
|
||||||
/// * `style` - Stores the theming options for the website.
|
|
||||||
/// * `engine_errors_info` - Stores the information on which engines failed with their engine name
|
|
||||||
/// and the type of error that caused it.
|
|
||||||
/// * `empty_result_set` - Stores a boolean which indicates that no engines gave a result for the
|
|
||||||
/// given search query.
|
|
||||||
#[derive(Serialize, Deserialize)]
|
|
||||||
#[serde(rename_all = "camelCase")]
|
|
||||||
pub struct SearchResults {
|
|
||||||
pub results: Vec<SearchResult>,
|
|
||||||
pub page_query: String,
|
|
||||||
pub style: Style,
|
|
||||||
pub engine_errors_info: Vec<EngineErrorInfo>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl SearchResults {
|
|
||||||
/// Constructs a new `SearchResult` with the given arguments needed for the struct.
|
|
||||||
///
|
|
||||||
/// # Arguments
|
|
||||||
///
|
|
||||||
/// * `results` - Takes an argument of individual serializable `SearchResult` struct
|
|
||||||
/// and stores it into a vector of `SearchResult` structs.
|
|
||||||
/// * `page_query` - Takes an argument of current page`s search query `q` provided in
|
|
||||||
/// the search url.
|
|
||||||
/// * `empty_result_set` - Takes a boolean which indicates that no engines gave a result for the
|
|
||||||
/// given search query.
|
|
||||||
pub fn new(
|
|
||||||
results: Vec<SearchResult>,
|
|
||||||
page_query: String,
|
|
||||||
engine_errors_info: Vec<EngineErrorInfo>,
|
|
||||||
) -> Self {
|
|
||||||
SearchResults {
|
|
||||||
results,
|
|
||||||
page_query,
|
|
||||||
style: Style::new("".to_string(), "".to_string()),
|
|
||||||
engine_errors_info,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// A setter function to add website style to the return search results.
|
|
||||||
pub fn add_style(&mut self, style: Style) {
|
|
||||||
self.style = style;
|
|
||||||
}
|
|
||||||
}
|
|
@@ -1,27 +1,23 @@
 //! This module provides the functionality to scrape and gathers all the results from the upstream
 //! search engines and then removes duplicate results.

+use super::user_agent::random_user_agent;
+use crate::handler::paths::{file_path, FileType};
+use crate::models::{
+    aggregation_models::{EngineErrorInfo, SearchResult, SearchResults},
+    engine_models::{EngineError, EngineHandler},
+};
+use error_stack::Report;
+use rand::Rng;
+use regex::Regex;
 use std::{
     collections::HashMap,
     io::{BufReader, Read},
     time::Duration,
 };

-use super::{
-    aggregation_models::{EngineErrorInfo, SearchResult, SearchResults},
-    user_agent::random_user_agent,
-};
-use error_stack::Report;
-use rand::Rng;
-use regex::Regex;
 use std::{fs::File, io::BufRead};
 use tokio::task::JoinHandle;

-use crate::{
-    engines::engine_models::{EngineError, EngineHandler},
-    handler::paths::{file_path, FileType},
-};
-
 /// Aliases for long type annotations
 type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<EngineError>>>>;
@@ -64,14 +60,15 @@ type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<EngineError>>>>;
 /// function in either `searx` or `duckduckgo` or both otherwise returns a `SearchResults struct`
 /// containing appropriate values.
 pub async fn aggregate(
-    query: String,
+    query: &str,
     page: u32,
     random_delay: bool,
     debug: bool,
-    upstream_search_engines: Vec<EngineHandler>,
+    upstream_search_engines: &[EngineHandler],
     request_timeout: u8,
+    safe_search: u8,
 ) -> Result<SearchResults, Box<dyn std::error::Error>> {
-    let user_agent: String = random_user_agent();
+    let user_agent: &str = random_user_agent();

     // Add a random delay before making the request.
     if random_delay || !debug {
@ -80,19 +77,24 @@ pub async fn aggregate(
|
|||||||
tokio::time::sleep(Duration::from_secs(delay_secs)).await;
|
tokio::time::sleep(Duration::from_secs(delay_secs)).await;
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut names: Vec<&str> = vec![];
|
let mut names: Vec<&str> = Vec::with_capacity(0);
|
||||||
|
|
||||||
// create tasks for upstream result fetching
|
// create tasks for upstream result fetching
|
||||||
let mut tasks: FutureVec = FutureVec::new();
|
let mut tasks: FutureVec = FutureVec::new();
|
||||||
|
|
||||||
for engine_handler in upstream_search_engines {
|
for engine_handler in upstream_search_engines {
|
||||||
let (name, search_engine) = engine_handler.into_name_engine();
|
let (name, search_engine) = engine_handler.to_owned().into_name_engine();
|
||||||
names.push(name);
|
names.push(name);
|
||||||
let query: String = query.clone();
|
let query: String = query.to_owned();
|
||||||
let user_agent: String = user_agent.clone();
|
|
||||||
tasks.push(tokio::spawn(async move {
|
tasks.push(tokio::spawn(async move {
|
||||||
search_engine
|
search_engine
|
||||||
.results(query, page, user_agent.clone(), request_timeout)
|
.results(
|
||||||
|
&query,
|
||||||
|
page,
|
||||||
|
user_agent.clone(),
|
||||||
|
request_timeout,
|
||||||
|
safe_search,
|
||||||
|
)
|
||||||
.await
|
.await
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
@ -110,7 +112,7 @@ pub async fn aggregate(
|
|||||||
let mut result_map: HashMap<String, SearchResult> = HashMap::new();
|
let mut result_map: HashMap<String, SearchResult> = HashMap::new();
|
||||||
let mut engine_errors_info: Vec<EngineErrorInfo> = Vec::new();
|
let mut engine_errors_info: Vec<EngineErrorInfo> = Vec::new();
|
||||||
|
|
||||||
let mut handle_error = |error: Report<EngineError>, engine_name: String| {
|
let mut handle_error = |error: &Report<EngineError>, engine_name: &'static str| {
|
||||||
log::error!("Engine Error: {:?}", error);
|
log::error!("Engine Error: {:?}", error);
|
||||||
engine_errors_info.push(EngineErrorInfo::new(
|
engine_errors_info.push(EngineErrorInfo::new(
|
||||||
error.downcast_ref::<EngineError>().unwrap(),
|
error.downcast_ref::<EngineError>().unwrap(),
|
||||||
@ -120,7 +122,7 @@ pub async fn aggregate(
|
|||||||
|
|
||||||
for _ in 0..responses.len() {
|
for _ in 0..responses.len() {
|
||||||
let response = responses.pop().unwrap();
|
let response = responses.pop().unwrap();
|
||||||
let engine = names.pop().unwrap().to_string();
|
let engine = names.pop().unwrap();
|
||||||
|
|
||||||
if result_map.is_empty() {
|
if result_map.is_empty() {
|
||||||
match response {
|
match response {
|
||||||
@ -128,7 +130,7 @@ pub async fn aggregate(
|
|||||||
result_map = results.clone();
|
result_map = results.clone();
|
||||||
}
|
}
|
||||||
Err(error) => {
|
Err(error) => {
|
||||||
handle_error(error, engine);
|
handle_error(&error, engine);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
@ -140,39 +142,37 @@ pub async fn aggregate(
|
|||||||
result_map
|
result_map
|
||||||
.entry(key)
|
.entry(key)
|
||||||
.and_modify(|result| {
|
.and_modify(|result| {
|
||||||
result.add_engines(engine.clone());
|
result.add_engines(engine);
|
||||||
})
|
})
|
||||||
.or_insert_with(|| -> SearchResult { value });
|
.or_insert_with(|| -> SearchResult { value });
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
Err(error) => {
|
Err(error) => {
|
||||||
handle_error(error, engine);
|
handle_error(&error, engine);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut blacklist_map: HashMap<String, SearchResult> = HashMap::new();
|
if safe_search >= 3 {
|
||||||
filter_with_lists(
|
let mut blacklist_map: HashMap<String, SearchResult> = HashMap::new();
|
||||||
&mut result_map,
|
filter_with_lists(
|
||||||
&mut blacklist_map,
|
&mut result_map,
|
||||||
&file_path(FileType::BlockList)?,
|
&mut blacklist_map,
|
||||||
)?;
|
file_path(FileType::BlockList)?,
|
||||||
|
)?;
|
||||||
|
|
||||||
filter_with_lists(
|
filter_with_lists(
|
||||||
&mut blacklist_map,
|
&mut blacklist_map,
|
||||||
&mut result_map,
|
&mut result_map,
|
||||||
&file_path(FileType::AllowList)?,
|
file_path(FileType::AllowList)?,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
drop(blacklist_map);
|
drop(blacklist_map);
|
||||||
|
}
|
||||||
|
|
||||||
let results: Vec<SearchResult> = result_map.into_values().collect();
|
let results: Vec<SearchResult> = result_map.into_values().collect();
|
||||||
|
|
||||||
Ok(SearchResults::new(
|
Ok(SearchResults::new(results, query, &engine_errors_info))
|
||||||
results,
|
|
||||||
query.to_string(),
|
|
||||||
engine_errors_info,
|
|
||||||
))
|
|
||||||
}
|
}
|
||||||
|
|
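A hedged sketch of calling the reworked `aggregate` with its new borrowed arguments; the query, engine name, timeout and safe-search level below are illustrative.

use websurfx::models::engine_models::EngineHandler;
use websurfx::results::aggregator::aggregate;

async fn search_once() -> Result<(), Box<dyn std::error::Error>> {
    let engines = vec![EngineHandler::new("duckduckgo").unwrap()];
    // Arguments: query, page, random_delay, debug, engines, request_timeout, safe_search.
    let mut results = aggregate("rust programming", 1, true, false, &engines, 30, 2).await?;
    println!("{} aggregated results", results.results().len());
    Ok(())
}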
||||||
/// Filters a map of search results using a list of regex patterns.
|
/// Filters a map of search results using a list of regex patterns.
|
||||||
@ -194,7 +194,7 @@ pub fn filter_with_lists(
|
|||||||
let mut reader = BufReader::new(File::open(file_path)?);
|
let mut reader = BufReader::new(File::open(file_path)?);
|
||||||
|
|
||||||
for line in reader.by_ref().lines() {
|
for line in reader.by_ref().lines() {
|
||||||
let re = Regex::new(&line?)?;
|
let re = Regex::new(line?.trim())?;
|
||||||
|
|
||||||
// Iterate over each search result in the map and check if it matches the regex pattern
|
// Iterate over each search result in the map and check if it matches the regex pattern
|
||||||
for (url, search_result) in map_to_be_filtered.clone().into_iter() {
|
for (url, search_result) in map_to_be_filtered.clone().into_iter() {
|
||||||
@ -203,7 +203,10 @@ pub fn filter_with_lists(
|
|||||||
|| re.is_match(&search_result.description.to_lowercase())
|
|| re.is_match(&search_result.description.to_lowercase())
|
||||||
{
|
{
|
||||||
// If the search result matches the regex pattern, move it from the original map to the resultant map
|
// If the search result matches the regex pattern, move it from the original map to the resultant map
|
||||||
resultant_map.insert(url.clone(), map_to_be_filtered.remove(&url).unwrap());
|
resultant_map.insert(
|
||||||
|
url.to_owned(),
|
||||||
|
map_to_be_filtered.remove(&url.to_owned()).unwrap(),
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
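A sketch of applying the block/allow filtering on its own; the path below is illustrative, and the file is expected to hold one regex pattern per line as the function reads it.

use std::collections::HashMap;
use websurfx::models::aggregation_models::SearchResult;
use websurfx::results::aggregator::filter_with_lists;

fn apply_blocklist(
    mut results: HashMap<String, SearchResult>,
) -> Result<HashMap<String, SearchResult>, Box<dyn std::error::Error>> {
    let mut blocked: HashMap<String, SearchResult> = HashMap::new();
    // Matching entries are moved out of `results` into `blocked`.
    filter_with_lists(&mut results, &mut blocked, "./websurfx/blocklist.txt")?;
    Ok(results)
}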
@ -214,6 +217,7 @@ pub fn filter_with_lists(
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
use smallvec::smallvec;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::io::Write;
|
use std::io::Write;
|
||||||
use tempfile::NamedTempFile;
|
use tempfile::NamedTempFile;
|
||||||
@ -223,22 +227,22 @@ mod tests {
|
|||||||
// Create a map of search results to filter
|
// Create a map of search results to filter
|
||||||
let mut map_to_be_filtered = HashMap::new();
|
let mut map_to_be_filtered = HashMap::new();
|
||||||
map_to_be_filtered.insert(
|
map_to_be_filtered.insert(
|
||||||
"https://www.example.com".to_string(),
|
"https://www.example.com".to_owned(),
|
||||||
SearchResult {
|
SearchResult {
|
||||||
title: "Example Domain".to_string(),
|
title: "Example Domain".to_owned(),
|
||||||
url: "https://www.example.com".to_string(),
|
url: "https://www.example.com".to_owned(),
|
||||||
description: "This domain is for use in illustrative examples in documents."
|
description: "This domain is for use in illustrative examples in documents."
|
||||||
.to_string(),
|
.to_owned(),
|
||||||
engine: vec!["Google".to_string(), "Bing".to_string()],
|
engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
map_to_be_filtered.insert(
|
map_to_be_filtered.insert(
|
||||||
"https://www.rust-lang.org/".to_string(),
|
"https://www.rust-lang.org/".to_owned(),
|
||||||
SearchResult {
|
SearchResult {
|
||||||
title: "Rust Programming Language".to_string(),
|
title: "Rust Programming Language".to_owned(),
|
||||||
url: "https://www.rust-lang.org/".to_string(),
|
url: "https://www.rust-lang.org/".to_owned(),
|
||||||
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_string(),
|
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
|
||||||
engine: vec!["Google".to_string(), "DuckDuckGo".to_string()],
|
engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -267,22 +271,22 @@ mod tests {
|
|||||||
fn test_filter_with_lists_wildcard() -> Result<(), Box<dyn std::error::Error>> {
|
fn test_filter_with_lists_wildcard() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
let mut map_to_be_filtered = HashMap::new();
|
let mut map_to_be_filtered = HashMap::new();
|
||||||
map_to_be_filtered.insert(
|
map_to_be_filtered.insert(
|
||||||
"https://www.example.com".to_string(),
|
"https://www.example.com".to_owned(),
|
||||||
SearchResult {
|
SearchResult {
|
||||||
title: "Example Domain".to_string(),
|
title: "Example Domain".to_owned(),
|
||||||
url: "https://www.example.com".to_string(),
|
url: "https://www.example.com".to_owned(),
|
||||||
description: "This domain is for use in illustrative examples in documents."
|
description: "This domain is for use in illustrative examples in documents."
|
||||||
.to_string(),
|
.to_owned(),
|
||||||
engine: vec!["Google".to_string(), "Bing".to_string()],
|
engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
map_to_be_filtered.insert(
|
map_to_be_filtered.insert(
|
||||||
"https://www.rust-lang.org/".to_string(),
|
"https://www.rust-lang.org/".to_owned(),
|
||||||
SearchResult {
|
SearchResult {
|
||||||
title: "Rust Programming Language".to_string(),
|
title: "Rust Programming Language".to_owned(),
|
||||||
url: "https://www.rust-lang.org/".to_string(),
|
url: "https://www.rust-lang.org/".to_owned(),
|
||||||
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_string(),
|
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
|
||||||
engine: vec!["Google".to_string(), "DuckDuckGo".to_string()],
|
engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -327,13 +331,13 @@ mod tests {
|
|||||||
fn test_filter_with_lists_invalid_regex() {
|
fn test_filter_with_lists_invalid_regex() {
|
||||||
let mut map_to_be_filtered = HashMap::new();
|
let mut map_to_be_filtered = HashMap::new();
|
||||||
map_to_be_filtered.insert(
|
map_to_be_filtered.insert(
|
||||||
"https://www.example.com".to_string(),
|
"https://www.example.com".to_owned(),
|
||||||
SearchResult {
|
SearchResult {
|
||||||
title: "Example Domain".to_string(),
|
title: "Example Domain".to_owned(),
|
||||||
url: "https://www.example.com".to_string(),
|
url: "https://www.example.com".to_owned(),
|
||||||
description: "This domain is for use in illustrative examples in documents."
|
description: "This domain is for use in illustrative examples in documents."
|
||||||
.to_string(),
|
.to_owned(),
|
||||||
engine: vec!["Google".to_string(), "Bing".to_string()],
|
engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@@ -1,3 +1,6 @@
-pub mod aggregation_models;
+//! This module provides modules that handle the functionality to aggregate the fetched search
+//! results from the upstream search engines and filters it if safe search is set to 3 or 4. Also,
+//! provides various models to aggregate search results into a standardized form.
+
 pub mod aggregator;
 pub mod user_agent;
@@ -1,28 +1,34 @@
 //! This module provides the functionality to generate random user agent string.

+use std::sync::OnceLock;
+
 use fake_useragent::{Browsers, UserAgents, UserAgentsBuilder};

-static USER_AGENTS: once_cell::sync::Lazy<UserAgents> = once_cell::sync::Lazy::new(|| {
-    UserAgentsBuilder::new()
-        .cache(false)
-        .dir("/tmp")
-        .thread(1)
-        .set_browsers(
-            Browsers::new()
-                .set_chrome()
-                .set_safari()
-                .set_edge()
-                .set_firefox()
-                .set_mozilla(),
-        )
-        .build()
-});
+/// A static variable which stores the initially build `UserAgents` struct. So as it can be resused
+/// again and again without the need of reinitializing the `UserAgents` struct.
+static USER_AGENTS: OnceLock<UserAgents> = OnceLock::new();

 /// A function to generate random user agent to improve privacy of the user.
 ///
 /// # Returns
 ///
 /// A randomly generated user agent string.
-pub fn random_user_agent() -> String {
-    USER_AGENTS.random().to_string()
+pub fn random_user_agent() -> &'static str {
+    USER_AGENTS
+        .get_or_init(|| {
+            UserAgentsBuilder::new()
+                .cache(false)
+                .dir("/tmp")
+                .thread(1)
+                .set_browsers(
+                    Browsers::new()
+                        .set_chrome()
+                        .set_safari()
+                        .set_edge()
+                        .set_firefox()
+                        .set_mozilla(),
+                )
+                .build()
+        })
+        .random()
 }
||||||
|
@@ -1 +1,7 @@
+//! This module provides modules that handle the functionality of handling different routes/paths
+//! for the `websurfx` search engine website. Also it handles the parsing of search parameters in
+//! the search route. Also, caches the next, current and previous search results in the search
+//! routes with the help of the redis server.
+
+pub mod router;
 pub mod routes;
||||||
|
64
src/server/router.rs
Normal file
64
src/server/router.rs
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
//! This module provides the functionality to handle different routes of the `websurfx`
|
||||||
|
//! meta search engine website and provide appropriate response to each route/page
|
||||||
|
//! when requested.
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
config::parser::Config,
|
||||||
|
handler::paths::{file_path, FileType},
|
||||||
|
};
|
||||||
|
use actix_web::{get, web, HttpRequest, HttpResponse};
|
||||||
|
use handlebars::Handlebars;
|
||||||
|
use std::fs::read_to_string;
|
||||||
|
|
||||||
|
/// Handles the route of index page or main page of the `websurfx` meta search engine website.
|
||||||
|
#[get("/")]
|
||||||
|
pub async fn index(
|
||||||
|
hbs: web::Data<Handlebars<'_>>,
|
||||||
|
config: web::Data<Config>,
|
||||||
|
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
|
||||||
|
let page_content: String = hbs.render("index", &config.style).unwrap();
|
||||||
|
Ok(HttpResponse::Ok().body(page_content))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Handles the route of any other accessed route/page which is not provided by the
|
||||||
|
/// website essentially the 404 error page.
|
||||||
|
pub async fn not_found(
|
||||||
|
hbs: web::Data<Handlebars<'_>>,
|
||||||
|
config: web::Data<Config>,
|
||||||
|
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
|
||||||
|
let page_content: String = hbs.render("404", &config.style)?;
|
||||||
|
|
||||||
|
Ok(HttpResponse::Ok()
|
||||||
|
.content_type("text/html; charset=utf-8")
|
||||||
|
.body(page_content))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Handles the route of robots.txt page of the `websurfx` meta search engine website.
|
||||||
|
#[get("/robots.txt")]
|
||||||
|
pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
|
||||||
|
let page_content: String =
|
||||||
|
read_to_string(format!("{}/robots.txt", file_path(FileType::Theme)?))?;
|
||||||
|
Ok(HttpResponse::Ok()
|
||||||
|
.content_type("text/plain; charset=ascii")
|
||||||
|
.body(page_content))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Handles the route of about page of the `websurfx` meta search engine website.
|
||||||
|
#[get("/about")]
|
||||||
|
pub async fn about(
|
||||||
|
hbs: web::Data<Handlebars<'_>>,
|
||||||
|
config: web::Data<Config>,
|
||||||
|
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
|
||||||
|
let page_content: String = hbs.render("about", &config.style)?;
|
||||||
|
Ok(HttpResponse::Ok().body(page_content))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Handles the route of settings page of the `websurfx` meta search engine website.
|
||||||
|
#[get("/settings")]
|
||||||
|
pub async fn settings(
|
||||||
|
hbs: web::Data<Handlebars<'_>>,
|
||||||
|
config: web::Data<Config>,
|
||||||
|
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
|
||||||
|
let page_content: String = hbs.render("settings", &config.style)?;
|
||||||
|
Ok(HttpResponse::Ok().body(page_content))
|
||||||
|
}
|
3
src/server/routes/mod.rs
Normal file
3
src/server/routes/mod.rs
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
//! This module provides modules to handle various routes in the search engine website.
|
||||||
|
|
||||||
|
pub mod search;
|
@ -1,33 +1,38 @@
-//! This module provides the functionality to handle different routes of the `websurfx`
-//! meta search engine website and provide appropriate response to each route/page
-//! when requested.
+//! This module handles the search route of the search engine website.

-use std::fs::read_to_string;
-
 use crate::{
     cache::cacher::RedisCache,
     config::parser::Config,
-    engines::engine_models::EngineHandler,
     handler::paths::{file_path, FileType},
-    results::{aggregation_models::SearchResults, aggregator::aggregate},
+    models::{aggregation_models::SearchResults, engine_models::EngineHandler},
+    results::aggregator::aggregate,
 };
 use actix_web::{get, web, HttpRequest, HttpResponse};
 use handlebars::Handlebars;
+use regex::Regex;
 use serde::Deserialize;
+use std::{
+    fs::{read_to_string, File},
+    io::{BufRead, BufReader, Read},
+};
 use tokio::join;

+// ---- Constants ----
+/// Initialize redis cache connection once and store it on the heap.
+static REDIS_CACHE: async_once_cell::OnceCell<RedisCache> = async_once_cell::OnceCell::new();
+
 /// A named struct which deserializes all the user provided search parameters and stores them.
-///
-/// # Fields
-///
-/// * `q` - It stores the search parameter option `q` (or query in simple words)
-/// of the search url.
-/// * `page` - It stores the search parameter `page` (or pageno in simple words)
-/// of the search url.
 #[derive(Deserialize)]
-struct SearchParams {
+pub struct SearchParams {
+    /// It stores the search parameter option `q` (or query in simple words)
+    /// of the search url.
     q: Option<String>,
+    /// It stores the search parameter `page` (or pageno in simple words)
+    /// of the search url.
     page: Option<u32>,
+    /// It stores the search parameter `safesearch` (or safe search level in simple words) of the
+    /// search url.
+    safesearch: Option<u8>,
 }

 /// Handles the route of index page or main page of the `websurfx` meta search engine website.
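
Editorial aside, not part of the commit: `SearchParams` is filled by actix-web's query extractor, so a request like `/search?q=rust&page=2&safesearch=1` maps straight onto the three optional fields. A self-contained sketch of that mapping, using the serde_urlencoded crate (an assumption for illustration; it is the format `web::Query` relies on, not a declared dependency of this crate):

// Sketch only: how a query string deserializes into SearchParams.
#[derive(serde::Deserialize, Debug)]
struct SearchParams {
    q: Option<String>,
    page: Option<u32>,
    safesearch: Option<u8>,
}

fn main() {
    let params: SearchParams =
        serde_urlencoded::from_str("q=rust&page=2&safesearch=1").unwrap();
    // -> SearchParams { q: Some("rust"), page: Some(2), safesearch: Some(1) }
    println!("{params:?}");

    // Missing parameters simply become None rather than failing deserialization.
    let bare: SearchParams = serde_urlencoded::from_str("q=rust").unwrap();
    println!("{bare:?}");
}
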
@ -54,18 +59,15 @@ pub async fn not_found(
 }

 /// A named struct which is used to deserialize the cookies fetched from the client side.
-///
-/// # Fields
-///
-/// * `theme` - It stores the theme name used in the website.
-/// * `colorscheme` - It stores the colorscheme name used for the website theme.
-/// * `engines` - It stores the user selected upstream search engines selected from the UI.
 #[allow(dead_code)]
 #[derive(Deserialize)]
-struct Cookie {
-    theme: String,
-    colorscheme: String,
-    engines: Vec<String>,
+struct Cookie<'a> {
+    /// It stores the theme name used in the website.
+    theme: &'a str,
+    /// It stores the colorscheme name used for the website theme.
+    colorscheme: &'a str,
+    /// It stores the user selected upstream search engines selected from the UI.
+    engines: Vec<&'a str>,
 }

 /// Handles the route of search page of the `websurfx` meta search engine website and it takes
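
Editorial aside, not part of the commit: the `Cookie<'a>` struct above now borrows its strings from the raw `appCookie` value instead of allocating owned `String`s. The exact payload written by the settings page is not shown in this diff, so the JSON below is only a plausible example with field names taken from the struct:

// Sketch: a plausible appCookie payload deserialized zero-copy into Cookie<'a>.
#[derive(serde::Deserialize, Debug)]
struct Cookie<'a> {
    theme: &'a str,
    colorscheme: &'a str,
    engines: Vec<&'a str>,
}

fn main() {
    let raw = r#"{"theme":"simple","colorscheme":"catppuccin-mocha","engines":["DuckDuckGo","Searx"]}"#;
    // Borrowed &str fields require the input string to outlive the struct,
    // which is why the handler parses the cookie value in place.
    let cookie: Cookie<'_> = serde_json::from_str(raw).unwrap();
    println!("{cookie:?}");
}
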
@ -101,42 +103,58 @@ pub async fn search(
         None => 1,
     };

+    let safe_search: u8 = match config.safe_search {
+        3..=4 => config.safe_search,
+        _ => match &params.safesearch {
+            Some(safesearch) => match safesearch {
+                0..=2 => *safesearch,
+                _ => 1,
+            },
+            None => config.safe_search,
+        },
+    };
+
     let (_, results, _) = join!(
         results(
             format!(
-                "http://{}:{}/search?q={}&page={}",
+                "http://{}:{}/search?q={}&page={}&safesearch={}",
                 config.binding_ip,
                 config.port,
                 query,
-                page - 1
+                page - 1,
+                safe_search
             ),
             &config,
-            query.to_string(),
+            query,
             page - 1,
             req.clone(),
+            safe_search
         ),
         results(
             format!(
-                "http://{}:{}/search?q={}&page={}",
-                config.binding_ip, config.port, query, page
+                "http://{}:{}/search?q={}&page={}&safesearch={}",
+                config.binding_ip, config.port, query, page, safe_search
            ),
             &config,
-            query.to_string(),
+            query,
             page,
             req.clone(),
+            safe_search
         ),
         results(
             format!(
-                "http://{}:{}/search?q={}&page={}",
+                "http://{}:{}/search?q={}&page={}&safesearch={}",
                 config.binding_ip,
                 config.port,
                 query,
-                page + 1
+                page + 1,
+                safe_search
             ),
             &config,
-            query.to_string(),
+            query,
             page + 1,
             req.clone(),
+            safe_search
         )
     );

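
Editorial restatement, not extra code from the commit: the new `safe_search` match gives the server-side setting absolute priority when it is 3 or 4, otherwise honours the request's `safesearch` parameter when it is in 0..=2, degrades out-of-range values to 1 (Low), and falls back to the configured level when the parameter is absent. The same rule written as a standalone function, for clarity only:

// Standalone restatement of the safe-search precedence rule.
fn resolve_safe_search(config_level: u8, param: Option<u8>) -> u8 {
    match config_level {
        // Levels 3 and 4 are enforced server-side and cannot be lowered per request.
        3..=4 => config_level,
        _ => match param {
            Some(level @ 0..=2) => level,
            Some(_) => 1,          // out-of-range values degrade to "Low"
            None => config_level,  // no parameter: use the configured default
        },
    }
}

fn main() {
    assert_eq!(resolve_safe_search(4, Some(0)), 4);
    assert_eq!(resolve_safe_search(2, Some(0)), 0);
    assert_eq!(resolve_safe_search(2, Some(9)), 1);
    assert_eq!(resolve_safe_search(2, None), 2);
}
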
@ -149,35 +167,72 @@ pub async fn search(
     }
 }

-/// Fetches the results for a query and page.
-/// First checks the redis cache, if that fails it gets proper results
+/// Fetches the results for a query and page. It First checks the redis cache, if that
+/// fails it gets proper results by requesting from the upstream search engines.
+///
+/// # Arguments
+///
+/// * `url` - It takes the url of the current page that requested the search results for a
+/// particular search query.
+/// * `config` - It takes a parsed config struct.
+/// * `query` - It takes the page number as u32 value.
+/// * `req` - It takes the `HttpRequest` struct as a value.
+///
+/// # Error
+///
+/// It returns the `SearchResults` struct if the search results could be successfully fetched from
+/// the cache or from the upstream search engines otherwise it returns an appropriate error.
 async fn results(
     url: String,
     config: &Config,
-    query: String,
+    query: &str,
     page: u32,
     req: HttpRequest,
+    safe_search: u8,
 ) -> Result<SearchResults, Box<dyn std::error::Error>> {
-    //Initialize redis cache connection struct
-    let mut redis_cache = RedisCache::new(config.redis_url.clone())?;
+    // Initialize redis cache connection struct
+    let mut redis_cache: RedisCache = REDIS_CACHE
+        .get_or_init(async {
+            // Initialize redis cache connection pool only one and store it in the heap.
+            RedisCache::new(&config.redis_url, 5).await.unwrap()
+        })
+        .await
+        .clone();
     // fetch the cached results json.
-    let cached_results_json = redis_cache.cached_json(&url);
+    let cached_results_json: Result<String, error_stack::Report<crate::cache::error::PoolError>> =
+        redis_cache.clone().cached_json(&url).await;
     // check if fetched cache results was indeed fetched or it was an error and if so
     // handle the data accordingly.
     match cached_results_json {
-        Ok(results) => Ok(serde_json::from_str::<SearchResults>(&results).unwrap()),
+        Ok(results) => Ok(serde_json::from_str::<SearchResults>(&results)?),
         Err(_) => {
+            if safe_search == 4 {
+                let mut results: SearchResults = SearchResults::default();
+                let mut _flag: bool =
+                    is_match_from_filter_list(file_path(FileType::BlockList)?, query)?;
+                _flag = !is_match_from_filter_list(file_path(FileType::AllowList)?, query)?;
+
+                if _flag {
+                    results.set_disallowed();
+                    results.add_style(&config.style);
+                    results.set_page_query(query);
+                    redis_cache
+                        .cache_results(&serde_json::to_string(&results)?, &url)
+                        .await?;
+                    return Ok(results);
+                }
+            }
+
             // check if the cookie value is empty or not if it is empty then use the
             // default selected upstream search engines from the config file otherwise
             // parse the non-empty cookie and grab the user selected engines from the
             // UI and use that.
-            let mut results: crate::results::aggregation_models::SearchResults = match req
-                .cookie("appCookie")
-            {
+            let mut results: SearchResults = match req.cookie("appCookie") {
                 Some(cookie_value) => {
-                    let cookie_value: Cookie = serde_json::from_str(cookie_value.name_value().1)?;
-                    let engines = cookie_value
+                    let cookie_value: Cookie<'_> =
+                        serde_json::from_str(cookie_value.name_value().1)?;
+
+                    let engines: Vec<EngineHandler> = cookie_value
                         .engines
                         .iter()
                         .filter_map(|name| EngineHandler::new(name))
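
Editorial aside, not part of the commit: the `REDIS_CACHE` static above relies on `async_once_cell::OnceCell::get_or_init`, which awaits the supplied future at most once, so every request after the first reuses the same connection pool instead of reconnecting. A minimal standalone sketch of that behaviour, with a made-up `connect()` standing in for the Redis pool setup:

// Minimal demonstration of async_once_cell's once-only async initialization.
use async_once_cell::OnceCell;

static POOL: OnceCell<String> = OnceCell::new();

async fn connect() -> String {
    // Stand-in for expensive setup such as opening Redis connections.
    println!("initializing once");
    String::from("pool")
}

#[tokio::main]
async fn main() {
    // Only the first call runs connect(); later calls return the cached value.
    let a = POOL.get_or_init(connect()).await;
    let b = POOL.get_or_init(connect()).await;
    assert!(std::ptr::eq(a, b));
}
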
@ -188,8 +243,9 @@ async fn results(
                         page,
                         config.aggregator.random_delay,
                         config.debug,
-                        engines,
+                        &engines,
                         config.request_timeout,
+                        safe_search,
                     )
                     .await?
                 }
@ -199,19 +255,43 @@ async fn results(
                         page,
                         config.aggregator.random_delay,
                         config.debug,
-                        config.upstream_search_engines.clone(),
+                        &config.upstream_search_engines,
                         config.request_timeout,
+                        safe_search,
                     )
                     .await?
                 }
             };
-            results.add_style(config.style.clone());
-            redis_cache.cache_results(serde_json::to_string(&results)?, &url)?;
+            if results.engine_errors_info().is_empty() && results.results().is_empty() {
+                results.set_filtered();
+            }
+            results.add_style(&config.style);
+            redis_cache
+                .cache_results(&serde_json::to_string(&results)?, &url)
+                .await?;
             Ok(results)
         }
     }
 }

+/// A helper function which checks whether the search query contains any keywords which should be
+/// disallowed/allowed based on the regex based rules present in the blocklist and allowlist files.
+fn is_match_from_filter_list(
+    file_path: &str,
+    query: &str,
+) -> Result<bool, Box<dyn std::error::Error>> {
+    let mut flag = false;
+    let mut reader = BufReader::new(File::open(file_path)?);
+    for line in reader.by_ref().lines() {
+        let re = Regex::new(&line?)?;
+        if re.is_match(query) {
+            flag = true;
+            break;
+        }
+    }
+    Ok(flag)
+}
+
 /// Handles the route of robots.txt page of the `websurfx` meta search engine website.
 #[get("/robots.txt")]
 pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
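
Editorial aside, not part of the commit: `is_match_from_filter_list` treats every line of the given file as a regular expression and reports whether any of them matches the query; the actual blocklist and allowlist files ship with the repository and are not shown in this diff. A self-contained sketch of the same per-line matching, with an in-memory list instead of files on disk and purely hypothetical rules:

// Sketch of the per-line regex matching that is_match_from_filter_list performs.
use regex::Regex;

fn matches_any(rules: &[&str], query: &str) -> Result<bool, regex::Error> {
    for rule in rules {
        // Each rule is compiled and tested against the query; the first hit wins.
        if Regex::new(rule)?.is_match(query) {
            return Ok(true);
        }
    }
    Ok(false)
}

fn main() {
    // Hypothetical rules; the real filter files may use different patterns.
    let blocklist = ["(?i)example-blocked-term", r"\bforbidden\b"];
    assert!(matches_any(&blocklist, "totally forbidden query").unwrap());
    assert!(!matches_any(&blocklist, "harmless query").unwrap());
}
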
@ -10,6 +10,21 @@ production_use = false -- whether to use production mode or not (in other words
 -- if production_use is set to true
 -- There will be a random delay before sending the request to the search engines, this is to prevent DDoSing the upstream search engines from a large number of simultaneous requests.
 request_timeout = 30 -- timeout for the search requests sent to the upstream search engines to be fetched (value in seconds).
+rate_limiter = {
+    number_of_requests = 20, -- The number of request that are allowed within a provided time limit.
+    time_limit = 3, -- The time limit in which the quantity of requests that should be accepted.
+}
+
+-- ### Search ###
+-- Filter results based on different levels. The levels provided are:
+-- {{
+-- 0 - None
+-- 1 - Low
+-- 2 - Moderate
+-- 3 - High
+-- 4 - Aggressive
+-- }}
+safe_search = 2

 -- ### Website ###
 -- The different colorschemes provided are:
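
Editorial aside, not part of the commit: these Lua settings reach the Rust side through mlua, which the crate uses to evaluate the config file. A rough sketch of how the new `safe_search` and `rate_limiter` keys could be read; the real parser lives in `config::parser` and may structure this differently:

// Rough sketch of pulling the new keys out of the Lua config with mlua.
use mlua::{Lua, Table};

fn main() -> mlua::Result<()> {
    let lua = Lua::new();
    // Inline config snippet standing in for loading websurfx/config.lua from disk.
    lua.load(
        r#"
        safe_search = 2
        rate_limiter = { number_of_requests = 20, time_limit = 3 }
        "#,
    )
    .exec()?;

    let globals = lua.globals();
    let safe_search: u8 = globals.get("safe_search")?;
    let rate_limiter: Table = globals.get("rate_limiter")?;
    let number_of_requests: u8 = rate_limiter.get("number_of_requests")?;
    let time_limit: u8 = rate_limiter.get("time_limit")?;
    println!("safe_search={safe_search}, {number_of_requests} requests per {time_limit}s");
    Ok(())
}
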
@ -34,4 +49,7 @@ theme = "simple" -- the theme name which should be used for the website
 redis_url = "redis://127.0.0.1:8082" -- redis connection url address on which the client should connect on.

 -- ### Search Engines ###
-upstream_search_engines = { DuckDuckGo = true, Searx = false } -- select the upstream search engines from which the results should be fetched.
+upstream_search_engines = {
+    DuckDuckGo = true,
+    Searx = false,
+} -- select the upstream search engines from which the results should be fetched.