mirror of
https://github.com/neon-mmd/websurfx.git
synced 2024-11-22 05:58:21 -05:00
Merge branch 'rolling' into feat-rate-limiter-for-websurfx
This commit is contained in:
commit
2790eefba8
12
Cargo.lock
generated
12
Cargo.lock
generated
@ -544,18 +544,18 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "clap"
|
||||
version = "4.4.1"
|
||||
version = "4.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7c8d502cbaec4595d2e7d5f61e318f05417bd2b66fdc3809498f0d3fdf0bea27"
|
||||
checksum = "6a13b88d2c62ff462f88e4a121f17a82c1af05693a2f192b5c38d14de73c19f6"
|
||||
dependencies = [
|
||||
"clap_builder",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap_builder"
|
||||
version = "4.4.1"
|
||||
version = "4.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5891c7bc0edb3e1c2204fc5e94009affabeb1821c9e5fdc3959536c5c0bb984d"
|
||||
checksum = "2bb9faaa7c2ef94b2743a21f5a29e6f0010dff4caa69ac8e9d6cf8b6fa74da08"
|
||||
dependencies = [
|
||||
"anstyle",
|
||||
"clap_lex",
|
||||
@ -2764,9 +2764,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "rustix"
|
||||
version = "0.38.10"
|
||||
version = "0.38.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ed6248e1caa625eb708e266e06159f135e8c26f2bb7ceb72dc4b2766d0340964"
|
||||
checksum = "c0c3dde1fc030af041adc40e79c0e7fbcf431dd24870053d187d7c66e4b87453"
|
||||
dependencies = [
|
||||
"bitflags 2.4.0",
|
||||
"errno",
|
||||
|
@ -20,8 +20,8 @@ serde_json = {version="1.0.105"}
|
||||
fake-useragent = {version="0.1.3"}
|
||||
env_logger = {version="0.10.0"}
|
||||
log = {version="0.4.20"}
|
||||
rlua = {version="0.19.7"}
|
||||
redis = {version="0.23.3"}
|
||||
mlua = {version="0.8.10", features=["luajit"]}
|
||||
redis = {version="0.23.3", features=["tokio-comp","connection-manager"]}
|
||||
md5 = {version="0.7.0"}
|
||||
rand={version="0.8.5"}
|
||||
once_cell = {version="1.18.0"}
|
||||
|
@ -51,7 +51,7 @@
|
||||
- **Getting Started**
|
||||
- [🔭 Preview](#preview-)
|
||||
- [🚀 Features](#features-)
|
||||
- [🛠️ Installation and Testing](#installation-and-testing-)
|
||||
- [🛠️ Installation and Testing](#installation-and-testing-%EF%B8%8F)
|
||||
- [🔧 Configuration](#configuration-)
|
||||
- **Feature Overview**
|
||||
- [🎨 Theming](#theming-)
|
||||
|
BIN
public/images/barricade.png
Normal file
BIN
public/images/barricade.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 892 KiB |
BIN
public/images/filter.png
Normal file
BIN
public/images/filter.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 102 KiB |
@ -132,6 +132,35 @@ body {
|
||||
width: 1.2rem;
|
||||
height: 1.2rem;
|
||||
}
|
||||
.results .result_disallowed,
|
||||
.results .result_filtered {
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
gap: 10rem;
|
||||
font-size: 2rem;
|
||||
color: var(--foreground-color);
|
||||
margin: 0rem 7rem;
|
||||
}
|
||||
|
||||
.results .result_disallowed .user_query,
|
||||
.results .result_filtered .user_query {
|
||||
color: var(--background-color);
|
||||
font-weight: 300;
|
||||
}
|
||||
|
||||
.results .result_disallowed img,
|
||||
.results .result_filtered img {
|
||||
width: 30rem;
|
||||
}
|
||||
|
||||
.results .result_disallowed div,
|
||||
.results .result_filtered div {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 1rem;
|
||||
line-break: strict;
|
||||
}
|
||||
|
||||
/* styles for the footer and header */
|
||||
|
||||
|
@ -1,37 +1,69 @@
|
||||
{{>header this.style}}
|
||||
<main class="results">
|
||||
{{>search_bar this}}
|
||||
<div class="results_aggregated">
|
||||
{{#if results}} {{#each results}}
|
||||
<div class="result">
|
||||
<h1><a href="{{{this.url}}}">{{{this.title}}}</a></h1>
|
||||
<small>{{{this.url}}}</small>
|
||||
<p>{{{this.description}}}</p>
|
||||
<div class="upstream_engines">
|
||||
{{#each engine}}
|
||||
<span>{{{this}}}</span>
|
||||
{{/each}}
|
||||
</div>
|
||||
{{>search_bar this}}
|
||||
<div class="results_aggregated">
|
||||
{{#if results}} {{#each results}}
|
||||
<div class="result">
|
||||
<h1><a href="{{{this.url}}}">{{{this.title}}}</a></h1>
|
||||
<small>{{{this.url}}}</small>
|
||||
<p>{{{this.description}}}</p>
|
||||
<div class="upstream_engines">
|
||||
{{#each engine}}
|
||||
<span>{{{this}}}</span>
|
||||
{{/each}}
|
||||
</div>
|
||||
</div>
|
||||
{{/each}} {{else}} {{#if disallowed}}
|
||||
<div class="result_disallowed">
|
||||
<div class="description">
|
||||
<p>
|
||||
Your search - <span class="user_query">{{{this.pageQuery}}}</span> -
|
||||
has been disallowed.
|
||||
</p>
|
||||
<p class="description_paragraph">Dear user,</p>
|
||||
<p class="description_paragraph">
|
||||
The query - <span class="user_query">{{{this.pageQuery}}}</span> - has
|
||||
been blacklisted via server configuration and hence disallowed by the
|
||||
server. Henceforth no results could be displayed for your query.
|
||||
</p>
|
||||
</div>
|
||||
<img src="./images/barricade.png" alt="Image of a Barricade" />
|
||||
</div>
|
||||
{{else}} {{#if filtered}}
|
||||
<div class="result_filtered">
|
||||
<div class="description">
|
||||
<p>
|
||||
Your search - <span class="user_query">{{{this.pageQuery}}}</span> -
|
||||
has been filtered.
|
||||
</p>
|
||||
<p class="description_paragraph">Dear user,</p>
|
||||
<p class="description_paragraph">
|
||||
All the search results contain results that has been configured to be
|
||||
filtered out via server configuration and henceforth has been
|
||||
completely filtered out.
|
||||
</p>
|
||||
</div>
|
||||
<img src="./images/filter.png" alt="Image of a paper inside a funnel" />
|
||||
</div>
|
||||
{{else}}
|
||||
<div class="result_not_found">
|
||||
<p>Your search - {{{this.pageQuery}}} - did not match any documents.</p>
|
||||
<p class="suggestions">Suggestions:</p>
|
||||
<ul>
|
||||
<li>Make sure that all words are spelled correctly.</li>
|
||||
<li>Try different keywords.</li>
|
||||
<li>Try more general keywords.</li>
|
||||
</ul>
|
||||
<img src="./images/no_results.gif" alt="Man fishing gif" />
|
||||
</div>
|
||||
{{/if}} {{/if}} {{/if}}
|
||||
</div>
|
||||
{{/each}} {{else}}
|
||||
<div class="result_not_found">
|
||||
<p>Your search - {{{this.pageQuery}}} - did not match any documents.</p>
|
||||
<p class="suggestions">Suggestions:</p>
|
||||
<ul>
|
||||
<li>Make sure that all words are spelled correctly.</li>
|
||||
<li>Try different keywords.</li>
|
||||
<li>Try more general keywords.</li>
|
||||
</ul>
|
||||
<img src="./images/no_results.gif" alt="Man fishing gif" />
|
||||
<div class="page_navigation">
|
||||
<button type="button" onclick="navigate_backward()">
|
||||
← previous
|
||||
</button>
|
||||
<button type="button" onclick="navigate_forward()">next →</button>
|
||||
</div>
|
||||
{{/if}}
|
||||
</div>
|
||||
<div class="page_navigation">
|
||||
<button type="button" onclick="navigate_backward()">
|
||||
← previous
|
||||
</button>
|
||||
<button type="button" onclick="navigate_forward()">next →</button>
|
||||
</div>
|
||||
</main>
|
||||
<script src="static/index.js"></script>
|
||||
<script src="static/pagination.js"></script>
|
||||
|
@ -43,6 +43,7 @@ impl SearchEngine for DuckDuckGo {
|
||||
page: u32,
|
||||
user_agent: &str,
|
||||
request_timeout: u8,
|
||||
_safe_search: u8,
|
||||
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
||||
// Page number can be missing or empty string and so appropriate handling is required
|
||||
// so that upstream server receives valid page number.
|
||||
|
@ -71,6 +71,7 @@ pub trait SearchEngine: Sync + Send {
|
||||
page: u32,
|
||||
user_agent: &str,
|
||||
request_timeout: u8,
|
||||
safe_search: u8,
|
||||
) -> Result<HashMap<String, SearchResult>, EngineError>;
|
||||
}
|
||||
|
||||
|
@ -42,12 +42,21 @@ impl SearchEngine for Searx {
|
||||
page: u32,
|
||||
user_agent: &str,
|
||||
request_timeout: u8,
|
||||
mut safe_search: u8,
|
||||
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
||||
// Page number can be missing or empty string and so appropriate handling is required
|
||||
// so that upstream server receives valid page number.
|
||||
if safe_search == 3 {
|
||||
safe_search = 2;
|
||||
};
|
||||
|
||||
let url: String = match page {
|
||||
0 | 1 => format!("https://searx.work/search?q={query}&pageno=1"),
|
||||
_ => format!("https://searx.work/search?q={query}&pageno={page}"),
|
||||
0 | 1 => {
|
||||
format!("https://searx.work/search?q={query}&pageno=1&safesearch={safe_search}")
|
||||
}
|
||||
_ => format!(
|
||||
"https://searx.work/search?q={query}&pageno={page}&safesearch={safe_search}"
|
||||
),
|
||||
};
|
||||
|
||||
// initializing headers and adding appropriate headers.
|
||||
|
@ -102,13 +102,15 @@ impl EngineErrorInfo {
|
||||
/// and the type of error that caused it.
|
||||
/// * `empty_result_set` - Stores a boolean which indicates that no engines gave a result for the
|
||||
/// given search query.
|
||||
#[derive(Serialize, Deserialize)]
|
||||
#[derive(Serialize, Deserialize, Default)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct SearchResults {
|
||||
pub results: Vec<SearchResult>,
|
||||
pub page_query: String,
|
||||
pub style: Style,
|
||||
pub engine_errors_info: SmallVec<[EngineErrorInfo; 0]>,
|
||||
pub engine_errors_info: Vec<EngineErrorInfo>,
|
||||
pub disallowed: bool,
|
||||
pub filtered: bool,
|
||||
}
|
||||
|
||||
impl SearchResults {
|
||||
@ -122,6 +124,7 @@ impl SearchResults {
|
||||
/// the search url.
|
||||
/// * `empty_result_set` - Takes a boolean which indicates that no engines gave a result for the
|
||||
/// given search query.
|
||||
/// * ``
|
||||
pub fn new(
|
||||
results: Vec<SearchResult>,
|
||||
page_query: &str,
|
||||
@ -131,12 +134,38 @@ impl SearchResults {
|
||||
results,
|
||||
page_query: page_query.to_owned(),
|
||||
style: Style::default(),
|
||||
engine_errors_info: SmallVec::from(engine_errors_info),
|
||||
engine_errors_info: engine_errors_info.to_owned(),
|
||||
disallowed: Default::default(),
|
||||
filtered: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
/// A setter function to add website style to the return search results.
|
||||
pub fn add_style(&mut self, style: &Style) {
|
||||
self.style = style.to_owned();
|
||||
self.style = style.clone();
|
||||
}
|
||||
|
||||
/// A setter function that sets disallowed to true.
|
||||
pub fn set_disallowed(&mut self) {
|
||||
self.disallowed = true;
|
||||
}
|
||||
|
||||
/// A setter function to set the current page search query.
|
||||
pub fn set_page_query(&mut self, page: &str) {
|
||||
self.page_query = page.to_owned();
|
||||
}
|
||||
|
||||
/// A setter function that sets the filtered to true.
|
||||
pub fn set_filtered(&mut self) {
|
||||
self.filtered = true;
|
||||
}
|
||||
|
||||
/// A getter function that gets the value of `engine_errors_info`.
|
||||
pub fn engine_errors_info(&mut self) -> Vec<EngineErrorInfo> {
|
||||
std::mem::take(&mut self.engine_errors_info)
|
||||
}
|
||||
/// A getter function that gets the value of `results`.
|
||||
pub fn results(&mut self) -> Vec<SearchResult> {
|
||||
self.results.clone()
|
||||
}
|
||||
}
|
||||
|
@ -70,6 +70,7 @@ pub async fn aggregate(
|
||||
debug: bool,
|
||||
upstream_search_engines: &[EngineHandler],
|
||||
request_timeout: u8,
|
||||
safe_search: u8,
|
||||
) -> Result<SearchResults, Box<dyn std::error::Error>> {
|
||||
let user_agent: &str = random_user_agent();
|
||||
|
||||
@ -91,7 +92,13 @@ pub async fn aggregate(
|
||||
let query: String = query.to_owned();
|
||||
tasks.push(tokio::spawn(async move {
|
||||
search_engine
|
||||
.results(&query, page, user_agent, request_timeout)
|
||||
.results(
|
||||
&query,
|
||||
page,
|
||||
user_agent.clone(),
|
||||
request_timeout,
|
||||
safe_search,
|
||||
)
|
||||
.await
|
||||
}));
|
||||
}
|
||||
@ -150,20 +157,22 @@ pub async fn aggregate(
|
||||
}
|
||||
}
|
||||
|
||||
let mut blacklist_map: HashMap<String, SearchResult> = HashMap::new();
|
||||
filter_with_lists(
|
||||
&mut result_map,
|
||||
&mut blacklist_map,
|
||||
file_path(FileType::BlockList)?,
|
||||
)?;
|
||||
if safe_search >= 3 {
|
||||
let mut blacklist_map: HashMap<String, SearchResult> = HashMap::new();
|
||||
filter_with_lists(
|
||||
&mut result_map,
|
||||
&mut blacklist_map,
|
||||
file_path(FileType::BlockList)?,
|
||||
)?;
|
||||
|
||||
filter_with_lists(
|
||||
&mut blacklist_map,
|
||||
&mut result_map,
|
||||
file_path(FileType::AllowList)?,
|
||||
)?;
|
||||
filter_with_lists(
|
||||
&mut blacklist_map,
|
||||
&mut result_map,
|
||||
file_path(FileType::AllowList)?,
|
||||
)?;
|
||||
|
||||
drop(blacklist_map);
|
||||
drop(blacklist_map);
|
||||
}
|
||||
|
||||
let results: Vec<SearchResult> = result_map.into_values().collect();
|
||||
|
||||
@ -189,7 +198,7 @@ pub fn filter_with_lists(
|
||||
let mut reader = BufReader::new(File::open(file_path)?);
|
||||
|
||||
for line in reader.by_ref().lines() {
|
||||
let re = Regex::new(&line?)?;
|
||||
let re = Regex::new(line?.trim())?;
|
||||
|
||||
// Iterate over each search result in the map and check if it matches the regex pattern
|
||||
for (url, search_result) in map_to_be_filtered.clone().into_iter() {
|
||||
|
@ -2,7 +2,10 @@
|
||||
//! meta search engine website and provide appropriate response to each route/page
|
||||
//! when requested.
|
||||
|
||||
use std::fs::read_to_string;
|
||||
use std::{
|
||||
fs::{read_to_string, File},
|
||||
io::{BufRead, BufReader, Read},
|
||||
};
|
||||
|
||||
use crate::{
|
||||
cache::cacher::RedisCache,
|
||||
@ -13,12 +16,13 @@ use crate::{
|
||||
};
|
||||
use actix_web::{get, web, HttpRequest, HttpResponse};
|
||||
use handlebars::Handlebars;
|
||||
use regex::Regex;
|
||||
use serde::Deserialize;
|
||||
use tokio::join;
|
||||
|
||||
// ---- Constants ----
|
||||
/// Initialize redis cache connection once and store it on the heap.
|
||||
const REDIS_CACHE: async_once_cell::OnceCell<RedisCache> = async_once_cell::OnceCell::new();
|
||||
static REDIS_CACHE: async_once_cell::OnceCell<RedisCache> = async_once_cell::OnceCell::new();
|
||||
|
||||
/// A named struct which deserializes all the user provided search parameters and stores them.
|
||||
///
|
||||
@ -32,6 +36,7 @@ const REDIS_CACHE: async_once_cell::OnceCell<RedisCache> = async_once_cell::Once
|
||||
struct SearchParams {
|
||||
q: Option<String>,
|
||||
page: Option<u32>,
|
||||
safesearch: Option<u8>,
|
||||
}
|
||||
|
||||
/// Handles the route of index page or main page of the `websurfx` meta search engine website.
|
||||
@ -105,42 +110,58 @@ pub async fn search(
|
||||
None => 1,
|
||||
};
|
||||
|
||||
let safe_search: u8 = match config.safe_search {
|
||||
3..=4 => config.safe_search,
|
||||
_ => match ¶ms.safesearch {
|
||||
Some(safesearch) => match safesearch {
|
||||
0..=2 => *safesearch,
|
||||
_ => 1,
|
||||
},
|
||||
None => config.safe_search,
|
||||
},
|
||||
};
|
||||
|
||||
let (_, results, _) = join!(
|
||||
results(
|
||||
format!(
|
||||
"http://{}:{}/search?q={}&page={}",
|
||||
"http://{}:{}/search?q={}&page={}&safesearch={}",
|
||||
config.binding_ip,
|
||||
config.port,
|
||||
query,
|
||||
page - 1
|
||||
page - 1,
|
||||
safe_search
|
||||
),
|
||||
&config,
|
||||
query,
|
||||
page - 1,
|
||||
&req,
|
||||
req.clone(),
|
||||
safe_search
|
||||
),
|
||||
results(
|
||||
format!(
|
||||
"http://{}:{}/search?q={}&page={}",
|
||||
config.binding_ip, config.port, query, page
|
||||
"http://{}:{}/search?q={}&page={}&safesearch={}",
|
||||
config.binding_ip, config.port, query, page, safe_search
|
||||
),
|
||||
&config,
|
||||
query,
|
||||
page,
|
||||
&req,
|
||||
req.clone(),
|
||||
safe_search
|
||||
),
|
||||
results(
|
||||
format!(
|
||||
"http://{}:{}/search?q={}&page={}",
|
||||
"http://{}:{}/search?q={}&page={}&safesearch={}",
|
||||
config.binding_ip,
|
||||
config.port,
|
||||
query,
|
||||
page + 1
|
||||
page + 1,
|
||||
safe_search
|
||||
),
|
||||
&config,
|
||||
query,
|
||||
page + 1,
|
||||
&req,
|
||||
req.clone(),
|
||||
safe_search
|
||||
)
|
||||
);
|
||||
|
||||
@ -160,9 +181,10 @@ async fn results(
|
||||
config: &Config,
|
||||
query: &str,
|
||||
page: u32,
|
||||
req: &HttpRequest,
|
||||
req: HttpRequest,
|
||||
safe_search: u8,
|
||||
) -> Result<SearchResults, Box<dyn std::error::Error>> {
|
||||
let redis_cache: RedisCache = REDIS_CACHE
|
||||
let mut redis_cache: RedisCache = REDIS_CACHE
|
||||
.get_or_init(async {
|
||||
// Initialize redis cache connection pool only once and store it in the heap.
|
||||
RedisCache::new(&config.redis_url, 5).await.unwrap()
|
||||
@ -178,6 +200,23 @@ async fn results(
|
||||
match cached_results_json {
|
||||
Ok(results) => Ok(serde_json::from_str::<SearchResults>(&results)?),
|
||||
Err(_) => {
|
||||
if safe_search == 4 {
|
||||
let mut results: SearchResults = SearchResults::default();
|
||||
let mut _flag: bool =
|
||||
is_match_from_filter_list(file_path(FileType::BlockList)?, query)?;
|
||||
_flag = !is_match_from_filter_list(file_path(FileType::AllowList)?, query)?;
|
||||
|
||||
if _flag {
|
||||
results.set_disallowed();
|
||||
results.add_style(&config.style);
|
||||
results.set_page_query(query);
|
||||
redis_cache
|
||||
.cache_results(&serde_json::to_string(&results)?, &url)
|
||||
.await?;
|
||||
return Ok(results);
|
||||
}
|
||||
}
|
||||
|
||||
// check if the cookie value is empty or not if it is empty then use the
|
||||
// default selected upstream search engines from the config file otherwise
|
||||
// parse the non-empty cookie and grab the user selected engines from the
|
||||
@ -199,6 +238,7 @@ async fn results(
|
||||
config.debug,
|
||||
&engines,
|
||||
config.request_timeout,
|
||||
safe_search,
|
||||
)
|
||||
.await?
|
||||
}
|
||||
@ -210,14 +250,16 @@ async fn results(
|
||||
config.debug,
|
||||
&config.upstream_search_engines,
|
||||
config.request_timeout,
|
||||
safe_search,
|
||||
)
|
||||
.await?
|
||||
}
|
||||
};
|
||||
|
||||
if results.engine_errors_info().is_empty() && results.results().is_empty() {
|
||||
results.set_filtered();
|
||||
}
|
||||
results.add_style(&config.style);
|
||||
redis_cache
|
||||
.clone()
|
||||
.cache_results(&serde_json::to_string(&results)?, &url)
|
||||
.await?;
|
||||
Ok(results)
|
||||
@ -225,6 +267,22 @@ async fn results(
|
||||
}
|
||||
}
|
||||
|
||||
fn is_match_from_filter_list(
|
||||
file_path: &str,
|
||||
query: &str,
|
||||
) -> Result<bool, Box<dyn std::error::Error>> {
|
||||
let mut flag = false;
|
||||
let mut reader = BufReader::new(File::open(file_path)?);
|
||||
for line in reader.by_ref().lines() {
|
||||
let re = Regex::new(&line?)?;
|
||||
if re.is_match(query) {
|
||||
flag = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok(flag)
|
||||
}
|
||||
|
||||
/// Handles the route of robots.txt page of the `websurfx` meta search engine website.
|
||||
#[get("/robots.txt")]
|
||||
pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
|
||||
|
@ -15,6 +15,17 @@ rate_limiter = {
|
||||
time_limit = 3, -- The time limit within which the permitted quantity of requests should be accepted.
|
||||
}
|
||||
|
||||
-- ### Search ###
|
||||
-- Filter results based on different levels. The levels provided are:
|
||||
-- {{
|
||||
-- 0 - None
|
||||
-- 1 - Low
|
||||
-- 2 - Moderate
|
||||
-- 3 - High
|
||||
-- 4 - Aggressive
|
||||
-- }}
|
||||
safe_search = 2
|
||||
|
||||
-- ### Website ###
|
||||
-- The different colorschemes provided are:
|
||||
-- {{
|
||||
|
Loading…
Reference in New Issue
Block a user