0
0
mirror of https://github.com/neon-mmd/websurfx.git synced 2024-11-22 05:58:21 -05:00

Merge branch 'rolling' into improve-and-fix-settings-page

This commit is contained in:
neon_arch 2023-06-18 20:48:38 +03:00 committed by GitHub
commit 2b7e28c963
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 295 additions and 71 deletions

17
Cargo.lock generated
View File

@ -268,6 +268,12 @@ dependencies = [
"alloc-no-stdlib", "alloc-no-stdlib",
] ]
[[package]]
name = "anyhow"
version = "1.0.71"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c7d0618f0e0b7e8ff11427422b64564d5fb0be1940354bfe2e0529b18a9d9b8"
[[package]] [[package]]
name = "askama_escape" name = "askama_escape"
version = "0.10.3" version = "0.10.3"
@ -739,6 +745,16 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "error-stack"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f00447f331c7f726db5b8532ebc9163519eed03c6d7c8b73c90b3ff5646ac85"
dependencies = [
"anyhow",
"rustc_version 0.4.0",
]
[[package]] [[package]]
name = "failure" name = "failure"
version = "0.1.8" version = "0.1.8"
@ -3370,6 +3386,7 @@ dependencies = [
"actix-files", "actix-files",
"actix-web", "actix-web",
"env_logger", "env_logger",
"error-stack",
"fake-useragent", "fake-useragent",
"handlebars", "handlebars",
"log", "log",

View File

@ -2,8 +2,9 @@
name = "websurfx" name = "websurfx"
version = "0.13.0" version = "0.13.0"
edition = "2021" edition = "2021"
description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html repository = "https://github.com/neon-mmd/websurfx"
license = "AGPL-3.0"
[dependencies] [dependencies]
reqwest = {version="*",features=["json"]} reqwest = {version="*",features=["json"]}
@ -22,6 +23,32 @@ redis = {version="*"}
md5 = {version="*"} md5 = {version="*"}
rand={version="*"} rand={version="*"}
once_cell = {version="*"} once_cell = {version="*"}
error-stack = {version="0.3.1"}
[dev-dependencies] [dev-dependencies]
rusty-hook = "^0.11.2" rusty-hook = "^0.11.2"
# Development profile: unoptimized builds with full debug info and runtime checks enabled.
[profile.dev]
opt-level = 0
debug = true
# `split-debuginfo` is intentionally left unset so Cargo applies its platform-specific default.
# The previous value '...' was the placeholder shown in the Cargo reference, not one of the
# accepted values (`off`, `packed`, `unpacked`).
debug-assertions = true
overflow-checks = true
lto = false
panic = 'unwind'
incremental = true
codegen-units = 256
rpath = false
# Release profile: fully optimized builds with thin LTO and debug info stripped from the binary.
[profile.release]
opt-level = 3
debug = false
# `split-debuginfo` is intentionally left unset so Cargo applies its platform-specific default.
# The previous value '...' was the placeholder shown in the Cargo reference, not one of the
# accepted values (`off`, `packed`, `unpacked`).
debug-assertions = false
overflow-checks = false
lto = 'thin'
panic = 'unwind'
incremental = false
codegen-units = 16
rpath = false
strip = "debuginfo"

View File

@ -1,4 +1,4 @@
<h1 align="center"> <h1 align="center">
<img src="./images/websurfx_logo.png" alt="websurfx logo" align="center" /> <img src="./images/websurfx_logo.png" alt="websurfx logo" align="center" />
</h1> </h1>
<p align="center"> <p align="center">
@ -39,7 +39,7 @@
>meta search engine</a >meta search engine</a
> >
(pronounced as websurface or web-surface /wɛbˈsɜːrfəs/.) written in Rust. It (pronounced as websurface or web-surface /wɛbˈsɜːrfəs/.) written in Rust. It
provides a quick and secure search experience while maintaining user provides a quick and secure search experience while completely respecting user
privacy.</i privacy.</i
> >
</p> </p>
@ -72,7 +72,7 @@
# Preview 🔭 # Preview 🔭
## Main Page ## Home Page
<img align="center" src="./images/main_page.png" /> <img align="center" src="./images/main_page.png" />
@ -88,7 +88,7 @@
# Features 🚀 # Features 🚀
- 🎨 High level of customizability with nine color schemes provided by default with a simple theme, also supporting the creation of your custom themes and colorschemes very quickly and easily - 🎨 Make Websurfx uniquely yours with nine color schemes provided by default. It also supports creation of custom themes and color schemes in a quick and easy way, so unleash your creativity!
- 🔐 Fast, private, and secure - 🔐 Fast, private, and secure
- 🆓 100% free and open source - 🆓 100% free and open source
- 💨 Ad-free and clean results - 💨 Ad-free and clean results
@ -116,7 +116,7 @@ redis-server --port 8082 &
Once you have started the server, open your preferred web browser and navigate to <http://127.0.0.1:8080> to start using Websurfx. Once you have started the server, open your preferred web browser and navigate to <http://127.0.0.1:8080> to start using Websurfx.
> **Warning** > **Warning**
> Please be aware that the project is still in the testing phase and is not ready for production use. > This project is still in the testing phase and is **not** ready for production use.
**[⬆️ Back to Top](#--)** **[⬆️ Back to Top](#--)**
@ -132,14 +132,14 @@ Websurfx is configured through the config.lua file, located at `websurfx/config.
> For full theming and customization instructions, see: [**Theming**](./docs/theming.md) > For full theming and customization instructions, see: [**Theming**](./docs/theming.md)
Websurfx comes with several themes and color schemes by default, which you can apply and edit through the config file. Supports custom themes and color schemes using CSS, allowing you to develop your own unique-looking website. Websurfx comes loaded with several themes and color schemes, which you can apply and edit through the config file. It also supports custom themes and color schemes using CSS, allowing you to make it truly yours.
**[⬆️ Back to Top](#--)** **[⬆️ Back to Top](#--)**
# Multi-Language Support 🌍 # Multi-Language Support 🌍
> **Note** > **Note**
> Currently, we do not support other languages, but in the future, we will start accepting contributions regarding language support because we believe that language should not be a barrier to entry. > Currently, we do not support other languages but we will start accepting contributions regarding language support in the future. We believe language should never be a barrier to entry.
**[⬆️ Back to Top](#--)** **[⬆️ Back to Top](#--)**
@ -153,15 +153,15 @@ At present, we only support x86_64 architecture systems, but we would love to ha
## Why Websurfx? ## Why Websurfx?
The primary purpose of the Websurfx project is to create a fast, secure, and privacy-focused meta-search engine. While there are numerous meta-search engines available, not all of them guarantee the security of their search engine, which is critical for maintaining privacy. Memory flaws, for example, can expose private or sensitive information, which is never a good thing. Also, there is the added problem of Spam, ads, and unorganic results which most engines don't have the full-proof answer to it till now but with Websurfx I finally put a full stop to this problem, also, Rust is used to write Websurfx, which ensures memory safety and removes such issues. Many meta-search engines also lack important features like advanced picture search, which is required by many graphic designers, content providers, and others. Websurfx attempts to improve the user experience by providing these and other features, such as proper NSFW blocking and Micro-apps or Quick results (like providing a calculator, currency exchanges, etc in the search results). The primary purpose of the Websurfx project is to create a fast, secure, and privacy-focused meta-search engine. There are numerous meta-search engines available, but not all guarantee the security of their search engine, which is critical for maintaining privacy. Memory flaws, for example, can expose private or sensitive information, which is understandably bad. There is also the added problem of spam, ads, and inorganic results which most engines don't have a fool-proof answer to. Until now. With Websurfx I finally put a full stop to this problem. Websurfx is based on Rust, which ensures memory safety and removes such issues. Many meta-search engines also lack important features like advanced picture search, required by graphic designers, content providers, and others. 
Websurfx improves the user experience by providing these and other features, such as proper NSFW blocking and Micro-apps or Quick Results (providing a calculator, currency exchanges, etc in the search results).
## Why AGPLv3? ## Why AGPLv3?
Websurfx is distributed under the **AGPLv3** license to keep the source code open and transparent. This helps to keep malware, telemetry, and other dangerous programs out of the project. **AGPLv3** is a strong copyleft license that ensures the software's source code, including any modifications or improvements made to the code, remains open and available to everyone. Websurfx is distributed under the **AGPLv3** license to keep the source code open and transparent. This helps keep malware, telemetry, and other dangers out of the project. **AGPLv3** is a strong copyleft license that ensures the software's source code, including any modifications or improvements made to the code, remains open and available to everyone.
## Why Rust? ## Why Rust?
Rust was chosen as the programming language for Websurfx because of its memory safety features, which can help prevent vulnerabilities and make the codebase more secure. Rust is also faster than C++, which contributes to Websurfx's speed and responsiveness. Furthermore, the Rust ownership and borrowing system enables secure concurrency and thread safety in the program. Websurfx is based on Rust due to its memory safety features, which prevent vulnerabilities and make the codebase more secure. Rust is also faster than C++, contributing to Websurfx's speed and responsiveness. Finally, the Rust ownership and borrowing system enables secure concurrency and thread safety in the program.
**[⬆️ Back to Top](#--)** **[⬆️ Back to Top](#--)**
@ -175,14 +175,14 @@ We are looking for more willing contributors to help grow this project. For more
> For full details and other ways you can help out, see: [**Contributing**]() > For full details and other ways you can help out, see: [**Contributing**]()
If you use Websurfx and would like to contribute to its development, that would be fantastic! Contributions of any size or type are always welcome, and we will properly acknowledge your efforts. If you use Websurfx and would like to contribute to its development, we're glad to have you on board! Contributions of any size or type are always welcome, and we will always acknowledge your efforts.
Several areas that we need a bit of help with at the moment are: Several areas that we need a bit of help with at the moment are:
- **Better and more color schemes**: Help fix color schemes and add other famous color schemes. - **Better and more color schemes**: Help fix color schemes and add other famous color schemes.
- **Improve evasion code for bot detection** - Help improve code related to evading IP blocking and emulating human behaviors located in every engine file. - **Improve evasion code for bot detection** - Help improve code related to evading IP blocking and emulating human behaviors located in every engine file.
- **Logo** - Help create a logo for the project and website. - **Logo** - Help create a logo for the project and website.
- **Docker Support** - Help write a Docker Compose file for the project. - **Docker Support** - Help write a Docker Compose file for the project.
- Submit a PR to add a new feature, fix a bug, update the docs, add a theme, widget, or something else. - Submit a PR to add a new feature, fix a bug, update the docs, add a theme, widget, or anything else.
- Star Websurfx on GitHub. - Star Websurfx on GitHub.
**[⬆️ Back to Top](#--)** **[⬆️ Back to Top](#--)**
@ -196,13 +196,13 @@ Several areas that we need a bit of help with at the moment are:
# Roadmap 🛣️ # Roadmap 🛣️
> Coming soon!! 🙂. > Coming soon! 🙂.
**[⬆️ Back to Top](#--)** **[⬆️ Back to Top](#--)**
# Contributing 🙋 # Contributing 🙋
Contributions are welcome from anyone. It doesn\'t matter who you are; you can still contribute to the project in your own way. Contributions are welcome from anyone. It doesn't matter who you are; you can still contribute to the project in your own way.
## Not a developer but still want to contribute? ## Not a developer but still want to contribute?

View File

@ -1,10 +1,25 @@
let search_box = document.querySelector('input') /**
function search_web() { * Selects the input element for the search box
window.location = `search?q=${search_box.value}` * @type {HTMLInputElement}
*/
const searchBox = document.querySelector('input');
/**
* Redirects the user to the search results page with the query parameter
*/
function searchWeb() {
const query = searchBox.value.trim();
if (query) {
window.location.href = `search?q=${encodeURIComponent(query)}`;
}
} }
search_box.addEventListener('keyup', (e) => { /**
if (e.keyCode === 13) { * Listens for the 'Enter' key press event on the search box and calls the searchWeb function
search_web() * @param {KeyboardEvent} e - The keyboard event object
*/
searchBox.addEventListener('keyup', (e) => {
if (e.key === 'Enter') {
searchWeb();
} }
}) });

View File

@ -1,26 +1,39 @@
/**
* Navigates to the next page by incrementing the current page number in the URL query parameters.
* @returns {void}
*/
function navigate_forward() { function navigate_forward() {
const url = new URL(window.location) const url = new URL(window.location);
const searchParams = url.searchParams const searchParams = url.searchParams;
let q = searchParams.get('q') let q = searchParams.get('q');
let page = searchParams.get('page') let page = parseInt(searchParams.get('page'));
if (page === null) { if (isNaN(page)) {
page = 2 page = 1;
window.location = `${url.origin}${url.pathname}?q=${q}&page=${page}`
} else { } else {
window.location = `${url.origin}${url.pathname}?q=${q}&page=${++page}` page++;
} }
window.location.href = `${url.origin}${url.pathname}?q=${encodeURIComponent(q)}&page=${page}`;
} }
/**
* Navigates to the previous page by decrementing the current page number in the URL query parameters.
* @returns {void}
*/
function navigate_backward() { function navigate_backward() {
const url = new URL(window.location) const url = new URL(window.location);
const searchParams = url.searchParams const searchParams = url.searchParams;
let q = searchParams.get('q') let q = searchParams.get('q');
let page = searchParams.get('page') let page = parseInt(searchParams.get('page'));
if (page !== null && page > 1) { if (isNaN(page)) {
window.location = `${url.origin}${url.pathname}?q=${q}&page=${--page}` page = 1;
} else if (page > 1) {
page--;
} }
window.location.href = `${url.origin}${url.pathname}?q=${encodeURIComponent(q)}&page=${page}`;
} }

View File

@ -118,7 +118,7 @@ impl Config {
{ {
Ok("./websurfx/config.lua".to_string()) Ok("./websurfx/config.lua".to_string())
} else { } else {
Err(format!("Config file not found!!").into()) Err("Config file not found!!".to_string().into())
} }
} }
} }

View File

@ -2,13 +2,17 @@
//! by querying the upstream duckduckgo search engine with user provided query and with a page //! by querying the upstream duckduckgo search engine with user provided query and with a page
//! number if provided. //! number if provided.
use std::collections::HashMap; use std::{collections::HashMap, time::Duration};
use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT}; use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
use scraper::{Html, Selector}; use scraper::{Html, Selector};
use crate::search_results_handler::aggregation_models::RawSearchResult; use crate::search_results_handler::aggregation_models::RawSearchResult;
use super::engine_models::EngineError;
use error_stack::{IntoReport, Report, Result, ResultExt};
/// This function scrapes results from the upstream engine duckduckgo and puts all the scraped /// This function scrapes results from the upstream engine duckduckgo and puts all the scraped
/// results like title, visiting_url (href in html),engine (from which engine it was fetched from) /// results like title, visiting_url (href in html),engine (from which engine it was fetched from)
/// and description in a RawSearchResult and then adds that to HashMap whose keys are url and /// and description in a RawSearchResult and then adds that to HashMap whose keys are url and
@ -22,14 +26,15 @@ use crate::search_results_handler::aggregation_models::RawSearchResult;
/// ///
/// # Errors /// # Errors
/// ///
/// Returns a reqwest error if the user is not connected to the internet or if there is a failure to /// Returns an `EngineError` if the user is not connected to the internet or if there is a failure to
/// reach the above `upstream search engine` page and also returns error if the scraping /// reach the above `upstream search engine` page or if the `upstream search engine` is unable to
/// selector fails to initialize" /// provide results for the requested search query and also returns error if the scraping selector
/// or HeaderMap fails to initialize.
pub async fn results( pub async fn results(
query: &str, query: &str,
page: u32, page: u32,
user_agent: &str, user_agent: &str,
) -> Result<HashMap<String, RawSearchResult>, Box<dyn std::error::Error>> { ) -> Result<HashMap<String, RawSearchResult>, EngineError> {
// Page number can be missing or empty string and so appropriate handling is required // Page number can be missing or empty string and so appropriate handling is required
// so that upstream server receives valid page number. // so that upstream server receives valid page number.
let url: String = match page { let url: String = match page {
@ -48,26 +53,71 @@ pub async fn results(
// initializing HeaderMap and adding appropriate headers. // initializing HeaderMap and adding appropriate headers.
let mut header_map = HeaderMap::new(); let mut header_map = HeaderMap::new();
header_map.insert(USER_AGENT, user_agent.parse()?); header_map.insert(
header_map.insert(REFERER, "https://google.com/".parse()?); USER_AGENT,
header_map.insert(CONTENT_TYPE, "application/x-www-form-urlencoded".parse()?); user_agent
header_map.insert(COOKIE, "kl=wt-wt".parse()?); .parse()
.into_report()
.change_context(EngineError::UnexpectedError)?,
);
header_map.insert(
REFERER,
"https://google.com/"
.parse()
.into_report()
.change_context(EngineError::UnexpectedError)?,
);
header_map.insert(
CONTENT_TYPE,
"application/x-www-form-urlencoded"
.parse()
.into_report()
.change_context(EngineError::UnexpectedError)?,
);
header_map.insert(
COOKIE,
"kl=wt-wt"
.parse()
.into_report()
.change_context(EngineError::UnexpectedError)?,
);
// fetch the html from upstream duckduckgo engine // fetch the html from upstream duckduckgo engine
// TODO: Write better error handling code to handle no results case.
let results: String = reqwest::Client::new() let results: String = reqwest::Client::new()
.get(url) .get(url)
.timeout(Duration::from_secs(5))
.headers(header_map) // add spoofed headers to emulate human behaviour .headers(header_map) // add spoofed headers to emulate human behaviour
.send() .send()
.await? .await
.into_report()
.change_context(EngineError::RequestError)?
.text() .text()
.await?; .await
.into_report()
.change_context(EngineError::RequestError)?;
let document: Html = Html::parse_document(&results); let document: Html = Html::parse_document(&results);
let results: Selector = Selector::parse(".result")?;
let result_title: Selector = Selector::parse(".result__a")?; let no_result: Selector = Selector::parse(".no-results")
let result_url: Selector = Selector::parse(".result__url")?; .map_err(|_| Report::new(EngineError::UnexpectedError))
let result_desc: Selector = Selector::parse(".result__snippet")?; .attach_printable_lazy(|| format!("invalid CSS selector: {}", ".no-results"))?;
if document.select(&no_result).next().is_some() {
return Err(Report::new(EngineError::EmptyResultSet));
}
let results: Selector = Selector::parse(".result")
.map_err(|_| Report::new(EngineError::UnexpectedError))
.attach_printable_lazy(|| format!("invalid CSS selector: {}", ".result"))?;
let result_title: Selector = Selector::parse(".result__a")
.map_err(|_| Report::new(EngineError::UnexpectedError))
.attach_printable_lazy(|| format!("invalid CSS selector: {}", ".result__a"))?;
let result_url: Selector = Selector::parse(".result__url")
.map_err(|_| Report::new(EngineError::UnexpectedError))
.attach_printable_lazy(|| format!("invalid CSS selector: {}", ".result__url"))?;
let result_desc: Selector = Selector::parse(".result__snippet")
.map_err(|_| Report::new(EngineError::UnexpectedError))
.attach_printable_lazy(|| format!("invalid CSS selector: {}", ".result__snippet"))?;
// scrape all the results from the html // scrape all the results from the html
Ok(document Ok(document

View File

@ -0,0 +1,43 @@
//! This module provides the error enum used to handle the different errors that can occur while
//! requesting data from the upstream search engines with the search query provided by the user.
use error_stack::Context;
use std::fmt;
/// A custom error type used for handling engine-associated errors.
///
/// This enum provides variants for three different categories of errors:
/// * `RequestError` - This variant covers failures while sending the request to, or reading the
///   response from, the upstream search engine.
/// * `EmptyResultSet` - This variant covers the "no results found" case reported by the upstream
///   search engines.
/// * `UnexpectedError` - This variant covers errors which are unexpected or occur rarely, such as
///   failures while building header values for the `HeaderMap` or parsing scraping selectors in
///   the code handling the `upstream search engines`.
///
/// The type is a plain, field-less enum, so it is cheap to copy and can be compared directly
/// (for example in tests asserting which kind of error was produced).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EngineError {
    /// The upstream search engine returned no results for the query.
    EmptyResultSet,
    /// Requesting data from the upstream search engine failed.
    RequestError,
    /// An unexpected failure occurred while preparing the request or processing the response.
    UnexpectedError,
}
impl fmt::Display for EngineError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
EngineError::EmptyResultSet => {
write!(f, "The upstream search engine returned an empty result set")
}
EngineError::RequestError => {
write!(
f,
"Error occurred while requesting data from upstream search engine"
)
}
EngineError::UnexpectedError => {
write!(f, "An unexpected error occurred while processing the data")
}
}
}
}
// Marker impl: lets `EngineError` be used as the context type of an `error_stack::Report`.
impl Context for EngineError {}

View File

@ -1,2 +1,3 @@
pub mod duckduckgo; pub mod duckduckgo;
pub mod engine_models;
pub mod searx; pub mod searx;

View File

@ -8,6 +8,9 @@ use std::collections::HashMap;
use crate::search_results_handler::aggregation_models::RawSearchResult; use crate::search_results_handler::aggregation_models::RawSearchResult;
use super::engine_models::EngineError;
use error_stack::{IntoReport, Report, Result, ResultExt};
/// This function scrapes results from the upstream engine duckduckgo and puts all the scraped /// This function scrapes results from the upstream engine duckduckgo and puts all the scraped
/// results like title, visiting_url (href in html),engine (from which engine it was fetched from) /// results like title, visiting_url (href in html),engine (from which engine it was fetched from)
/// and description in a RawSearchResult and then adds that to HashMap whose keys are url and /// and description in a RawSearchResult and then adds that to HashMap whose keys are url and
@ -21,40 +24,84 @@ use crate::search_results_handler::aggregation_models::RawSearchResult;
/// ///
/// # Errors /// # Errors
/// ///
/// Returns a reqwest error if the user is not connected to the internet or if there is a failure to /// Returns an `EngineError` if the user is not connected to the internet or if there is a failure to
/// reach the above `upstream search engine` page and also returns error if the scraping /// reach the above `upstream search engine` page or if the `upstream search engine` is unable to
/// selector fails to initialize" /// provide results for the requested search query and also returns error if the scraping selector
/// or HeaderMap fails to initialize.
pub async fn results( pub async fn results(
query: &str, query: &str,
page: u32, page: u32,
user_agent: &str, user_agent: &str,
) -> Result<HashMap<String, RawSearchResult>, Box<dyn std::error::Error>> { ) -> Result<HashMap<String, RawSearchResult>, EngineError> {
// Page number can be missing or empty string and so appropriate handling is required // Page number can be missing or empty string and so appropriate handling is required
// so that upstream server receives valid page number. // so that upstream server receives valid page number.
let url: String = format!("https://searx.work/search?q={query}&pageno={page}"); let url: String = format!("https://searx.work/search?q={query}&pageno={page}");
// initializing headers and adding appropriate headers. // initializing headers and adding appropriate headers.
let mut header_map = HeaderMap::new(); let mut header_map = HeaderMap::new();
header_map.insert(USER_AGENT, user_agent.parse()?); header_map.insert(
header_map.insert(REFERER, "https://google.com/".parse()?); USER_AGENT,
header_map.insert(CONTENT_TYPE, "application/x-www-form-urlencoded".parse()?); user_agent
header_map.insert(COOKIE, "categories=general; language=auto; locale=en; autocomplete=duckduckgo; image_proxy=1; method=POST; safesearch=2; theme=simple; results_on_new_tab=1; doi_resolver=oadoi.org; simple_style=auto; center_alignment=1; query_in_title=1; infinite_scroll=0; disabled_engines=; enabled_engines=\"archive is__general\\054yep__general\\054curlie__general\\054currency__general\\054ddg definitions__general\\054wikidata__general\\054duckduckgo__general\\054tineye__general\\054lingva__general\\054startpage__general\\054yahoo__general\\054wiby__general\\054marginalia__general\\054alexandria__general\\054wikibooks__general\\054wikiquote__general\\054wikisource__general\\054wikiversity__general\\054wikivoyage__general\\054dictzone__general\\054seznam__general\\054mojeek__general\\054naver__general\\054wikimini__general\\054brave__general\\054petalsearch__general\\054goo__general\"; disabled_plugins=; enabled_plugins=\"searx.plugins.hostname_replace\\054searx.plugins.oa_doi_rewrite\\054searx.plugins.vim_hotkeys\"; tokens=; maintab=on; enginetab=on".parse()?); .parse()
.into_report()
.change_context(EngineError::UnexpectedError)?,
);
header_map.insert(
REFERER,
"https://google.com/"
.parse()
.into_report()
.change_context(EngineError::UnexpectedError)?,
);
header_map.insert(
CONTENT_TYPE,
"application/x-www-form-urlencoded"
.parse()
.into_report()
.change_context(EngineError::UnexpectedError)?,
);
header_map.insert(COOKIE, "categories=general; language=auto; locale=en; autocomplete=duckduckgo; image_proxy=1; method=POST; safesearch=2; theme=simple; results_on_new_tab=1; doi_resolver=oadoi.org; simple_style=auto; center_alignment=1; query_in_title=1; infinite_scroll=0; disabled_engines=; enabled_engines=\"archive is__general\\054yep__general\\054curlie__general\\054currency__general\\054ddg definitions__general\\054wikidata__general\\054duckduckgo__general\\054tineye__general\\054lingva__general\\054startpage__general\\054yahoo__general\\054wiby__general\\054marginalia__general\\054alexandria__general\\054wikibooks__general\\054wikiquote__general\\054wikisource__general\\054wikiversity__general\\054wikivoyage__general\\054dictzone__general\\054seznam__general\\054mojeek__general\\054naver__general\\054wikimini__general\\054brave__general\\054petalsearch__general\\054goo__general\"; disabled_plugins=; enabled_plugins=\"searx.plugins.hostname_replace\\054searx.plugins.oa_doi_rewrite\\054searx.plugins.vim_hotkeys\"; tokens=; maintab=on; enginetab=on".parse().into_report().change_context(EngineError::UnexpectedError)?);
// fetch the html from upstream searx instance engine // fetch the html from upstream searx instance engine
// TODO: Write better error handling code to handle no results case.
let results: String = reqwest::Client::new() let results: String = reqwest::Client::new()
.get(url) .get(url)
.headers(header_map) // add spoofed headers to emulate human behaviours. .headers(header_map) // add spoofed headers to emulate human behaviours.
.send() .send()
.await? .await
.into_report()
.change_context(EngineError::RequestError)?
.text() .text()
.await?; .await
.into_report()
.change_context(EngineError::RequestError)?;
let document: Html = Html::parse_document(&results); let document: Html = Html::parse_document(&results);
let results: Selector = Selector::parse(".result")?;
let result_title: Selector = Selector::parse("h3>a")?; let no_result: Selector = Selector::parse("#urls>.dialog-error>p")
let result_url: Selector = Selector::parse("h3>a")?; .map_err(|_| Report::new(EngineError::UnexpectedError))
let result_desc: Selector = Selector::parse(".content")?; .attach_printable_lazy(|| format!("invalid CSS selector: {}", "#urls>.dialog-error>p"))?;
if let Some(no_result_msg) = document.select(&no_result).nth(1) {
if no_result_msg.inner_html()
== "we didn't find any results. Please use another query or search in more categories"
{
return Err(Report::new(EngineError::EmptyResultSet));
}
}
let results: Selector = Selector::parse(".result")
.map_err(|_| Report::new(EngineError::UnexpectedError))
.attach_printable_lazy(|| format!("invalid CSS selector: {}", ".result"))?;
let result_title: Selector = Selector::parse("h3>a")
.map_err(|_| Report::new(EngineError::UnexpectedError))
.attach_printable_lazy(|| format!("invalid CSS selector: {}", "h3>a"))?;
let result_url: Selector = Selector::parse("h3>a")
.map_err(|_| Report::new(EngineError::UnexpectedError))
.attach_printable_lazy(|| format!("invalid CSS selector: {}", "h3>a"))?;
let result_desc: Selector = Selector::parse(".content")
.map_err(|_| Report::new(EngineError::UnexpectedError))
.attach_printable_lazy(|| format!("invalid CSS selector: {}", ".content"))?;
// scrape all the results from the html // scrape all the results from the html
Ok(document Ok(document

View File

@ -58,8 +58,19 @@ pub async fn aggregate(
searx::results(query, page, &user_agent) searx::results(query, page, &user_agent)
); );
let ddg_map_results: HashMap<String, RawSearchResult> = ddg_map_results?; let ddg_map_results = ddg_map_results.unwrap_or_else(|e| {
let searx_map_results: HashMap<String, RawSearchResult> = searx_map_results?; if debug {
log::error!("Error fetching results from DuckDuckGo: {:?}", e);
}
HashMap::new()
});
let searx_map_results = searx_map_results.unwrap_or_else(|e| {
if debug {
log::error!("Error fetching results from Searx: {:?}", e);
}
HashMap::new()
});
result_map.extend(ddg_map_results); result_map.extend(ddg_map_results);