mirror of
https://github.com/neon-mmd/websurfx.git
synced 2024-11-21 21:48:21 -05:00
Merge branch 'rolling' into improve-and-fix-settings-page
This commit is contained in:
commit
2b7e28c963
17
Cargo.lock
generated
17
Cargo.lock
generated
@ -268,6 +268,12 @@ dependencies = [
|
||||
"alloc-no-stdlib",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anyhow"
|
||||
version = "1.0.71"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9c7d0618f0e0b7e8ff11427422b64564d5fb0be1940354bfe2e0529b18a9d9b8"
|
||||
|
||||
[[package]]
|
||||
name = "askama_escape"
|
||||
version = "0.10.3"
|
||||
@ -739,6 +745,16 @@ dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "error-stack"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5f00447f331c7f726db5b8532ebc9163519eed03c6d7c8b73c90b3ff5646ac85"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"rustc_version 0.4.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "failure"
|
||||
version = "0.1.8"
|
||||
@ -3370,6 +3386,7 @@ dependencies = [
|
||||
"actix-files",
|
||||
"actix-web",
|
||||
"env_logger",
|
||||
"error-stack",
|
||||
"fake-useragent",
|
||||
"handlebars",
|
||||
"log",
|
||||
|
31
Cargo.toml
31
Cargo.toml
@ -2,8 +2,9 @@
|
||||
name = "websurfx"
|
||||
version = "0.13.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
|
||||
repository = "https://github.com/neon-mmd/websurfx"
|
||||
license = "AGPL-3.0"
|
||||
|
||||
[dependencies]
|
||||
reqwest = {version="*",features=["json"]}
|
||||
@ -22,6 +23,32 @@ redis = {version="*"}
|
||||
md5 = {version="*"}
|
||||
rand={version="*"}
|
||||
once_cell = {version="*"}
|
||||
error-stack = {version="0.3.1"}
|
||||
|
||||
[dev-dependencies]
|
||||
rusty-hook = "^0.11.2"
|
||||
|
||||
[profile.dev]
|
||||
opt-level = 0
|
||||
debug = true
|
||||
split-debuginfo = '...'
|
||||
debug-assertions = true
|
||||
overflow-checks = true
|
||||
lto = false
|
||||
panic = 'unwind'
|
||||
incremental = true
|
||||
codegen-units = 256
|
||||
rpath = false
|
||||
|
||||
[profile.release]
|
||||
opt-level = 3
|
||||
debug = false
|
||||
split-debuginfo = '...'
|
||||
debug-assertions = false
|
||||
overflow-checks = false
|
||||
lto = 'thin'
|
||||
panic = 'unwind'
|
||||
incremental = false
|
||||
codegen-units = 16
|
||||
rpath = false
|
||||
strip = "debuginfo"
|
||||
|
28
README.md
28
README.md
@ -1,4 +1,4 @@
|
||||
<h1 align="center">
|
||||
<h1 align="center">
|
||||
<img src="./images/websurfx_logo.png" alt="websurfx logo" align="center" />
|
||||
</h1>
|
||||
<p align="center">
|
||||
@ -39,7 +39,7 @@
|
||||
>meta search engine</a
|
||||
>
|
||||
(pronounced as websurface or web-surface /wɛbˈsɜːrfəs/.) written in Rust. It
|
||||
provides a quick and secure search experience while maintaining user
|
||||
provides a quick and secure search experience while completely respecting user
|
||||
privacy.</i
|
||||
>
|
||||
</p>
|
||||
@ -72,7 +72,7 @@
|
||||
|
||||
# Preview 🔭
|
||||
|
||||
## Main Page
|
||||
## Home Page
|
||||
|
||||
<img align="center" src="./images/main_page.png" />
|
||||
|
||||
@ -88,7 +88,7 @@
|
||||
|
||||
# Features 🚀
|
||||
|
||||
- 🎨 High level of customizability with nine color schemes provided by default with a simple theme, also supporting the creation of your custom themes and colorschemes very quickly and easily
|
||||
- 🎨 Make Websurfx uniquely yours with nine color schemes provided by default. It also supports creation of custom themes and color schemes in a quick and easy way, so unleash your creativity!
|
||||
- 🔐 Fast, private, and secure
|
||||
- 🆓 100% free and open source
|
||||
- 💨 Ad-free and clean results
|
||||
@ -116,7 +116,7 @@ redis-server --port 8082 &
|
||||
Once you have started the server, open your preferred web browser and navigate to <http://127.0.0.1:8080> to start using Websurfx.
|
||||
|
||||
> **Warning**
|
||||
> Please be aware that the project is still in the testing phase and is not ready for production use.
|
||||
> This project is still in the testing phase and is **not** ready for production use.
|
||||
|
||||
**[⬆️ Back to Top](#--)**
|
||||
|
||||
@ -132,14 +132,14 @@ Websurfx is configured through the config.lua file, located at `websurfx/config.
|
||||
|
||||
> For full theming and customization instructions, see: [**Theming**](./docs/theming.md)
|
||||
|
||||
Websurfx comes with several themes and color schemes by default, which you can apply and edit through the config file. Supports custom themes and color schemes using CSS, allowing you to develop your own unique-looking website.
|
||||
Websurfx comes loaded with several themes and color schemes, which you can apply and edit through the config file. It also supports custom themes and color schemes using CSS, allowing you to make it truly yours.
|
||||
|
||||
**[⬆️ Back to Top](#--)**
|
||||
|
||||
# Multi-Language Support 🌍
|
||||
|
||||
> **Note**
|
||||
> Currently, we do not support other languages, but in the future, we will start accepting contributions regarding language support because we believe that language should not be a barrier to entry.
|
||||
> Currently, we do not support other languages but we will start accepting contributions regarding language support in the future. We believe language should never be a barrier to entry.
|
||||
|
||||
**[⬆️ Back to Top](#--)**
|
||||
|
||||
@ -153,15 +153,15 @@ At present, we only support x86_64 architecture systems, but we would love to ha
|
||||
|
||||
## Why Websurfx?
|
||||
|
||||
The primary purpose of the Websurfx project is to create a fast, secure, and privacy-focused meta-search engine. While there are numerous meta-search engines available, not all of them guarantee the security of their search engine, which is critical for maintaining privacy. Memory flaws, for example, can expose private or sensitive information, which is never a good thing. Also, there is the added problem of Spam, ads, and unorganic results which most engines don't have the full-proof answer to it till now but with Websurfx I finally put a full stop to this problem, also, Rust is used to write Websurfx, which ensures memory safety and removes such issues. Many meta-search engines also lack important features like advanced picture search, which is required by many graphic designers, content providers, and others. Websurfx attempts to improve the user experience by providing these and other features, such as proper NSFW blocking and Micro-apps or Quick results (like providing a calculator, currency exchanges, etc in the search results).
|
||||
The primary purpose of the Websurfx project is to create a fast, secure, and privacy-focused meta-search engine. There are numerous meta-search engines available, but not all guarantee the security of their search engine, which is critical for maintaining privacy. Memory flaws, for example, can expose private or sensitive information, which is understandably bad. There is also the added problem of spam, ads, and inorganic results, which most engines don't have a fool-proof answer to. Until now. With Websurfx I finally put a full stop to this problem. Websurfx is based on Rust, which ensures memory safety and removes such issues. Many meta-search engines also lack important features like advanced picture search, required by graphic designers, content providers, and others. Websurfx improves the user experience by providing these and other features, such as proper NSFW blocking and Micro-apps or Quick Results (providing a calculator, currency exchanges, etc. in the search results).
|
||||
|
||||
## Why AGPLv3?
|
||||
|
||||
Websurfx is distributed under the **AGPLv3** license to keep the source code open and transparent. This helps to keep malware, telemetry, and other dangerous programs out of the project. **AGPLv3** is a strong copyleft license that ensures the software's source code, including any modifications or improvements made to the code, remains open and available to everyone.
|
||||
Websurfx is distributed under the **AGPLv3** license to keep the source code open and transparent. This helps keep malware, telemetry, and other dangers out of the project. **AGPLv3** is a strong copyleft license that ensures the software's source code, including any modifications or improvements made to the code, remains open and available to everyone.
|
||||
|
||||
## Why Rust?
|
||||
|
||||
Rust was chosen as the programming language for Websurfx because of its memory safety features, which can help prevent vulnerabilities and make the codebase more secure. Rust is also faster than C++, which contributes to Websurfx's speed and responsiveness. Furthermore, the Rust ownership and borrowing system enables secure concurrency and thread safety in the program.
|
||||
Websurfx is based on Rust due to its memory safety features, which prevent vulnerabilities and make the codebase more secure. Rust is also faster than C++, contributing to Websurfx's speed and responsiveness. Finally, the Rust ownership and borrowing system enables secure concurrency and thread safety in the program.
|
||||
|
||||
**[⬆️ Back to Top](#--)**
|
||||
|
||||
@ -175,14 +175,14 @@ We are looking for more willing contributors to help grow this project. For more
|
||||
|
||||
> For full details and other ways you can help out, see: [**Contributing**]()
|
||||
|
||||
If you use Websurfx and would like to contribute to its development, that would be fantastic! Contributions of any size or type are always welcome, and we will properly acknowledge your efforts.
|
||||
If you use Websurfx and would like to contribute to its development, we're glad to have you on board! Contributions of any size or type are always welcome, and we will always acknowledge your efforts.
|
||||
|
||||
Several areas that we need a bit of help with at the moment are:
|
||||
- **Better and more color schemes**: Help fix color schemes and add other famous color schemes.
|
||||
- **Improve evasion code for bot detection** - Help improve code related to evading IP blocking and emulating human behaviors located in everyone's engine file.
|
||||
- **Logo** - Help create a logo for the project and website.
|
||||
- **Docker Support** - Help write a Docker Compose file for the project.
|
||||
- Submit a PR to add a new feature, fix a bug, update the docs, add a theme, widget, or something else.
|
||||
- Submit a PR to add a new feature, fix a bug, update the docs, add a theme, widget, or anything else.
|
||||
- Star Websurfx on GitHub.
|
||||
|
||||
**[⬆️ Back to Top](#--)**
|
||||
@ -196,13 +196,13 @@ Several areas that we need a bit of help with at the moment are:
|
||||
|
||||
# Roadmap 🛣️
|
||||
|
||||
> Coming soon!! 🙂.
|
||||
> Coming soon! 🙂.
|
||||
|
||||
**[⬆️ Back to Top](#--)**
|
||||
|
||||
# Contributing 🙋
|
||||
|
||||
Contributions are welcome from anyone. It doesn\'t matter who you are; you can still contribute to the project in your own way.
|
||||
Contributions are welcome from anyone. It doesn't matter who you are; you can still contribute to the project in your own way.
|
||||
|
||||
## Not a developer but still want to contribute?
|
||||
|
||||
|
@ -1,10 +1,25 @@
|
||||
let search_box = document.querySelector('input')
|
||||
function search_web() {
|
||||
window.location = `search?q=${search_box.value}`
|
||||
/**
|
||||
* Selects the input element for the search box
|
||||
* @type {HTMLInputElement}
|
||||
*/
|
||||
const searchBox = document.querySelector('input');
|
||||
|
||||
/**
|
||||
* Redirects the user to the search results page with the query parameter
|
||||
*/
|
||||
function searchWeb() {
|
||||
const query = searchBox.value.trim();
|
||||
if (query) {
|
||||
window.location.href = `search?q=${encodeURIComponent(query)}`;
|
||||
}
|
||||
}
|
||||
|
||||
search_box.addEventListener('keyup', (e) => {
|
||||
if (e.keyCode === 13) {
|
||||
search_web()
|
||||
/**
|
||||
* Listens for the 'Enter' key press event on the search box and calls the searchWeb function
|
||||
* @param {KeyboardEvent} e - The keyboard event object
|
||||
*/
|
||||
searchBox.addEventListener('keyup', (e) => {
|
||||
if (e.key === 'Enter') {
|
||||
searchWeb();
|
||||
}
|
||||
})
|
||||
});
|
||||
|
@ -1,26 +1,39 @@
|
||||
/**
|
||||
* Navigates to the next page by incrementing the current page number in the URL query parameters.
|
||||
* @returns {void}
|
||||
*/
|
||||
function navigate_forward() {
|
||||
const url = new URL(window.location)
|
||||
const searchParams = url.searchParams
|
||||
const url = new URL(window.location);
|
||||
const searchParams = url.searchParams;
|
||||
|
||||
let q = searchParams.get('q')
|
||||
let page = searchParams.get('page')
|
||||
let q = searchParams.get('q');
|
||||
let page = parseInt(searchParams.get('page'));
|
||||
|
||||
if (page === null) {
|
||||
page = 2
|
||||
window.location = `${url.origin}${url.pathname}?q=${q}&page=${page}`
|
||||
if (isNaN(page)) {
|
||||
page = 1;
|
||||
} else {
|
||||
window.location = `${url.origin}${url.pathname}?q=${q}&page=${++page}`
|
||||
page++;
|
||||
}
|
||||
|
||||
window.location.href = `${url.origin}${url.pathname}?q=${encodeURIComponent(q)}&page=${page}`;
|
||||
}
|
||||
|
||||
/**
|
||||
* Navigates to the previous page by decrementing the current page number in the URL query parameters.
|
||||
* @returns {void}
|
||||
*/
|
||||
function navigate_backward() {
|
||||
const url = new URL(window.location)
|
||||
const searchParams = url.searchParams
|
||||
const url = new URL(window.location);
|
||||
const searchParams = url.searchParams;
|
||||
|
||||
let q = searchParams.get('q')
|
||||
let page = searchParams.get('page')
|
||||
let q = searchParams.get('q');
|
||||
let page = parseInt(searchParams.get('page'));
|
||||
|
||||
if (page !== null && page > 1) {
|
||||
window.location = `${url.origin}${url.pathname}?q=${q}&page=${--page}`
|
||||
if (isNaN(page)) {
|
||||
page = 1;
|
||||
} else if (page > 1) {
|
||||
page--;
|
||||
}
|
||||
|
||||
window.location.href = `${url.origin}${url.pathname}?q=${encodeURIComponent(q)}&page=${page}`;
|
||||
}
|
||||
|
@ -118,7 +118,7 @@ impl Config {
|
||||
{
|
||||
Ok("./websurfx/config.lua".to_string())
|
||||
} else {
|
||||
Err(format!("Config file not found!!").into())
|
||||
Err("Config file not found!!".to_string().into())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2,13 +2,17 @@
|
||||
//! by querying the upstream duckduckgo search engine with user provided query and with a page
|
||||
//! number if provided.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::{collections::HashMap, time::Duration};
|
||||
|
||||
use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
|
||||
use scraper::{Html, Selector};
|
||||
|
||||
use crate::search_results_handler::aggregation_models::RawSearchResult;
|
||||
|
||||
use super::engine_models::EngineError;
|
||||
|
||||
use error_stack::{IntoReport, Report, Result, ResultExt};
|
||||
|
||||
/// This function scrapes results from the upstream engine duckduckgo and puts all the scraped
|
||||
/// results like title, visiting_url (href in html), engine (the engine it was fetched from)
|
||||
/// and description in a RawSearchResult and then adds that to HashMap whose keys are url and
|
||||
@ -22,14 +26,15 @@ use crate::search_results_handler::aggregation_models::RawSearchResult;
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns a reqwest error if the user is not connected to the internet or if their is failure to
|
||||
/// reach the above `upstream search engine` page and also returns error if the scraping
|
||||
/// selector fails to initialize"
|
||||
/// Returns an `EngineError` if the user is not connected to the internet or if there is a failure to
|
||||
/// reach the above `upstream search engine` page or if the `upstream search engine` is unable to
|
||||
/// provide results for the requested search query and also returns error if the scraping selector
|
||||
/// or HeaderMap fails to initialize.
|
||||
pub async fn results(
|
||||
query: &str,
|
||||
page: u32,
|
||||
user_agent: &str,
|
||||
) -> Result<HashMap<String, RawSearchResult>, Box<dyn std::error::Error>> {
|
||||
) -> Result<HashMap<String, RawSearchResult>, EngineError> {
|
||||
// Page number can be missing or empty string and so appropriate handling is required
|
||||
// so that upstream server receives valid page number.
|
||||
let url: String = match page {
|
||||
@ -48,26 +53,71 @@ pub async fn results(
|
||||
|
||||
// initializing HeaderMap and adding appropriate headers.
|
||||
let mut header_map = HeaderMap::new();
|
||||
header_map.insert(USER_AGENT, user_agent.parse()?);
|
||||
header_map.insert(REFERER, "https://google.com/".parse()?);
|
||||
header_map.insert(CONTENT_TYPE, "application/x-www-form-urlencoded".parse()?);
|
||||
header_map.insert(COOKIE, "kl=wt-wt".parse()?);
|
||||
header_map.insert(
|
||||
USER_AGENT,
|
||||
user_agent
|
||||
.parse()
|
||||
.into_report()
|
||||
.change_context(EngineError::UnexpectedError)?,
|
||||
);
|
||||
header_map.insert(
|
||||
REFERER,
|
||||
"https://google.com/"
|
||||
.parse()
|
||||
.into_report()
|
||||
.change_context(EngineError::UnexpectedError)?,
|
||||
);
|
||||
header_map.insert(
|
||||
CONTENT_TYPE,
|
||||
"application/x-www-form-urlencoded"
|
||||
.parse()
|
||||
.into_report()
|
||||
.change_context(EngineError::UnexpectedError)?,
|
||||
);
|
||||
header_map.insert(
|
||||
COOKIE,
|
||||
"kl=wt-wt"
|
||||
.parse()
|
||||
.into_report()
|
||||
.change_context(EngineError::UnexpectedError)?,
|
||||
);
|
||||
|
||||
// fetch the html from upstream duckduckgo engine
|
||||
// TODO: Write better error handling code to handle no results case.
|
||||
let results: String = reqwest::Client::new()
|
||||
.get(url)
|
||||
.timeout(Duration::from_secs(5))
|
||||
.headers(header_map) // add spoofed headers to emulate human behaviour
|
||||
.send()
|
||||
.await?
|
||||
.await
|
||||
.into_report()
|
||||
.change_context(EngineError::RequestError)?
|
||||
.text()
|
||||
.await?;
|
||||
.await
|
||||
.into_report()
|
||||
.change_context(EngineError::RequestError)?;
|
||||
|
||||
let document: Html = Html::parse_document(&results);
|
||||
let results: Selector = Selector::parse(".result")?;
|
||||
let result_title: Selector = Selector::parse(".result__a")?;
|
||||
let result_url: Selector = Selector::parse(".result__url")?;
|
||||
let result_desc: Selector = Selector::parse(".result__snippet")?;
|
||||
|
||||
let no_result: Selector = Selector::parse(".no-results")
|
||||
.map_err(|_| Report::new(EngineError::UnexpectedError))
|
||||
.attach_printable_lazy(|| format!("invalid CSS selector: {}", ".no-results"))?;
|
||||
|
||||
if document.select(&no_result).next().is_some() {
|
||||
return Err(Report::new(EngineError::EmptyResultSet));
|
||||
}
|
||||
|
||||
let results: Selector = Selector::parse(".result")
|
||||
.map_err(|_| Report::new(EngineError::UnexpectedError))
|
||||
.attach_printable_lazy(|| format!("invalid CSS selector: {}", ".result"))?;
|
||||
let result_title: Selector = Selector::parse(".result__a")
|
||||
.map_err(|_| Report::new(EngineError::UnexpectedError))
|
||||
.attach_printable_lazy(|| format!("invalid CSS selector: {}", ".result__a"))?;
|
||||
let result_url: Selector = Selector::parse(".result__url")
|
||||
.map_err(|_| Report::new(EngineError::UnexpectedError))
|
||||
.attach_printable_lazy(|| format!("invalid CSS selector: {}", ".result__url"))?;
|
||||
let result_desc: Selector = Selector::parse(".result__snippet")
|
||||
.map_err(|_| Report::new(EngineError::UnexpectedError))
|
||||
.attach_printable_lazy(|| format!("invalid CSS selector: {}", ".result__snippet"))?;
|
||||
|
||||
// scrape all the results from the html
|
||||
Ok(document
|
||||
|
43
src/engines/engine_models.rs
Normal file
43
src/engines/engine_models.rs
Normal file
@ -0,0 +1,43 @@
|
||||
//! This module provides the error enum to handle different errors associated while requesting data from
|
||||
//! the upstream search engines with the search query provided by the user.
|
||||
|
||||
use error_stack::Context;
|
||||
use std::fmt;
|
||||
|
||||
/// A custom error type used for handling engine associated errors.
|
||||
///
|
||||
/// This enum provides variants for three different categories of errors:
|
||||
/// * `RequestError` - This variant handles all request related errors like forbidden, not found,
|
||||
/// etc.
|
||||
/// * `EmptyResultSet` - This variant handles the no results found error provided by the upstream
|
||||
/// search engines.
|
||||
/// * `UnexpectedError` - This variant handles all the errors which are unexpected or occur rarely
|
||||
/// and are errors mostly related to failure in initialization of HeaderMap, Selector errors and
|
||||
/// all other errors occurring within the code handling the `upstream search engines`.
|
||||
#[derive(Debug)]
|
||||
pub enum EngineError {
|
||||
EmptyResultSet,
|
||||
RequestError,
|
||||
UnexpectedError,
|
||||
}
|
||||
|
||||
impl fmt::Display for EngineError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
EngineError::EmptyResultSet => {
|
||||
write!(f, "The upstream search engine returned an empty result set")
|
||||
}
|
||||
EngineError::RequestError => {
|
||||
write!(
|
||||
f,
|
||||
"Error occurred while requesting data from upstream search engine"
|
||||
)
|
||||
}
|
||||
EngineError::UnexpectedError => {
|
||||
write!(f, "An unexpected error occurred while processing the data")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Context for EngineError {}
|
@ -1,2 +1,3 @@
|
||||
pub mod duckduckgo;
|
||||
pub mod engine_models;
|
||||
pub mod searx;
|
||||
|
@ -8,6 +8,9 @@ use std::collections::HashMap;
|
||||
|
||||
use crate::search_results_handler::aggregation_models::RawSearchResult;
|
||||
|
||||
use super::engine_models::EngineError;
|
||||
use error_stack::{IntoReport, Report, Result, ResultExt};
|
||||
|
||||
/// This function scrapes results from the upstream engine searx and puts all the scraped
|
||||
/// results like title, visiting_url (href in html), engine (the engine it was fetched from)
|
||||
/// and description in a RawSearchResult and then adds that to HashMap whose keys are url and
|
||||
@ -21,40 +24,84 @@ use crate::search_results_handler::aggregation_models::RawSearchResult;
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns a reqwest error if the user is not connected to the internet or if their is failure to
|
||||
/// reach the above `upstream search engine` page and also returns error if the scraping
|
||||
/// selector fails to initialize"
|
||||
/// Returns an `EngineError` if the user is not connected to the internet or if there is a failure to
|
||||
/// reach the above `upstream search engine` page or if the `upstream search engine` is unable to
|
||||
/// provide results for the requested search query and also returns error if the scraping selector
|
||||
/// or HeaderMap fails to initialize.
|
||||
pub async fn results(
|
||||
query: &str,
|
||||
page: u32,
|
||||
user_agent: &str,
|
||||
) -> Result<HashMap<String, RawSearchResult>, Box<dyn std::error::Error>> {
|
||||
) -> Result<HashMap<String, RawSearchResult>, EngineError> {
|
||||
// Page number can be missing or empty string and so appropriate handling is required
|
||||
// so that upstream server receives valid page number.
|
||||
let url: String = format!("https://searx.work/search?q={query}&pageno={page}");
|
||||
|
||||
// initializing headers and adding appropriate headers.
|
||||
let mut header_map = HeaderMap::new();
|
||||
header_map.insert(USER_AGENT, user_agent.parse()?);
|
||||
header_map.insert(REFERER, "https://google.com/".parse()?);
|
||||
header_map.insert(CONTENT_TYPE, "application/x-www-form-urlencoded".parse()?);
|
||||
header_map.insert(COOKIE, "categories=general; language=auto; locale=en; autocomplete=duckduckgo; image_proxy=1; method=POST; safesearch=2; theme=simple; results_on_new_tab=1; doi_resolver=oadoi.org; simple_style=auto; center_alignment=1; query_in_title=1; infinite_scroll=0; disabled_engines=; enabled_engines=\"archive is__general\\054yep__general\\054curlie__general\\054currency__general\\054ddg definitions__general\\054wikidata__general\\054duckduckgo__general\\054tineye__general\\054lingva__general\\054startpage__general\\054yahoo__general\\054wiby__general\\054marginalia__general\\054alexandria__general\\054wikibooks__general\\054wikiquote__general\\054wikisource__general\\054wikiversity__general\\054wikivoyage__general\\054dictzone__general\\054seznam__general\\054mojeek__general\\054naver__general\\054wikimini__general\\054brave__general\\054petalsearch__general\\054goo__general\"; disabled_plugins=; enabled_plugins=\"searx.plugins.hostname_replace\\054searx.plugins.oa_doi_rewrite\\054searx.plugins.vim_hotkeys\"; tokens=; maintab=on; enginetab=on".parse()?);
|
||||
header_map.insert(
|
||||
USER_AGENT,
|
||||
user_agent
|
||||
.parse()
|
||||
.into_report()
|
||||
.change_context(EngineError::UnexpectedError)?,
|
||||
);
|
||||
header_map.insert(
|
||||
REFERER,
|
||||
"https://google.com/"
|
||||
.parse()
|
||||
.into_report()
|
||||
.change_context(EngineError::UnexpectedError)?,
|
||||
);
|
||||
header_map.insert(
|
||||
CONTENT_TYPE,
|
||||
"application/x-www-form-urlencoded"
|
||||
.parse()
|
||||
.into_report()
|
||||
.change_context(EngineError::UnexpectedError)?,
|
||||
);
|
||||
header_map.insert(COOKIE, "categories=general; language=auto; locale=en; autocomplete=duckduckgo; image_proxy=1; method=POST; safesearch=2; theme=simple; results_on_new_tab=1; doi_resolver=oadoi.org; simple_style=auto; center_alignment=1; query_in_title=1; infinite_scroll=0; disabled_engines=; enabled_engines=\"archive is__general\\054yep__general\\054curlie__general\\054currency__general\\054ddg definitions__general\\054wikidata__general\\054duckduckgo__general\\054tineye__general\\054lingva__general\\054startpage__general\\054yahoo__general\\054wiby__general\\054marginalia__general\\054alexandria__general\\054wikibooks__general\\054wikiquote__general\\054wikisource__general\\054wikiversity__general\\054wikivoyage__general\\054dictzone__general\\054seznam__general\\054mojeek__general\\054naver__general\\054wikimini__general\\054brave__general\\054petalsearch__general\\054goo__general\"; disabled_plugins=; enabled_plugins=\"searx.plugins.hostname_replace\\054searx.plugins.oa_doi_rewrite\\054searx.plugins.vim_hotkeys\"; tokens=; maintab=on; enginetab=on".parse().into_report().change_context(EngineError::UnexpectedError)?);
|
||||
|
||||
// fetch the html from upstream searx instance engine
|
||||
// TODO: Write better error handling code to handle no results case.
|
||||
let results: String = reqwest::Client::new()
|
||||
.get(url)
|
||||
.headers(header_map) // add spoofed headers to emulate human behaviours.
|
||||
.send()
|
||||
.await?
|
||||
.await
|
||||
.into_report()
|
||||
.change_context(EngineError::RequestError)?
|
||||
.text()
|
||||
.await?;
|
||||
.await
|
||||
.into_report()
|
||||
.change_context(EngineError::RequestError)?;
|
||||
|
||||
let document: Html = Html::parse_document(&results);
|
||||
let results: Selector = Selector::parse(".result")?;
|
||||
let result_title: Selector = Selector::parse("h3>a")?;
|
||||
let result_url: Selector = Selector::parse("h3>a")?;
|
||||
let result_desc: Selector = Selector::parse(".content")?;
|
||||
|
||||
let no_result: Selector = Selector::parse("#urls>.dialog-error>p")
|
||||
.map_err(|_| Report::new(EngineError::UnexpectedError))
|
||||
.attach_printable_lazy(|| format!("invalid CSS selector: {}", "#urls>.dialog-error>p"))?;
|
||||
|
||||
if let Some(no_result_msg) = document.select(&no_result).nth(1) {
|
||||
if no_result_msg.inner_html()
|
||||
== "we didn't find any results. Please use another query or search in more categories"
|
||||
{
|
||||
return Err(Report::new(EngineError::EmptyResultSet));
|
||||
}
|
||||
}
|
||||
|
||||
let results: Selector = Selector::parse(".result")
|
||||
.map_err(|_| Report::new(EngineError::UnexpectedError))
|
||||
.attach_printable_lazy(|| format!("invalid CSS selector: {}", ".result"))?;
|
||||
let result_title: Selector = Selector::parse("h3>a")
|
||||
.map_err(|_| Report::new(EngineError::UnexpectedError))
|
||||
.attach_printable_lazy(|| format!("invalid CSS selector: {}", "h3>a"))?;
|
||||
let result_url: Selector = Selector::parse("h3>a")
|
||||
.map_err(|_| Report::new(EngineError::UnexpectedError))
|
||||
.attach_printable_lazy(|| format!("invalid CSS selector: {}", "h3>a"))?;
|
||||
|
||||
let result_desc: Selector = Selector::parse(".content")
|
||||
.map_err(|_| Report::new(EngineError::UnexpectedError))
|
||||
.attach_printable_lazy(|| format!("invalid CSS selector: {}", ".content"))?;
|
||||
|
||||
// scrape all the results from the html
|
||||
Ok(document
|
||||
|
@ -58,8 +58,19 @@ pub async fn aggregate(
|
||||
searx::results(query, page, &user_agent)
|
||||
);
|
||||
|
||||
let ddg_map_results: HashMap<String, RawSearchResult> = ddg_map_results?;
|
||||
let searx_map_results: HashMap<String, RawSearchResult> = searx_map_results?;
|
||||
let ddg_map_results = ddg_map_results.unwrap_or_else(|e| {
|
||||
if debug {
|
||||
log::error!("Error fetching results from DuckDuckGo: {:?}", e);
|
||||
}
|
||||
HashMap::new()
|
||||
});
|
||||
|
||||
let searx_map_results = searx_map_results.unwrap_or_else(|e| {
|
||||
if debug {
|
||||
log::error!("Error fetching results from Searx: {:?}", e);
|
||||
}
|
||||
HashMap::new()
|
||||
});
|
||||
|
||||
result_map.extend(ddg_map_results);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user