mirror of
https://github.com/neon-mmd/websurfx.git
synced 2024-12-27 06:48:23 -05:00
Merge branch 'rolling' into rolling
This commit is contained in:
commit
4e11bedc91
2
Cargo.lock
generated
2
Cargo.lock
generated
@ -4066,7 +4066,7 @@ checksum = "1778a42e8b3b90bff8d0f5032bf22250792889a5cdc752aa0020c84abe3aaf10"
|
||||
|
||||
[[package]]
|
||||
name = "websurfx"
|
||||
version = "1.3.6"
|
||||
version = "1.4.0"
|
||||
dependencies = [
|
||||
"actix-cors",
|
||||
"actix-files",
|
||||
|
@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "websurfx"
|
||||
version = "1.3.6"
|
||||
version = "1.4.0"
|
||||
edition = "2021"
|
||||
description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
|
||||
repository = "https://github.com/neon-mmd/websurfx"
|
||||
|
Binary file not shown.
Before Width: | Height: | Size: 7.2 KiB After Width: | Height: | Size: 8.3 KiB |
7
public/images/websurfx_logo.svg
Normal file
7
public/images/websurfx_logo.svg
Normal file
@ -0,0 +1,7 @@
|
||||
<svg viewBox="0 0 173 57" fill="none" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M77.8201 21.4277L73.4513 35.5049H70.3855L67.5496 25.1067L64.7137 35.5049H61.6479L57.2536 21.4277H60.2172L63.1553 32.7457L66.1444 21.4277H69.1847L72.0461 32.6946L74.9586 21.4277H77.8201ZM92.8986 28.1214C92.8986 28.6494 92.8645 29.1263 92.7964 29.5521H82.0405C82.1257 30.6762 82.543 31.5789 83.2924 32.2602C84.0418 32.9415 84.9616 33.2822 86.0516 33.2822C87.6186 33.2822 88.7257 32.6264 89.3729 31.3149H92.5154C92.0896 32.6094 91.3146 33.6739 90.1905 34.5085C89.0834 35.326 87.7038 35.7348 86.0516 35.7348C84.7061 35.7348 83.4968 35.4368 82.4238 34.8406C81.3678 34.2275 80.5332 33.3758 79.92 32.2858C79.3239 31.1787 79.0258 29.9013 79.0258 28.4535C79.0258 27.0058 79.3154 25.7369 79.8945 24.6468C80.4906 23.5397 81.3167 22.6881 82.3727 22.092C83.4457 21.4958 84.672 21.1978 86.0516 21.1978C87.3801 21.1978 88.5639 21.4873 89.6029 22.0664C90.6418 22.6455 91.4509 23.4631 92.03 24.5191C92.6091 25.558 92.8986 26.7588 92.8986 28.1214ZM89.8583 27.2016C89.8413 26.1286 89.4581 25.2685 88.7087 24.6213C87.9592 23.974 87.031 23.6504 85.9239 23.6504C84.919 23.6504 84.0589 23.974 83.3435 24.6213C82.6281 25.2515 82.2023 26.1116 82.0661 27.2016H89.8583ZM98.6773 23.5227C99.1713 22.8414 99.844 22.2878 100.696 21.862C101.564 21.4192 102.527 21.1978 103.583 21.1978C104.826 21.1978 105.95 21.4958 106.955 22.092C107.96 22.6881 108.752 23.5397 109.331 24.6468C109.91 25.7369 110.2 26.9887 110.2 28.4024C110.2 29.8161 109.91 31.085 109.331 32.2091C108.752 33.3162 107.951 34.1849 106.929 34.8151C105.925 35.4282 104.809 35.7348 103.583 35.7348C102.493 35.7348 101.522 35.5219 100.67 35.0961C99.8355 34.6703 99.1713 34.1253 98.6773 33.461V35.5049H95.7648V16.5991H98.6773V23.5227ZM107.236 28.4024C107.236 27.4316 107.032 26.597 106.623 25.8987C106.231 25.1833 105.703 24.6468 105.039 24.2891C104.392 23.9144 103.693 23.7271 102.944 23.7271C102.212 23.7271 101.513 23.9144 100.849 24.2891C100.202 24.6638 99.6737 25.2089 99.265 25.9242C98.8732 26.6396 98.6773 27.4827 98.6773 28.4535C98.6773 29.4244 98.8732 
30.276 99.265 31.0084C99.6737 31.7237 100.202 32.2688 100.849 32.6435C101.513 33.0182 102.212 33.2055 102.944 33.2055C103.693 33.2055 104.392 33.0182 105.039 32.6435C105.703 32.2517 106.231 31.6897 106.623 30.9573C107.032 30.2249 107.236 29.3733 107.236 28.4024ZM118.19 35.7348C117.082 35.7348 116.086 35.5389 115.2 35.1472C114.332 34.7384 113.642 34.1934 113.131 33.5121C112.62 32.8138 112.347 32.0388 112.313 31.1872H115.328C115.379 31.7833 115.66 32.2858 116.171 32.6946C116.699 33.0863 117.355 33.2822 118.138 33.2822C118.956 33.2822 119.586 33.1289 120.029 32.8223C120.489 32.4987 120.719 32.0899 120.719 31.596C120.719 31.068 120.463 30.6762 119.952 30.4207C119.458 30.1653 118.666 29.8842 117.576 29.5777C116.52 29.2881 115.66 29.0071 114.996 28.7346C114.332 28.462 113.753 28.0447 113.259 27.4827C112.782 26.9206 112.543 26.1797 112.543 25.26C112.543 24.5105 112.765 23.8293 113.208 23.2161C113.65 22.5859 114.281 22.092 115.098 21.7343C115.933 21.3766 116.887 21.1978 117.96 21.1978C119.561 21.1978 120.847 21.6065 121.817 22.4241C122.805 23.2246 123.333 24.3232 123.401 25.7198H120.489C120.438 25.0896 120.182 24.5872 119.722 24.2125C119.263 23.8378 118.641 23.6504 117.857 23.6504C117.091 23.6504 116.503 23.7952 116.095 24.0847C115.686 24.3743 115.481 24.7575 115.481 25.2344C115.481 25.6091 115.618 25.9242 115.89 26.1797C116.163 26.4352 116.495 26.6396 116.887 26.7929C117.278 26.9291 117.857 27.108 118.624 27.3294C119.646 27.6019 120.48 27.8829 121.128 28.1725C121.792 28.445 122.362 28.8538 122.839 29.3988C123.316 29.9438 123.563 30.6677 123.58 31.5704C123.58 32.3709 123.359 33.0863 122.916 33.7165C122.473 34.3467 121.843 34.8406 121.025 35.1983C120.225 35.556 119.28 35.7348 118.19 35.7348ZM139.476 21.4277V35.5049H136.563V33.8442C136.104 34.4233 135.499 34.8832 134.75 35.2239C134.017 35.5475 133.234 35.7093 132.399 35.7093C131.292 35.7093 130.296 35.4793 129.41 35.0195C128.541 34.5596 127.851 33.8783 127.34 32.9756C126.847 32.0729 126.6 30.9828 126.6 
29.7054V21.4277H129.487V29.2711C129.487 30.5315 129.802 31.5023 130.432 32.1836C131.062 32.8478 131.922 33.18 133.012 33.18C134.102 33.18 134.962 32.8478 135.593 32.1836C136.24 31.5023 136.563 30.5315 136.563 29.2711V21.4277H139.476ZM146.231 23.4716C146.657 22.7562 147.219 22.2027 147.918 21.8109C148.633 21.4022 149.476 21.1978 150.447 21.1978V24.2125H149.706C148.565 24.2125 147.696 24.502 147.1 25.0811C146.521 25.6602 146.231 26.6651 146.231 28.0958V35.5049H143.319V21.4277H146.231V23.4716ZM159.026 23.8037H156.42V35.5049H153.482V23.8037H151.821V21.4277H153.482V20.4313C153.482 18.8133 153.907 17.638 154.759 16.9056C155.628 16.1562 156.982 15.7815 158.821 15.7815V18.2086C157.936 18.2086 157.314 18.3789 156.956 18.7196C156.599 19.0432 156.42 19.6138 156.42 20.4313V21.4277H159.026V23.8037ZM167.636 28.3769L172.184 35.5049H168.888L165.848 30.7273L162.986 35.5049H159.946L164.494 28.5813L159.946 21.4277H163.242L166.282 26.2053L169.144 21.4277H172.184L167.636 28.3769Z" fill="white"/>
|
||||
<path d="M2.21486 42.7894C1.15271 43.0507 0.550463 44.1151 1.00616 45.1192C4.17619 52.1035 11.5005 54.9673 23.3646 52.0493C35.2399 49.1285 47.5128 41.4358 47.2254 33.7293C47.1854 32.6562 46.0226 32.0146 44.9605 32.2759L2.21486 42.7894Z" fill="white"/>
|
||||
<path d="M20.1227 10.0027C21.9192 10.8048 23.7313 11.7606 25.4259 12.8819C28.7827 15.1031 31.9178 18.1341 33.329 22.1366C34.1626 24.5009 34.0742 26.7513 33.2144 28.7679C32.4048 30.6666 30.9903 32.178 29.4212 33.3664C37.6699 31.0439 47.0335 26.0679 44.0686 17.608C40.9417 8.68557 29.3768 3.38405 21.266 1.04683C19.3981 0.508566 17.8191 2.37853 18.4252 4.22557C18.9773 5.90835 19.5596 7.85665 20.1227 10.0027Z" fill="white"/>
|
||||
<path d="M8.27125 34.3558C8.02834 30.0503 7.01551 25.8501 5.987 22.6653C5.38101 20.7888 6.95924 18.8318 8.8458 19.4057C13.6444 20.8655 19.4581 23.6235 21.1736 27.928C23.1268 32.8287 16.4467 35.584 11.1405 36.7375C9.66674 37.0579 8.35621 35.8616 8.27125 34.3558Z" fill="white"/>
|
||||
<path d="M12.5601 18.017C14.2332 18.6725 15.9372 19.4786 17.5019 20.4515C19.9805 21.9927 22.38 24.1208 23.5241 26.9914C24.2516 28.8168 24.2152 30.6223 23.4834 32.2482C23.1213 33.0529 22.6157 33.7553 22.0354 34.3669C27.6731 32.3348 32.9532 28.6804 30.9428 22.9781C28.6334 16.428 20.4047 12.4027 14.1807 10.4674C12.3234 9.88988 10.7357 11.7563 11.3182 13.6121C11.7303 14.9248 12.1549 16.4076 12.5601 18.017Z" fill="white" fill-opacity="0.89"/>
|
||||
</svg>
|
After Width: | Height: | Size: 6.3 KiB |
@ -33,6 +33,10 @@ body {
|
||||
display: flex;
|
||||
}
|
||||
|
||||
/* Sizing for the Websurfx logo image. */
.websurfx-logo {
  /* A clamp() whose preferred value is a fixed length lying between its bounds always
     resolves to that length, so the previous rule was a constant 40rem. Using 100% as the
     preferred value keeps the 40rem width wherever the container has room and lets the
     logo shrink (down to 12rem) inside narrower containers instead of overflowing. */
  width: clamp(12rem, 100%, 40rem);
}
|
||||
|
||||
/* styles for the search box and search button */
|
||||
|
||||
.search_bar {
|
||||
|
@ -7,3 +7,4 @@ pub mod brave;
|
||||
pub mod duckduckgo;
|
||||
pub mod search_result_parser;
|
||||
pub mod searx;
|
||||
pub mod startpage;
|
||||
|
96
src/engines/startpage.rs
Normal file
96
src/engines/startpage.rs
Normal file
@ -0,0 +1,96 @@
|
||||
//! The `startpage` module handles the scraping of results from the startpage search engine
|
||||
//! by querying the upstream startpage search engine with user provided query and with a page
|
||||
//! number if provided.
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use reqwest::header::HeaderMap;
|
||||
use reqwest::Client;
|
||||
use scraper::Html;
|
||||
|
||||
use crate::models::aggregation_models::SearchResult;
|
||||
|
||||
use crate::models::engine_models::{EngineError, SearchEngine};
|
||||
|
||||
use error_stack::{Report, Result, ResultExt};
|
||||
|
||||
use super::search_result_parser::SearchResultParser;
|
||||
|
||||
/// A new Startpage engine type defined in-order to implement the `SearchEngine` trait which allows to
|
||||
/// reduce code duplication as well as allows to create vector of different search engines easily.
|
||||
pub struct Startpage {
|
||||
/// The parser, used to interpret the search result.
|
||||
parser: SearchResultParser,
|
||||
}
|
||||
|
||||
impl Startpage {
|
||||
/// Creates the Startpage parser.
|
||||
pub fn new() -> Result<Self, EngineError> {
|
||||
Ok(Self {
|
||||
parser: SearchResultParser::new(
|
||||
".no-results",
|
||||
".w-gl__result__main",
|
||||
".w-gl__result-second-line-container>.w-gl__result-title>h3",
|
||||
".w-gl__result-url",
|
||||
".w-gl__description",
|
||||
)?,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl SearchEngine for Startpage {
|
||||
async fn results(
|
||||
&self,
|
||||
query: &str,
|
||||
page: u32,
|
||||
user_agent: &str,
|
||||
client: &Client,
|
||||
_safe_search: u8,
|
||||
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
||||
// Page number can be missing or empty string and so appropriate handling is required
|
||||
// so that upstream server recieves valid page number.
|
||||
let url: String = match page {
|
||||
1 | 0 => {
|
||||
format!("https://startpage.com/do/dsearch?q={query}&num=10&start=0")
|
||||
}
|
||||
_ => {
|
||||
format!(
|
||||
"https://startpage.com/do/dsearch?q={query}&num=10&start={}",
|
||||
page * 10,
|
||||
)
|
||||
}
|
||||
};
|
||||
|
||||
// initializing HeaderMap and adding appropriate headers.
|
||||
let header_map = HeaderMap::try_from(&HashMap::from([
|
||||
("USER_AGENT".to_string(), user_agent.to_string()),
|
||||
("REFERER".to_string(), "https://google.com/".to_string()),
|
||||
(
|
||||
"CONTENT_TYPE".to_string(),
|
||||
"application/x-www-form-urlencoded".to_string(),
|
||||
),
|
||||
("COOKIE".to_string(), "preferences=connect_to_serverEEE0N1Ndate_timeEEEworldN1Ndisable_family_filterEEE0N1Ndisable_open_in_new_windowEEE0N1Nenable_post_methodEEE1N1Nenable_proxy_safety_suggestEEE1N1Nenable_stay_controlEEE0N1Ninstant_answersEEE1N1Nlang_homepageEEEs%2Fnight%2FenN1NlanguageEEEenglishN1Nlanguage_uiEEEenglishN1Nnum_of_resultsEEE10N1Nsearch_results_regionEEEallN1NsuggestionsEEE1N1Nwt_unitEEEcelsius".to_string()),
|
||||
]))
|
||||
.change_context(EngineError::UnexpectedError)?;
|
||||
|
||||
let document: Html = Html::parse_document(
|
||||
&Startpage::fetch_html_from_upstream(self, &url, header_map, client).await?,
|
||||
);
|
||||
|
||||
if self.parser.parse_for_no_results(&document).next().is_some() {
|
||||
return Err(Report::new(EngineError::EmptyResultSet));
|
||||
}
|
||||
|
||||
// scrape all the results from the html
|
||||
self.parser
|
||||
.parse_for_results(&document, |title, url, desc| {
|
||||
Some(SearchResult::new(
|
||||
title.inner_html().trim(),
|
||||
&format!("{}", url.inner_html().trim()),
|
||||
desc.inner_html().trim(),
|
||||
&["startpage"],
|
||||
))
|
||||
})
|
||||
}
|
||||
}
|
@ -154,6 +154,10 @@ impl EngineHandler {
|
||||
let engine = crate::engines::brave::Brave::new()?;
|
||||
("brave", Box::new(engine))
|
||||
}
|
||||
"startpage" => {
|
||||
let engine = crate::engines::startpage::Startpage::new()?;
|
||||
("startpage", Box::new(engine))
|
||||
}
|
||||
_ => {
|
||||
return Err(Report::from(EngineError::NoSuchEngineFound(
|
||||
engine_name.to_string(),
|
||||
|
@ -18,7 +18,7 @@ pub fn index(colorscheme: &str, theme: &str) -> Markup {
|
||||
html!(
|
||||
(header(colorscheme, theme))
|
||||
main class="search-container"{
|
||||
img src="../images/websurfx_logo.png" alt="Websurfx meta-search engine logo";
|
||||
img class="websurfx-logo" src="../images/websurfx_logo.svg" alt="Websurfx meta-search engine logo";
|
||||
(bar(&String::default()))
|
||||
(PreEscaped("</div>"))
|
||||
}
|
||||
|
@ -1,18 +1,18 @@
|
||||
-- ### General ###
|
||||
logging = true -- an option to enable or disable logs.
|
||||
debug = false -- an option to enable or disable debug mode.
|
||||
threads = 10 -- the amount of threads that the app will use to run (the value should be greater than 0).
|
||||
debug = false -- an option to enable or disable debug mode.
|
||||
threads = 10 -- the amount of threads that the app will use to run (the value should be greater than 0).
|
||||
|
||||
-- ### Server ###
|
||||
port = "8080" -- port on which server should be launched
|
||||
port = "8080" -- port on which server should be launched
|
||||
binding_ip = "127.0.0.1" -- IP address on which the server should be launched.
|
||||
production_use = false -- whether to use production mode or not (in other words this option should be used if it is to be used to host it on the server to provide a service to a large number of users (more than one))
|
||||
production_use = false -- whether to use production mode or not (in other words this option should be used if it is to be used to host it on the server to provide a service to a large number of users (more than one))
|
||||
-- if production_use is set to true
|
||||
-- There will be a random delay before sending the request to the search engines, this is to prevent DDoSing the upstream search engines from a large number of simultaneous requests.
|
||||
request_timeout = 30 -- timeout for the search requests sent to the upstream search engines to be fetched (value in seconds).
|
||||
request_timeout = 30 -- timeout for the search requests sent to the upstream search engines to be fetched (value in seconds).
|
||||
rate_limiter = {
|
||||
number_of_requests = 20, -- The number of request that are allowed within a provided time limit.
|
||||
time_limit = 3, -- The time limit in which the quantity of requests that should be accepted.
|
||||
number_of_requests = 20, -- The number of request that are allowed within a provided time limit.
|
||||
time_limit = 3, -- The time limit in which the quantity of requests that should be accepted.
|
||||
}
|
||||
|
||||
-- ### Search ###
|
||||
@ -43,14 +43,15 @@ safe_search = 2
|
||||
-- tomorrow-night
|
||||
-- }}
|
||||
colorscheme = "catppuccin-mocha" -- the colorscheme name which should be used for the website theme
|
||||
theme = "simple" -- the theme name which should be used for the website
|
||||
theme = "simple" -- the theme name which should be used for the website
|
||||
|
||||
-- ### Caching ###
|
||||
redis_url = "redis://127.0.0.1:8082" -- redis connection url address on which the client should connect on.
|
||||
cache_expiry_time = 600 -- This option takes the expiry time of the search results (value in seconds and the value should be greater than or equal to 60 seconds).
|
||||
cache_expiry_time = 600 -- This option takes the expiry time of the search results (value in seconds and the value should be greater than or equal to 60 seconds).
|
||||
-- ### Search Engines ###
|
||||
upstream_search_engines = {
|
||||
DuckDuckGo = true,
|
||||
Searx = false,
|
||||
Brave = false,
|
||||
DuckDuckGo = true,
|
||||
Searx = false,
|
||||
Brave = false,
|
||||
Startpage = false,
|
||||
} -- select the upstream search engines from which the results should be fetched.
|
||||
|
Loading…
Reference in New Issue
Block a user