diff --git a/Cargo.lock b/Cargo.lock
index b716074..0bc2c36 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4066,7 +4066,7 @@ checksum = "1778a42e8b3b90bff8d0f5032bf22250792889a5cdc752aa0020c84abe3aaf10"
[[package]]
name = "websurfx"
-version = "1.3.6"
+version = "1.4.0"
dependencies = [
"actix-cors",
"actix-files",
diff --git a/Cargo.toml b/Cargo.toml
index 1f92639..3a0beb7 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "websurfx"
-version = "1.3.6"
+version = "1.4.0"
edition = "2021"
description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
repository = "https://github.com/neon-mmd/websurfx"
diff --git a/public/images/websurfx_logo.png b/public/images/websurfx_logo.png
index 8667af3..24d39e1 100644
Binary files a/public/images/websurfx_logo.png and b/public/images/websurfx_logo.png differ
diff --git a/public/images/websurfx_logo.svg b/public/images/websurfx_logo.svg
new file mode 100644
index 0000000..2574345
--- /dev/null
+++ b/public/images/websurfx_logo.svg
@@ -0,0 +1,7 @@
+
diff --git a/public/static/themes/simple.css b/public/static/themes/simple.css
index 7c01ca5..5eb8949 100644
--- a/public/static/themes/simple.css
+++ b/public/static/themes/simple.css
@@ -33,6 +33,10 @@ body {
display: flex;
}
+.websurfx-logo {
+ width: clamp(12rem, 40rem, 48rem);
+}
+
/* styles for the search box and search button */
.search_bar {
diff --git a/src/engines/mod.rs b/src/engines/mod.rs
index 2892445..53d720b 100644
--- a/src/engines/mod.rs
+++ b/src/engines/mod.rs
@@ -7,3 +7,4 @@ pub mod brave;
pub mod duckduckgo;
pub mod search_result_parser;
pub mod searx;
+pub mod startpage;
diff --git a/src/engines/startpage.rs b/src/engines/startpage.rs
new file mode 100644
index 0000000..44135e1
--- /dev/null
+++ b/src/engines/startpage.rs
@@ -0,0 +1,96 @@
+//! The `startpage` module handles the scraping of results from the startpage search engine
+//! by querying the upstream startpage search engine with user provided query and with a page
+//! number if provided.
+
+use std::collections::HashMap;
+
+use reqwest::header::HeaderMap;
+use reqwest::Client;
+use scraper::Html;
+
+use crate::models::aggregation_models::SearchResult;
+
+use crate::models::engine_models::{EngineError, SearchEngine};
+
+use error_stack::{Report, Result, ResultExt};
+
+use super::search_result_parser::SearchResultParser;
+
+/// A new Startpage engine type defined in-order to implement the `SearchEngine` trait which allows to
+/// reduce code duplication as well as allows to create vector of different search engines easily.
+pub struct Startpage {
+ /// The parser, used to interpret the search result.
+ parser: SearchResultParser,
+}
+
+impl Startpage {
+ /// Creates the Startpage parser.
+    pub fn new() -> Result<Self, EngineError> {
+ Ok(Self {
+ parser: SearchResultParser::new(
+ ".no-results",
+ ".w-gl__result__main",
+ ".w-gl__result-second-line-container>.w-gl__result-title>h3",
+ ".w-gl__result-url",
+ ".w-gl__description",
+ )?,
+ })
+ }
+}
+
+#[async_trait::async_trait]
+impl SearchEngine for Startpage {
+ async fn results(
+ &self,
+ query: &str,
+ page: u32,
+ user_agent: &str,
+ client: &Client,
+ _safe_search: u8,
+    ) -> Result<HashMap<String, SearchResult>, EngineError> {
+        // Page number can be missing or empty string and so appropriate handling is required
+        // so that upstream server receives valid page number.
+ let url: String = match page {
+ 1 | 0 => {
+ format!("https://startpage.com/do/dsearch?q={query}&num=10&start=0")
+ }
+ _ => {
+ format!(
+ "https://startpage.com/do/dsearch?q={query}&num=10&start={}",
+ page * 10,
+ )
+ }
+ };
+
+ // initializing HeaderMap and adding appropriate headers.
+ let header_map = HeaderMap::try_from(&HashMap::from([
+ ("USER_AGENT".to_string(), user_agent.to_string()),
+ ("REFERER".to_string(), "https://google.com/".to_string()),
+ (
+ "CONTENT_TYPE".to_string(),
+ "application/x-www-form-urlencoded".to_string(),
+ ),
+ ("COOKIE".to_string(), "preferences=connect_to_serverEEE0N1Ndate_timeEEEworldN1Ndisable_family_filterEEE0N1Ndisable_open_in_new_windowEEE0N1Nenable_post_methodEEE1N1Nenable_proxy_safety_suggestEEE1N1Nenable_stay_controlEEE0N1Ninstant_answersEEE1N1Nlang_homepageEEEs%2Fnight%2FenN1NlanguageEEEenglishN1Nlanguage_uiEEEenglishN1Nnum_of_resultsEEE10N1Nsearch_results_regionEEEallN1NsuggestionsEEE1N1Nwt_unitEEEcelsius".to_string()),
+ ]))
+ .change_context(EngineError::UnexpectedError)?;
+
+ let document: Html = Html::parse_document(
+ &Startpage::fetch_html_from_upstream(self, &url, header_map, client).await?,
+ );
+
+ if self.parser.parse_for_no_results(&document).next().is_some() {
+ return Err(Report::new(EngineError::EmptyResultSet));
+ }
+
+ // scrape all the results from the html
+ self.parser
+ .parse_for_results(&document, |title, url, desc| {
+ Some(SearchResult::new(
+ title.inner_html().trim(),
+ &format!("{}", url.inner_html().trim()),
+ desc.inner_html().trim(),
+ &["startpage"],
+ ))
+ })
+ }
+}
diff --git a/src/models/engine_models.rs b/src/models/engine_models.rs
index f8e966e..1ab04ed 100644
--- a/src/models/engine_models.rs
+++ b/src/models/engine_models.rs
@@ -154,6 +154,10 @@ impl EngineHandler {
let engine = crate::engines::brave::Brave::new()?;
("brave", Box::new(engine))
}
+ "startpage" => {
+ let engine = crate::engines::startpage::Startpage::new()?;
+ ("startpage", Box::new(engine))
+ }
_ => {
return Err(Report::from(EngineError::NoSuchEngineFound(
engine_name.to_string(),
diff --git a/src/templates/views/index.rs b/src/templates/views/index.rs
index 3816f22..cfa1eb6 100644
--- a/src/templates/views/index.rs
+++ b/src/templates/views/index.rs
@@ -18,7 +18,7 @@ pub fn index(colorscheme: &str, theme: &str) -> Markup {
html!(
(header(colorscheme, theme))
main class="search-container"{
- img src="../images/websurfx_logo.png" alt="Websurfx meta-search engine logo";
+ img class="websurfx-logo" src="../images/websurfx_logo.svg" alt="Websurfx meta-search engine logo";
(bar(&String::default()))
(PreEscaped(""))
}
diff --git a/websurfx/config.lua b/websurfx/config.lua
index 57d552c..22e2c4f 100644
--- a/websurfx/config.lua
+++ b/websurfx/config.lua
@@ -1,18 +1,18 @@
-- ### General ###
logging = true -- an option to enable or disable logs.
-debug = false -- an option to enable or disable debug mode.
-threads = 10 -- the amount of threads that the app will use to run (the value should be greater than 0).
+debug = false -- an option to enable or disable debug mode.
+threads = 10 -- the amount of threads that the app will use to run (the value should be greater than 0).
-- ### Server ###
-port = "8080" -- port on which server should be launched
+port = "8080" -- port on which server should be launched
binding_ip = "127.0.0.1" --ip address on the which server should be launched.
-production_use = false -- whether to use production mode or not (in other words this option should be used if it is to be used to host it on the server to provide a service to a large number of users (more than one))
+production_use = false -- whether to use production mode or not (in other words this option should be used if it is to be used to host it on the server to provide a service to a large number of users (more than one))
-- if production_use is set to true
-- There will be a random delay before sending the request to the search engines, this is to prevent DDoSing the upstream search engines from a large number of simultaneous requests.
-request_timeout = 30 -- timeout for the search requests sent to the upstream search engines to be fetched (value in seconds).
+request_timeout = 30 -- timeout for the search requests sent to the upstream search engines to be fetched (value in seconds).
rate_limiter = {
- number_of_requests = 20, -- The number of request that are allowed within a provided time limit.
- time_limit = 3, -- The time limit in which the quantity of requests that should be accepted.
+ number_of_requests = 20, -- The number of request that are allowed within a provided time limit.
+ time_limit = 3, -- The time limit in which the quantity of requests that should be accepted.
}
-- ### Search ###
@@ -43,14 +43,15 @@ safe_search = 2
-- tomorrow-night
-- }}
colorscheme = "catppuccin-mocha" -- the colorscheme name which should be used for the website theme
-theme = "simple" -- the theme name which should be used for the website
+theme = "simple" -- the theme name which should be used for the website
-- ### Caching ###
redis_url = "redis://127.0.0.1:8082" -- redis connection url address on which the client should connect on.
-cache_expiry_time = 600 -- This option takes the expiry time of the search results (value in seconds and the value should be greater than or equal to 60 seconds).
+cache_expiry_time = 600 -- This option takes the expiry time of the search results (value in seconds and the value should be greater than or equal to 60 seconds).
-- ### Search Engines ###
upstream_search_engines = {
- DuckDuckGo = true,
- Searx = false,
- Brave = false,
+ DuckDuckGo = true,
+ Searx = false,
+ Brave = false,
+ Startpage = false,
} -- select the upstream search engines from which the results should be fetched.