mirror of https://github.com/neon-mmd/websurfx.git synced 2024-11-21 21:48:21 -05:00

Refactoring code and separating it into files for better maintainability

neon_arch 2023-04-25 16:30:04 +03:00
parent bb61ee3afe
commit f94ac503e2
7 changed files with 159 additions and 123 deletions

View File

@@ -19,6 +19,8 @@ struct CliArgs {

 const PORT_RANGE: RangeInclusive<usize> = 1024..=65535;

+// A function to check whether the port is a valid u16 number and is in the
+// range [1024-65535]; otherwise an appropriate error message is displayed.
 fn is_port_in_range(s: &str) -> Result<u16, String> {
     let port: usize = s
         .parse()
@@ -39,6 +41,7 @@ fn is_port_in_range(s: &str) -> Result<u16, String> {
 async fn main() -> std::io::Result<()> {
     let args = CliArgs::parse();

+    // Initializing logging middleware with level set to default or info.
     env_logger::Builder::from_env(Env::default().default_filter_or("info")).init();

     log::info!("started server on port {}", args.port);
@@ -54,7 +57,7 @@ async fn main() -> std::io::Result<()> {
     HttpServer::new(move || {
         App::new()
             .app_data(handlebars_ref.clone())
-            .wrap(Logger::default())
+            .wrap(Logger::default()) // added logging middleware for logging.
             // Serve images and static files (css and js files).
             .service(fs::Files::new("/static", "./public/static").show_files_listing())
             .service(fs::Files::new("/images", "./public/images").show_files_listing())
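The hunks above show only the validator's signature and the start of its body; the rest of is_port_in_range and the CliArgs fields sit outside the diff context. As a rough sketch of how such a validator is typically wired into clap's derive API (the field name, default, and error messages here are assumptions, not the repository's code):

use std::ops::RangeInclusive;

use clap::Parser;

const PORT_RANGE: RangeInclusive<usize> = 1024..=65535;

// Same contract as the validator above: parse the string, then range-check it.
fn is_port_in_range(s: &str) -> Result<u16, String> {
    let port: usize = s
        .parse()
        .map_err(|_| format!("`{s}` is not a valid port number"))?;
    if PORT_RANGE.contains(&port) {
        Ok(port as u16)
    } else {
        Err(format!(
            "port not in range {}-{}",
            PORT_RANGE.start(),
            PORT_RANGE.end()
        ))
    }
}

#[derive(Parser)]
struct CliArgs {
    // The value_parser hook is what makes clap run the check at startup.
    #[arg(long, default_value_t = 8080, value_parser = is_port_in_range)]
    port: u16,
}

fn main() {
    let args = CliArgs::parse();
    println!("would start the server on port {}", args.port);
}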

View File

@@ -48,49 +48,37 @@ pub async fn results(
     let result_url: Selector = Selector::parse(".result__url")?;
     let result_desc: Selector = Selector::parse(".result__snippet")?;

-    let mut search_results: HashMap<String, RawSearchResult> = HashMap::new();
-
     // scrape all the results from the html
-    for result in document.select(&results) {
-        let search_result: RawSearchResult = RawSearchResult {
-            title: result
-                .select(&result_title)
-                .next()
-                .unwrap()
-                .inner_html()
-                .trim()
-                .to_string(),
-            visiting_url: format!(
-                "https://{}",
-                result
-                    .select(&result_url)
-                    .next()
-                    .unwrap()
-                    .inner_html()
-                    .trim()
-            ),
-            description: result
-                .select(&result_desc)
-                .next()
-                .unwrap()
-                .inner_html()
-                .trim()
-                .to_string(),
-            engine: vec!["duckduckgo".to_string()],
-        };
-        search_results.insert(
-            format!(
-                "https://{}",
-                result
-                    .select(&result_url)
-                    .next()
-                    .unwrap()
-                    .inner_html()
-                    .trim()
-            ),
-            search_result,
-        );
-    }
-
-    Ok(search_results)
+    Ok(document
+        .select(&results)
+        .map(|result| {
+            RawSearchResult::new(
+                result
+                    .select(&result_title)
+                    .next()
+                    .unwrap()
+                    .inner_html()
+                    .trim()
+                    .to_string(),
+                format!(
+                    "https://{}",
+                    result
+                        .select(&result_url)
+                        .next()
+                        .unwrap()
+                        .inner_html()
+                        .trim()
+                ),
+                result
+                    .select(&result_desc)
+                    .next()
+                    .unwrap()
+                    .inner_html()
+                    .trim()
+                    .to_string(),
+                vec!["duckduckgo".to_string()],
+            )
+        })
+        .map(|search_result| (search_result.visiting_url.clone(), search_result))
+        .collect())
 }
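The change above (mirrored in the searx scraper below) replaces an imperative loop plus HashMap::insert with a pipeline: map each scraped node to a RawSearchResult, map that to a (visiting_url, result) tuple, and collect, since an iterator of key-value pairs collects directly into a HashMap. A minimal standalone sketch of that shape, with illustrative names (Hit and index_by_url are not from the codebase):

use std::collections::HashMap;

struct Hit {
    url: String,
    title: String,
}

// Map each item to a (key, value) pair, then collect the pairs into a map.
fn index_by_url(hits: Vec<Hit>) -> HashMap<String, Hit> {
    hits.into_iter()
        .map(|hit| (hit.url.clone(), hit))
        .collect()
}

fn main() {
    let hits = vec![Hit {
        url: "https://example.com".to_string(),
        title: "Example".to_string(),
    }];
    let map = index_by_url(hits);
    assert_eq!(map["https://example.com"].title, "Example");
}

One behavioral note: if two scraped results share a key, collect keeps the later one, which matches what the repeated insert calls in the old loop did.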

View File

@@ -43,47 +43,36 @@ pub async fn results(
     let result_url: Selector = Selector::parse("h3>a")?;
     let result_desc: Selector = Selector::parse(".content")?;

-    let mut search_results: HashMap<String, RawSearchResult> = HashMap::new();
-
     // scrape all the results from the html
-    for result in document.select(&results) {
-        let search_result: RawSearchResult = RawSearchResult {
-            title: result
-                .select(&result_title)
-                .next()
-                .unwrap()
-                .inner_html()
-                .trim()
-                .to_string(),
-            visiting_url: result
-                .select(&result_url)
-                .next()
-                .unwrap()
-                .value()
-                .attr("href")
-                .unwrap()
-                .to_string(),
-            description: result
-                .select(&result_desc)
-                .next()
-                .unwrap()
-                .inner_html()
-                .trim()
-                .to_string(),
-            engine: vec!["searx".to_string()],
-        };
-        search_results.insert(
-            result
-                .select(&result_url)
-                .next()
-                .unwrap()
-                .value()
-                .attr("href")
-                .unwrap()
-                .to_string(),
-            search_result,
-        );
-    }
-
-    Ok(search_results)
+    Ok(document
+        .select(&results)
+        .map(|result| {
+            RawSearchResult::new(
+                result
+                    .select(&result_title)
+                    .next()
+                    .unwrap()
+                    .inner_html()
+                    .trim()
+                    .to_string(),
+                result
+                    .select(&result_url)
+                    .next()
+                    .unwrap()
+                    .value()
+                    .attr("href")
+                    .unwrap()
+                    .to_string(),
+                result
+                    .select(&result_desc)
+                    .next()
+                    .unwrap()
+                    .inner_html()
+                    .trim()
+                    .to_string(),
+                vec!["searx".to_string()],
+            )
+        })
+        .map(|search_result| (search_result.visiting_url.clone(), search_result))
+        .collect())
 }
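For reference, a self-contained sketch of the scraper-crate calls both engine modules lean on; the inline HTML is invented, but Selector::parse, select, attr, and inner_html are the same APIs used above:

use scraper::{Html, Selector};

fn main() {
    let html =
        Html::parse_document(r#"<h3><a href="https://example.com">Example</a></h3>"#);
    let link: Selector = Selector::parse("h3>a").unwrap();
    let element = html.select(&link).next().unwrap();
    // .value() exposes the element's attributes; .inner_html() its contents.
    assert_eq!(element.value().attr("href"), Some("https://example.com"));
    assert_eq!(element.inner_html().trim(), "Example");
}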

View File

@@ -10,6 +10,24 @@ pub struct SearchResult {
     pub engine: Vec<String>,
 }

+impl SearchResult {
+    pub fn new(
+        title: String,
+        visiting_url: String,
+        url: String,
+        description: String,
+        engine: Vec<String>,
+    ) -> Self {
+        SearchResult {
+            title,
+            visiting_url,
+            url,
+            description,
+            engine,
+        }
+    }
+}
+
 pub struct RawSearchResult {
     pub title: String,
     pub visiting_url: String,
@@ -17,9 +35,37 @@ pub struct RawSearchResult {
     pub engine: Vec<String>,
 }

+impl RawSearchResult {
+    pub fn new(
+        title: String,
+        visiting_url: String,
+        description: String,
+        engine: Vec<String>,
+    ) -> Self {
+        RawSearchResult {
+            title,
+            visiting_url,
+            description,
+            engine,
+        }
+    }
+
+    pub fn add_engines(&mut self, engine: String) {
+        self.engine.push(engine)
+    }
+}
+
 #[derive(Debug, Serialize)]
 #[serde(rename_all = "camelCase")]
 pub struct SearchResults {
     pub results: Vec<SearchResult>,
     pub page_query: String,
 }
+
+impl SearchResults {
+    pub fn new(results: Vec<SearchResult>, page_query: String) -> Self {
+        SearchResults {
+            results,
+            page_query,
+        }
+    }
+}
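A short usage sketch for the new constructors; RawSearchResult is re-declared from the fields above so the example compiles on its own:

pub struct RawSearchResult {
    pub title: String,
    pub visiting_url: String,
    pub description: String,
    pub engine: Vec<String>,
}

impl RawSearchResult {
    pub fn new(
        title: String,
        visiting_url: String,
        description: String,
        engine: Vec<String>,
    ) -> Self {
        RawSearchResult {
            title,
            visiting_url,
            description,
            engine,
        }
    }

    pub fn add_engines(&mut self, engine: String) {
        self.engine.push(engine)
    }
}

fn main() {
    let mut result = RawSearchResult::new(
        "Websurfx".to_string(),
        "https://example.com".to_string(),
        "An example result".to_string(),
        vec!["duckduckgo".to_string()],
    );
    // When a second engine returns the same URL, only the engine list grows.
    result.add_engines("searx".to_string());
    assert_eq!(result.engine, vec!["duckduckgo", "searx"]);
}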

View File

@@ -1,8 +1,10 @@
 use std::collections::HashMap;

-use fake_useragent::{Browsers, UserAgentsBuilder};
-
-use super::aggregation_models::{RawSearchResult, SearchResult, SearchResults};
+use super::{
+    aggregation_models::{RawSearchResult, SearchResult, SearchResults},
+    user_agent::random_user_agent,
+};

 use crate::engines::{duckduckgo, searx};

 // A function that aggregates all the scraped results from the above upstream engines and
@@ -20,23 +22,7 @@ pub async fn aggregate(
     query: &str,
     page: Option<u32>,
 ) -> Result<SearchResults, Box<dyn std::error::Error>> {
-    // Generate random user agent to improve privacy of the user.
-    let user_agent: String = UserAgentsBuilder::new()
-        .cache(false)
-        .dir("/tmp")
-        .thread(1)
-        .set_browsers(
-            Browsers::new()
-                .set_chrome()
-                .set_safari()
-                .set_edge()
-                .set_firefox()
-                .set_mozilla(),
-        )
-        .build()
-        .random()
-        .to_string();
+    let user_agent: String = random_user_agent();

     let mut result_map: HashMap<String, RawSearchResult> = HashMap::new();

     let ddg_map_results: HashMap<String, RawSearchResult> =
@@ -46,32 +32,35 @@ pub async fn aggregate(
     result_map.extend(ddg_map_results);

-    for (key, value) in searx_map_results.into_iter() {
-        if result_map.contains_key(&key) {
-            result_map
-                .get_mut(&key)
-                .unwrap()
-                .engine
-                .push(value.engine.get(0).unwrap().to_string())
-        } else {
-            result_map.insert(key, value);
-        }
-    }
-
-    let mut search_results: Vec<SearchResult> = Vec::new();
-
-    for (key, value) in result_map.into_iter() {
-        search_results.push(SearchResult {
-            title: value.title,
-            visiting_url: value.visiting_url,
-            url: key,
-            description: value.description,
-            engine: value.engine,
-        })
-    }
-
-    Ok(SearchResults {
-        results: search_results,
-        page_query: query.to_string(),
-    })
+    searx_map_results.into_iter().for_each(|(key, value)| {
+        result_map
+            .entry(key)
+            .and_modify(|result| {
+                result.add_engines(value.engine[0].clone());
+            })
+            .or_insert_with(|| -> RawSearchResult {
+                RawSearchResult::new(
+                    value.title.clone(),
+                    value.visiting_url.clone(),
+                    value.description.clone(),
+                    value.engine.clone(),
+                )
+            });
+    });
+
+    Ok(SearchResults::new(
+        result_map
+            .into_iter()
+            .map(|(key, value)| {
+                SearchResult::new(
+                    value.title,
+                    value.visiting_url,
+                    key,
+                    value.description,
+                    value.engine,
+                )
+            })
+            .collect(),
+        query.to_string(),
+    ))
 }
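The merge above relies on HashMap's entry API: and_modify runs when the key is already present, or_insert_with when it is not, so a single lookup replaces the old contains_key/get_mut/unwrap sequence and cannot panic on a missing key. A stripped-down sketch of the same pattern, tracking only a URL-to-engine-list map (merge is illustrative, not from the codebase):

use std::collections::HashMap;

fn merge(results: &mut HashMap<String, Vec<String>>, url: String, engine: String) {
    results
        .entry(url)
        // The URL was already found by another engine: extend its engine list.
        .and_modify(|engines| engines.push(engine.clone()))
        // First sighting of this URL: start a fresh engine list.
        .or_insert_with(|| vec![engine.clone()]);
}

fn main() {
    let mut results: HashMap<String, Vec<String>> = HashMap::new();
    merge(&mut results, "https://example.com".into(), "duckduckgo".into());
    merge(&mut results, "https://example.com".into(), "searx".into());
    assert_eq!(results["https://example.com"], vec!["duckduckgo", "searx"]);
}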

View File

@@ -1,2 +1,3 @@
 pub mod aggregation_models;
 pub mod aggregator;
+pub mod user_agent;

View File

@@ -0,0 +1,20 @@
+use fake_useragent::{Browsers, UserAgentsBuilder};
+
+// A function to generate a random user agent to improve the privacy of the user.
+pub fn random_user_agent() -> String {
+    UserAgentsBuilder::new()
+        .cache(false)
+        .dir("/tmp")
+        .thread(1)
+        .set_browsers(
+            Browsers::new()
+                .set_chrome()
+                .set_safari()
+                .set_edge()
+                .set_firefox()
+                .set_mozilla(),
+        )
+        .build()
+        .random()
+        .to_string()
+}
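Extracting the builder behind random_user_agent() lets both engine modules share one implementation, and the browser set or builder options can now change in one place. As a hedged sketch of a call site, assuming the reqwest crate (fetch_html and its signature are hypothetical, not the repository's fetch code):

use reqwest::header::USER_AGENT;

// Hypothetical call site: attach the generated user agent to an outgoing request.
async fn fetch_html(url: &str, user_agent: &str) -> Result<String, reqwest::Error> {
    reqwest::Client::new()
        .get(url)
        .header(USER_AGENT, user_agent)
        .send()
        .await?
        .text()
        .await
}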