0
0
mirror of https://github.com/neon-mmd/websurfx.git synced 2024-10-18 06:22:53 -04:00

Refactoring code and separating code into files for better maintainability

This commit is contained in:
neon_arch 2023-04-25 16:30:04 +03:00
parent bb61ee3afe
commit f94ac503e2
7 changed files with 159 additions and 123 deletions

View File

@ -19,6 +19,8 @@ struct CliArgs {
const PORT_RANGE: RangeInclusive<usize> = 1024..=65535;
// A function to check whether port is valid u32 number or is in range
// between [1024-65536] otherwise display an appropriate error message.
fn is_port_in_range(s: &str) -> Result<u16, String> {
let port: usize = s
.parse()
@ -39,6 +41,7 @@ fn is_port_in_range(s: &str) -> Result<u16, String> {
async fn main() -> std::io::Result<()> {
let args = CliArgs::parse();
// Initializing logging middleware with level set to default or info.
env_logger::Builder::from_env(Env::default().default_filter_or("info")).init();
log::info!("started server on port {}", args.port);
@ -54,7 +57,7 @@ async fn main() -> std::io::Result<()> {
HttpServer::new(move || {
App::new()
.app_data(handlebars_ref.clone())
.wrap(Logger::default())
.wrap(Logger::default()) // added logging middleware for logging.
// Serve images and static files (css and js files).
.service(fs::Files::new("/static", "./public/static").show_files_listing())
.service(fs::Files::new("/images", "./public/images").show_files_listing())

View File

@ -48,49 +48,37 @@ pub async fn results(
let result_url: Selector = Selector::parse(".result__url")?;
let result_desc: Selector = Selector::parse(".result__snippet")?;
let mut search_results: HashMap<String, RawSearchResult> = HashMap::new();
// scrape all the results from the html
for result in document.select(&results) {
let search_result: RawSearchResult = RawSearchResult {
title: result
.select(&result_title)
.next()
.unwrap()
.inner_html()
.trim()
.to_string(),
visiting_url: format!(
"https://{}",
Ok(document
.select(&results)
.map(|result| {
RawSearchResult::new(
result
.select(&result_url)
.select(&result_title)
.next()
.unwrap()
.inner_html()
.trim()
),
description: result
.select(&result_desc)
.next()
.unwrap()
.inner_html()
.trim()
.to_string(),
engine: vec!["duckduckgo".to_string()],
};
search_results.insert(
format!(
"https://{}",
.to_string(),
format!(
"https://{}",
result
.select(&result_url)
.next()
.unwrap()
.inner_html()
.trim()
),
result
.select(&result_url)
.select(&result_desc)
.next()
.unwrap()
.inner_html()
.trim()
),
search_result,
);
}
Ok(search_results)
.to_string(),
vec!["duckduckgo".to_string()],
)
})
.map(|search_result| (search_result.visiting_url.clone(), search_result))
.collect())
}

View File

@ -43,47 +43,36 @@ pub async fn results(
let result_url: Selector = Selector::parse("h3>a")?;
let result_desc: Selector = Selector::parse(".content")?;
let mut search_results: HashMap<String, RawSearchResult> = HashMap::new();
// scrape all the results from the html
for result in document.select(&results) {
let search_result: RawSearchResult = RawSearchResult {
title: result
.select(&result_title)
.next()
.unwrap()
.inner_html()
.trim()
.to_string(),
visiting_url: result
.select(&result_url)
.next()
.unwrap()
.value()
.attr("href")
.unwrap()
.to_string(),
description: result
.select(&result_desc)
.next()
.unwrap()
.inner_html()
.trim()
.to_string(),
engine: vec!["searx".to_string()],
};
search_results.insert(
result
.select(&result_url)
.next()
.unwrap()
.value()
.attr("href")
.unwrap()
.to_string(),
search_result,
);
}
Ok(search_results)
Ok(document
.select(&results)
.map(|result| {
RawSearchResult::new(
result
.select(&result_title)
.next()
.unwrap()
.inner_html()
.trim()
.to_string(),
result
.select(&result_url)
.next()
.unwrap()
.value()
.attr("href")
.unwrap()
.to_string(),
result
.select(&result_desc)
.next()
.unwrap()
.inner_html()
.trim()
.to_string(),
vec!["searx".to_string()],
)
})
.map(|search_result| (search_result.visiting_url.clone(), search_result))
.collect())
}

View File

@ -10,6 +10,24 @@ pub struct SearchResult {
pub engine: Vec<String>,
}
impl SearchResult {
pub fn new(
title: String,
visiting_url: String,
url: String,
description: String,
engine: Vec<String>,
) -> Self {
SearchResult {
title,
visiting_url,
url,
description,
engine,
}
}
}
pub struct RawSearchResult {
pub title: String,
pub visiting_url: String,
@ -17,9 +35,37 @@ pub struct RawSearchResult {
pub engine: Vec<String>,
}
impl RawSearchResult {
pub fn new(
title: String,
visiting_url: String,
description: String,
engine: Vec<String>,
) -> Self {
RawSearchResult {
title,
visiting_url,
description,
engine,
}
}
pub fn add_engines(&mut self, engine: String) {
self.engine.push(engine)
}
}
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct SearchResults {
pub results: Vec<SearchResult>,
pub page_query: String,
}
impl SearchResults {
pub fn new(results: Vec<SearchResult>, page_query: String) -> Self {
SearchResults {
results,
page_query,
}
}
}

View File

@ -1,8 +1,10 @@
use std::collections::HashMap;
use fake_useragent::{Browsers, UserAgentsBuilder};
use super::{
aggregation_models::{RawSearchResult, SearchResult, SearchResults},
user_agent::random_user_agent,
};
use super::aggregation_models::{RawSearchResult, SearchResult, SearchResults};
use crate::engines::{duckduckgo, searx};
// A function that aggregates all the scraped results from the above upstream engines and
@ -20,23 +22,7 @@ pub async fn aggregate(
query: &str,
page: Option<u32>,
) -> Result<SearchResults, Box<dyn std::error::Error>> {
// Generate random user agent to improve privacy of the user.
let user_agent: String = UserAgentsBuilder::new()
.cache(false)
.dir("/tmp")
.thread(1)
.set_browsers(
Browsers::new()
.set_chrome()
.set_safari()
.set_edge()
.set_firefox()
.set_mozilla(),
)
.build()
.random()
.to_string();
let user_agent: String = random_user_agent();
let mut result_map: HashMap<String, RawSearchResult> = HashMap::new();
let ddg_map_results: HashMap<String, RawSearchResult> =
@ -46,32 +32,35 @@ pub async fn aggregate(
result_map.extend(ddg_map_results);
for (key, value) in searx_map_results.into_iter() {
if result_map.contains_key(&key) {
result_map
.get_mut(&key)
.unwrap()
.engine
.push(value.engine.get(0).unwrap().to_string())
} else {
result_map.insert(key, value);
}
}
searx_map_results.into_iter().for_each(|(key, value)| {
result_map
.entry(key)
.and_modify(|result| {
result.add_engines(value.engine[0].clone());
})
.or_insert_with(|| -> RawSearchResult {
RawSearchResult::new(
value.title.clone(),
value.visiting_url.clone(),
value.description.clone(),
value.engine.clone(),
)
});
});
let mut search_results: Vec<SearchResult> = Vec::new();
for (key, value) in result_map.into_iter() {
search_results.push(SearchResult {
title: value.title,
visiting_url: value.visiting_url,
url: key,
description: value.description,
engine: value.engine,
})
}
Ok(SearchResults {
results: search_results,
page_query: query.to_string(),
})
Ok(SearchResults::new(
result_map
.into_iter()
.map(|(key, value)| {
SearchResult::new(
value.title,
value.visiting_url,
key,
value.description,
value.engine,
)
})
.collect(),
query.to_string(),
))
}

View File

@ -1,2 +1,3 @@
pub mod aggregation_models;
pub mod aggregator;
pub mod user_agent;

View File

@ -0,0 +1,20 @@
use fake_useragent::{Browsers, UserAgentsBuilder};
// A function to generate random user agent to improve privacy of the user.
pub fn random_user_agent() -> String {
UserAgentsBuilder::new()
.cache(false)
.dir("/tmp")
.thread(1)
.set_browsers(
Browsers::new()
.set_chrome()
.set_safari()
.set_edge()
.set_firefox()
.set_mozilla(),
)
.build()
.random()
.to_string()
}