mirror of
https://github.com/neon-mmd/websurfx.git
synced 2024-11-21 21:48:21 -05:00
Refactoring code and separating code into files for better maintainability
This commit is contained in:
parent
bb61ee3afe
commit
f94ac503e2
@ -19,6 +19,8 @@ struct CliArgs {
|
|||||||
|
|
||||||
const PORT_RANGE: RangeInclusive<usize> = 1024..=65535;
|
const PORT_RANGE: RangeInclusive<usize> = 1024..=65535;
|
||||||
|
|
||||||
|
// A function to check whether the port is a valid number and is in range
|
||||||
|
// between [1024-65535] otherwise display an appropriate error message.
|
||||||
fn is_port_in_range(s: &str) -> Result<u16, String> {
|
fn is_port_in_range(s: &str) -> Result<u16, String> {
|
||||||
let port: usize = s
|
let port: usize = s
|
||||||
.parse()
|
.parse()
|
||||||
@ -39,6 +41,7 @@ fn is_port_in_range(s: &str) -> Result<u16, String> {
|
|||||||
async fn main() -> std::io::Result<()> {
|
async fn main() -> std::io::Result<()> {
|
||||||
let args = CliArgs::parse();
|
let args = CliArgs::parse();
|
||||||
|
|
||||||
|
// Initializing logging middleware with level set to default or info.
|
||||||
env_logger::Builder::from_env(Env::default().default_filter_or("info")).init();
|
env_logger::Builder::from_env(Env::default().default_filter_or("info")).init();
|
||||||
|
|
||||||
log::info!("started server on port {}", args.port);
|
log::info!("started server on port {}", args.port);
|
||||||
@ -54,7 +57,7 @@ async fn main() -> std::io::Result<()> {
|
|||||||
HttpServer::new(move || {
|
HttpServer::new(move || {
|
||||||
App::new()
|
App::new()
|
||||||
.app_data(handlebars_ref.clone())
|
.app_data(handlebars_ref.clone())
|
||||||
.wrap(Logger::default())
|
.wrap(Logger::default()) // added logging middleware for logging.
|
||||||
// Serve images and static files (css and js files).
|
// Serve images and static files (css and js files).
|
||||||
.service(fs::Files::new("/static", "./public/static").show_files_listing())
|
.service(fs::Files::new("/static", "./public/static").show_files_listing())
|
||||||
.service(fs::Files::new("/images", "./public/images").show_files_listing())
|
.service(fs::Files::new("/images", "./public/images").show_files_listing())
|
||||||
|
@ -48,49 +48,37 @@ pub async fn results(
|
|||||||
let result_url: Selector = Selector::parse(".result__url")?;
|
let result_url: Selector = Selector::parse(".result__url")?;
|
||||||
let result_desc: Selector = Selector::parse(".result__snippet")?;
|
let result_desc: Selector = Selector::parse(".result__snippet")?;
|
||||||
|
|
||||||
let mut search_results: HashMap<String, RawSearchResult> = HashMap::new();
|
|
||||||
|
|
||||||
// scrape all the results from the html
|
// scrape all the results from the html
|
||||||
for result in document.select(&results) {
|
Ok(document
|
||||||
let search_result: RawSearchResult = RawSearchResult {
|
.select(&results)
|
||||||
title: result
|
.map(|result| {
|
||||||
.select(&result_title)
|
RawSearchResult::new(
|
||||||
.next()
|
|
||||||
.unwrap()
|
|
||||||
.inner_html()
|
|
||||||
.trim()
|
|
||||||
.to_string(),
|
|
||||||
visiting_url: format!(
|
|
||||||
"https://{}",
|
|
||||||
result
|
result
|
||||||
.select(&result_url)
|
.select(&result_title)
|
||||||
.next()
|
.next()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.inner_html()
|
.inner_html()
|
||||||
.trim()
|
.trim()
|
||||||
),
|
.to_string(),
|
||||||
description: result
|
format!(
|
||||||
.select(&result_desc)
|
"https://{}",
|
||||||
.next()
|
result
|
||||||
.unwrap()
|
.select(&result_url)
|
||||||
.inner_html()
|
.next()
|
||||||
.trim()
|
.unwrap()
|
||||||
.to_string(),
|
.inner_html()
|
||||||
engine: vec!["duckduckgo".to_string()],
|
.trim()
|
||||||
};
|
),
|
||||||
search_results.insert(
|
|
||||||
format!(
|
|
||||||
"https://{}",
|
|
||||||
result
|
result
|
||||||
.select(&result_url)
|
.select(&result_desc)
|
||||||
.next()
|
.next()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.inner_html()
|
.inner_html()
|
||||||
.trim()
|
.trim()
|
||||||
),
|
.to_string(),
|
||||||
search_result,
|
vec!["duckduckgo".to_string()],
|
||||||
);
|
)
|
||||||
}
|
})
|
||||||
|
.map(|search_result| (search_result.visiting_url.clone(), search_result))
|
||||||
Ok(search_results)
|
.collect())
|
||||||
}
|
}
|
||||||
|
@ -43,47 +43,36 @@ pub async fn results(
|
|||||||
let result_url: Selector = Selector::parse("h3>a")?;
|
let result_url: Selector = Selector::parse("h3>a")?;
|
||||||
let result_desc: Selector = Selector::parse(".content")?;
|
let result_desc: Selector = Selector::parse(".content")?;
|
||||||
|
|
||||||
let mut search_results: HashMap<String, RawSearchResult> = HashMap::new();
|
|
||||||
|
|
||||||
// scrape all the results from the html
|
// scrape all the results from the html
|
||||||
for result in document.select(&results) {
|
Ok(document
|
||||||
let search_result: RawSearchResult = RawSearchResult {
|
.select(&results)
|
||||||
title: result
|
.map(|result| {
|
||||||
.select(&result_title)
|
RawSearchResult::new(
|
||||||
.next()
|
result
|
||||||
.unwrap()
|
.select(&result_title)
|
||||||
.inner_html()
|
.next()
|
||||||
.trim()
|
.unwrap()
|
||||||
.to_string(),
|
.inner_html()
|
||||||
visiting_url: result
|
.trim()
|
||||||
.select(&result_url)
|
.to_string(),
|
||||||
.next()
|
result
|
||||||
.unwrap()
|
.select(&result_url)
|
||||||
.value()
|
.next()
|
||||||
.attr("href")
|
.unwrap()
|
||||||
.unwrap()
|
.value()
|
||||||
.to_string(),
|
.attr("href")
|
||||||
description: result
|
.unwrap()
|
||||||
.select(&result_desc)
|
.to_string(),
|
||||||
.next()
|
result
|
||||||
.unwrap()
|
.select(&result_desc)
|
||||||
.inner_html()
|
.next()
|
||||||
.trim()
|
.unwrap()
|
||||||
.to_string(),
|
.inner_html()
|
||||||
engine: vec!["searx".to_string()],
|
.trim()
|
||||||
};
|
.to_string(),
|
||||||
search_results.insert(
|
vec!["searx".to_string()],
|
||||||
result
|
)
|
||||||
.select(&result_url)
|
})
|
||||||
.next()
|
.map(|search_result| (search_result.visiting_url.clone(), search_result))
|
||||||
.unwrap()
|
.collect())
|
||||||
.value()
|
|
||||||
.attr("href")
|
|
||||||
.unwrap()
|
|
||||||
.to_string(),
|
|
||||||
search_result,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(search_results)
|
|
||||||
}
|
}
|
||||||
|
@ -10,6 +10,24 @@ pub struct SearchResult {
|
|||||||
pub engine: Vec<String>,
|
pub engine: Vec<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl SearchResult {
|
||||||
|
pub fn new(
|
||||||
|
title: String,
|
||||||
|
visiting_url: String,
|
||||||
|
url: String,
|
||||||
|
description: String,
|
||||||
|
engine: Vec<String>,
|
||||||
|
) -> Self {
|
||||||
|
SearchResult {
|
||||||
|
title,
|
||||||
|
visiting_url,
|
||||||
|
url,
|
||||||
|
description,
|
||||||
|
engine,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub struct RawSearchResult {
|
pub struct RawSearchResult {
|
||||||
pub title: String,
|
pub title: String,
|
||||||
pub visiting_url: String,
|
pub visiting_url: String,
|
||||||
@ -17,9 +35,37 @@ pub struct RawSearchResult {
|
|||||||
pub engine: Vec<String>,
|
pub engine: Vec<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl RawSearchResult {
|
||||||
|
pub fn new(
|
||||||
|
title: String,
|
||||||
|
visiting_url: String,
|
||||||
|
description: String,
|
||||||
|
engine: Vec<String>,
|
||||||
|
) -> Self {
|
||||||
|
RawSearchResult {
|
||||||
|
title,
|
||||||
|
visiting_url,
|
||||||
|
description,
|
||||||
|
engine,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pub fn add_engines(&mut self, engine: String) {
|
||||||
|
self.engine.push(engine)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Serialize)]
|
#[derive(Debug, Serialize)]
|
||||||
#[serde(rename_all = "camelCase")]
|
#[serde(rename_all = "camelCase")]
|
||||||
pub struct SearchResults {
|
pub struct SearchResults {
|
||||||
pub results: Vec<SearchResult>,
|
pub results: Vec<SearchResult>,
|
||||||
pub page_query: String,
|
pub page_query: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl SearchResults {
|
||||||
|
pub fn new(results: Vec<SearchResult>, page_query: String) -> Self {
|
||||||
|
SearchResults {
|
||||||
|
results,
|
||||||
|
page_query,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -1,8 +1,10 @@
|
|||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
|
||||||
use fake_useragent::{Browsers, UserAgentsBuilder};
|
use super::{
|
||||||
|
aggregation_models::{RawSearchResult, SearchResult, SearchResults},
|
||||||
|
user_agent::random_user_agent,
|
||||||
|
};
|
||||||
|
|
||||||
use super::aggregation_models::{RawSearchResult, SearchResult, SearchResults};
|
|
||||||
use crate::engines::{duckduckgo, searx};
|
use crate::engines::{duckduckgo, searx};
|
||||||
|
|
||||||
// A function that aggregates all the scraped results from the above upstream engines and
|
// A function that aggregates all the scraped results from the above upstream engines and
|
||||||
@ -20,23 +22,7 @@ pub async fn aggregate(
|
|||||||
query: &str,
|
query: &str,
|
||||||
page: Option<u32>,
|
page: Option<u32>,
|
||||||
) -> Result<SearchResults, Box<dyn std::error::Error>> {
|
) -> Result<SearchResults, Box<dyn std::error::Error>> {
|
||||||
// Generate random user agent to improve privacy of the user.
|
let user_agent: String = random_user_agent();
|
||||||
let user_agent: String = UserAgentsBuilder::new()
|
|
||||||
.cache(false)
|
|
||||||
.dir("/tmp")
|
|
||||||
.thread(1)
|
|
||||||
.set_browsers(
|
|
||||||
Browsers::new()
|
|
||||||
.set_chrome()
|
|
||||||
.set_safari()
|
|
||||||
.set_edge()
|
|
||||||
.set_firefox()
|
|
||||||
.set_mozilla(),
|
|
||||||
)
|
|
||||||
.build()
|
|
||||||
.random()
|
|
||||||
.to_string();
|
|
||||||
|
|
||||||
let mut result_map: HashMap<String, RawSearchResult> = HashMap::new();
|
let mut result_map: HashMap<String, RawSearchResult> = HashMap::new();
|
||||||
|
|
||||||
let ddg_map_results: HashMap<String, RawSearchResult> =
|
let ddg_map_results: HashMap<String, RawSearchResult> =
|
||||||
@ -46,32 +32,35 @@ pub async fn aggregate(
|
|||||||
|
|
||||||
result_map.extend(ddg_map_results);
|
result_map.extend(ddg_map_results);
|
||||||
|
|
||||||
for (key, value) in searx_map_results.into_iter() {
|
searx_map_results.into_iter().for_each(|(key, value)| {
|
||||||
if result_map.contains_key(&key) {
|
result_map
|
||||||
result_map
|
.entry(key)
|
||||||
.get_mut(&key)
|
.and_modify(|result| {
|
||||||
.unwrap()
|
result.add_engines(value.engine[0].clone());
|
||||||
.engine
|
})
|
||||||
.push(value.engine.get(0).unwrap().to_string())
|
.or_insert_with(|| -> RawSearchResult {
|
||||||
} else {
|
RawSearchResult::new(
|
||||||
result_map.insert(key, value);
|
value.title.clone(),
|
||||||
}
|
value.visiting_url.clone(),
|
||||||
}
|
value.description.clone(),
|
||||||
|
value.engine.clone(),
|
||||||
|
)
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
let mut search_results: Vec<SearchResult> = Vec::new();
|
Ok(SearchResults::new(
|
||||||
|
result_map
|
||||||
for (key, value) in result_map.into_iter() {
|
.into_iter()
|
||||||
search_results.push(SearchResult {
|
.map(|(key, value)| {
|
||||||
title: value.title,
|
SearchResult::new(
|
||||||
visiting_url: value.visiting_url,
|
value.title,
|
||||||
url: key,
|
value.visiting_url,
|
||||||
description: value.description,
|
key,
|
||||||
engine: value.engine,
|
value.description,
|
||||||
})
|
value.engine,
|
||||||
}
|
)
|
||||||
|
})
|
||||||
Ok(SearchResults {
|
.collect(),
|
||||||
results: search_results,
|
query.to_string(),
|
||||||
page_query: query.to_string(),
|
))
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
@ -1,2 +1,3 @@
|
|||||||
pub mod aggregation_models;
|
pub mod aggregation_models;
|
||||||
pub mod aggregator;
|
pub mod aggregator;
|
||||||
|
pub mod user_agent;
|
||||||
|
20
src/search_results_handler/user_agent.rs
Normal file
20
src/search_results_handler/user_agent.rs
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
use fake_useragent::{Browsers, UserAgentsBuilder};
|
||||||
|
|
||||||
|
// A function to generate a random user agent to improve the privacy of the user.
|
||||||
|
pub fn random_user_agent() -> String {
|
||||||
|
UserAgentsBuilder::new()
|
||||||
|
.cache(false)
|
||||||
|
.dir("/tmp")
|
||||||
|
.thread(1)
|
||||||
|
.set_browsers(
|
||||||
|
Browsers::new()
|
||||||
|
.set_chrome()
|
||||||
|
.set_safari()
|
||||||
|
.set_edge()
|
||||||
|
.set_firefox()
|
||||||
|
.set_mozilla(),
|
||||||
|
)
|
||||||
|
.build()
|
||||||
|
.random()
|
||||||
|
.to_string()
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user