0
0
mirror of https://github.com/neon-mmd/websurfx.git synced 2024-11-21 21:48:21 -05:00

⚡️ perf: rewrite the code by using a constant storing a prebuilt client globally for each thread (#384)

This commit is contained in:
neon_arch 2023-11-20 15:27:49 +03:00
parent 05bf05b0dd
commit b42adaa5a3
2 changed files with 20 additions and 6 deletions

View File

@ -3,7 +3,8 @@
use super::aggregation_models::SearchResult; use super::aggregation_models::SearchResult;
use error_stack::{Report, Result, ResultExt}; use error_stack::{Report, Result, ResultExt};
use std::{collections::HashMap, fmt, time::Duration}; use reqwest::Client;
use std::{collections::HashMap, fmt};
/// A custom error type used for handling engine-associated errors. /// A custom error type used for handling engine-associated errors.
#[derive(Debug)] #[derive(Debug)]
@ -71,12 +72,11 @@ pub trait SearchEngine: Sync + Send {
&self, &self,
url: &str, url: &str,
header_map: reqwest::header::HeaderMap, header_map: reqwest::header::HeaderMap,
request_timeout: u8, client: &Client,
) -> Result<String, EngineError> { ) -> Result<String, EngineError> {
// fetch the html from upstream search engine // fetch the html from upstream search engine
Ok(reqwest::Client::new() Ok(client
.get(url) .get(url)
.timeout(Duration::from_secs(request_timeout as u64)) // Add timeout to request to avoid DDOSing the server
.headers(header_map) // add spoofed headers to emulate human behavior .headers(header_map) // add spoofed headers to emulate human behavior
.send() .send()
.await .await
@ -109,7 +109,7 @@ pub trait SearchEngine: Sync + Send {
query: &str, query: &str,
page: u32, page: u32,
user_agent: &str, user_agent: &str,
request_timeout: u8, client: &Client,
safe_search: u8, safe_search: u8,
) -> Result<HashMap<String, SearchResult>, EngineError>; ) -> Result<HashMap<String, SearchResult>, EngineError>;
} }

View File

@ -9,6 +9,7 @@ use crate::models::{
}; };
use error_stack::Report; use error_stack::Report;
use regex::Regex; use regex::Regex;
use reqwest::{Client, ClientBuilder};
use std::time::{SystemTime, UNIX_EPOCH}; use std::time::{SystemTime, UNIX_EPOCH};
use std::{ use std::{
collections::HashMap, collections::HashMap,
@ -18,6 +19,9 @@ use std::{
use std::{fs::File, io::BufRead}; use std::{fs::File, io::BufRead};
use tokio::task::JoinHandle; use tokio::task::JoinHandle;
/// A constant for holding the prebuilt Client globally in the app.
static CLIENT: std::sync::OnceLock<Client> = std::sync::OnceLock::new();
/// Aliases for long type annotations /// Aliases for long type annotations
type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<EngineError>>>>; type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<EngineError>>>>;
@ -68,6 +72,16 @@ pub async fn aggregate(
request_timeout: u8, request_timeout: u8,
safe_search: u8, safe_search: u8,
) -> Result<SearchResults, Box<dyn std::error::Error>> { ) -> Result<SearchResults, Box<dyn std::error::Error>> {
let client = CLIENT.get_or_init(|| {
ClientBuilder::new()
.timeout(Duration::from_secs(request_timeout as u64)) // Add timeout to request to avoid DDOSing the server
.https_only(true)
.gzip(true)
.brotli(true)
.build()
.unwrap()
});
let user_agent: &str = random_user_agent(); let user_agent: &str = random_user_agent();
// Add a random delay before making the request. // Add a random delay before making the request.
@ -88,7 +102,7 @@ pub async fn aggregate(
let query: String = query.to_owned(); let query: String = query.to_owned();
tasks.push(tokio::spawn(async move { tasks.push(tokio::spawn(async move {
search_engine search_engine
.results(&query, page, user_agent, request_timeout, safe_search) .results(&query, page, user_agent, client, safe_search)
.await .await
})); }));
} }