2023-05-31 12:54:51 -04:00
|
|
|
//! This module provides the error enum used to handle the different errors that can occur while
//! requesting data from the upstream search engines with the search query provided by the user.
|
|
|
|
|
2023-07-15 12:50:31 -04:00
|
|
|
use crate::results::aggregation_models::RawSearchResult;
|
2023-07-11 12:41:34 -04:00
|
|
|
use error_stack::{IntoReport, Result, ResultExt};
|
|
|
|
use std::{collections::HashMap, fmt, time::Duration};
|
2023-05-31 12:54:51 -04:00
|
|
|
|
|
|
|
/// A custom error type used for handling engine-associated errors.
///
/// This enum provides variants for three different categories of errors; see the documentation
/// on each variant for what it covers.
///
/// The type is a field-less enum, so it derives the cheap value traits (`Clone`, `Copy`,
/// `PartialEq`, `Eq`) in addition to `Debug`; this lets callers and tests compare variants
/// directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EngineError {
    /// Handles the "no results found" error provided by the upstream search engines.
    EmptyResultSet,
    /// Handles all request-related errors like forbidden, not found, etc.
    RequestError,
    /// Handles all the errors which are unexpected or occur rarely. These are mostly related to
    /// failure in initialization of `HeaderMap`, `Selector` errors and all other errors occurring
    /// within the code handling the upstream search engines.
    UnexpectedError,
}
|
|
|
|
|
2023-06-14 08:42:30 -04:00
|
|
|
impl fmt::Display for EngineError {
|
|
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
2023-05-31 12:54:51 -04:00
|
|
|
match self {
|
2023-06-14 08:42:30 -04:00
|
|
|
EngineError::EmptyResultSet => {
|
2023-05-31 12:54:51 -04:00
|
|
|
write!(f, "The upstream search engine returned an empty result set")
|
|
|
|
}
|
2023-06-14 08:42:30 -04:00
|
|
|
EngineError::RequestError => {
|
2023-06-14 18:48:37 -04:00
|
|
|
write!(
|
|
|
|
f,
|
|
|
|
"Error occurred while requesting data from upstream search engine"
|
|
|
|
)
|
|
|
|
}
|
|
|
|
EngineError::UnexpectedError => {
|
|
|
|
write!(f, "An unexpected error occurred while processing the data")
|
2023-06-04 04:56:07 -04:00
|
|
|
}
|
2023-05-31 12:54:51 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-07-11 12:41:34 -04:00
|
|
|
// Marker impl with no overridden items: it registers `EngineError` as an `error_stack` context
// type, which is what allows it to be passed to `change_context(EngineError::...)`.
impl error_stack::Context for EngineError {}
|
|
|
|
|
2023-08-17 16:48:20 -04:00
|
|
|
/// A trait to define common behavior for all search engines.
|
2023-07-11 12:41:34 -04:00
|
|
|
#[async_trait::async_trait]
|
|
|
|
pub trait SearchEngine {
|
|
|
|
async fn fetch_html_from_upstream(
|
|
|
|
&self,
|
|
|
|
url: String,
|
|
|
|
header_map: reqwest::header::HeaderMap,
|
2023-07-30 03:53:48 -04:00
|
|
|
request_timeout: u8,
|
2023-07-11 12:41:34 -04:00
|
|
|
) -> Result<String, EngineError> {
|
|
|
|
// fetch the html from upstream search engine
|
|
|
|
Ok(reqwest::Client::new()
|
|
|
|
.get(url)
|
2023-07-30 03:53:48 -04:00
|
|
|
.timeout(Duration::from_secs(request_timeout as u64)) // Add timeout to request to avoid DDOSing the server
|
2023-08-17 16:48:20 -04:00
|
|
|
.headers(header_map) // add spoofed headers to emulate human behavior
|
2023-07-11 12:41:34 -04:00
|
|
|
.send()
|
|
|
|
.await
|
|
|
|
.into_report()
|
|
|
|
.change_context(EngineError::RequestError)?
|
|
|
|
.text()
|
|
|
|
.await
|
|
|
|
.into_report()
|
|
|
|
.change_context(EngineError::RequestError)?)
|
|
|
|
}
|
|
|
|
|
|
|
|
async fn results(
|
|
|
|
&self,
|
|
|
|
query: String,
|
|
|
|
page: u32,
|
|
|
|
user_agent: String,
|
2023-07-30 03:53:48 -04:00
|
|
|
request_timeout: u8,
|
2023-07-11 12:41:34 -04:00
|
|
|
) -> Result<HashMap<String, RawSearchResult>, EngineError>;
|
|
|
|
}
|