//! This module provides the error enum to handle different errors associated while requesting data from //! the upstream search engines with the search query provided by the user. use crate::results::aggregation_models::SearchResult; use error_stack::{IntoReport, Result, ResultExt}; use std::{collections::HashMap, fmt, time::Duration}; /// A custom error type used for handle engine associated errors. #[derive(Debug)] pub enum EngineError { /// This variant handles all request related errors like forbidden, not found, /// etc. EmptyResultSet, /// This variant handles the not results found error provide by the upstream /// search engines. RequestError, /// This variant handles all the errors which are unexpected or occur rarely /// and are errors mostly related to failure in initialization of HeaderMap, /// Selector errors and all other errors occurring within the code handling /// the `upstream search engines`. UnexpectedError, } impl fmt::Display for EngineError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { EngineError::EmptyResultSet => { write!(f, "The upstream search engine returned an empty result set") } EngineError::RequestError => { write!( f, "Error occurred while requesting data from upstream search engine" ) } EngineError::UnexpectedError => { write!(f, "An unexpected error occurred while processing the data") } } } } impl error_stack::Context for EngineError {} /// A trait to define common behavior for all search engines. #[async_trait::async_trait] pub trait SearchEngine: Sync + Send { /// This helper function fetches/requests the search results from the upstream search engine in /// an html form. /// /// # Arguments /// /// * `url` - It takes the url of the upstream search engine with the user requested search /// query appended in the search parameters. /// * `header_map` - It takes the http request headers to be sent to the upstream engine in /// order to prevent being detected as a bot. It takes the header as a HeaderMap type. /// * `request_timeout` - It takes the request timeout value as seconds which is used to limit /// the amount of time for each request to remain connected when until the results can be provided /// by the upstream engine. /// /// # Error /// /// It returns the html data as a string if the upstream engine provides the data as expected /// otherwise it returns a custom `EngineError`. async fn fetch_html_from_upstream( &self, url: String, header_map: reqwest::header::HeaderMap, request_timeout: u8, ) -> Result { // fetch the html from upstream search engine Ok(reqwest::Client::new() .get(url) .timeout(Duration::from_secs(request_timeout as u64)) // Add timeout to request to avoid DDOSing the server .headers(header_map) // add spoofed headers to emulate human behavior .send() .await .into_report() .change_context(EngineError::RequestError)? .text() .await .into_report() .change_context(EngineError::RequestError)?) } /// This function scrapes results from the upstream engine and puts all the scraped results like /// title, visiting_url (href in html),engine (from which engine it was fetched from) and description /// in a RawSearchResult and then adds that to HashMap whose keys are url and values are RawSearchResult /// struct and then returns it within a Result enum. /// /// # Arguments /// /// * `query` - Takes the user provided query to query to the upstream search engine with. /// * `page` - Takes an u32 as an argument. /// * `user_agent` - Takes a random user agent string as an argument. /// * `request_timeout` - Takes a time (secs) as a value which controls the server request timeout. /// /// # Errors /// /// Returns an `EngineErrorKind` if the user is not connected to the internet or if their is failure to /// reach the above `upstream search engine` page or if the `upstream search engine` is unable to /// provide results for the requested search query and also returns error if the scraping selector /// or HeaderMap fails to initialize. async fn results( &self, query: String, page: u32, user_agent: String, request_timeout: u8, ) -> Result, EngineError>; } /// A named struct which stores the engine struct with the name of the associated engine. pub struct EngineHandler { /// It stores the engine struct wrapped in a box smart pointer as the engine struct implements /// the `SearchEngine` trait. engine: Box, /// It stores the name of the engine to which the struct is associated to. name: &'static str, } impl Clone for EngineHandler { fn clone(&self) -> Self { Self::new(self.name).unwrap() } } impl EngineHandler { /// Parses an engine name into an engine handler. /// /// # Arguments /// /// * `engine_name` - It takes the name of the engine to which the struct was associated to. /// /// # Returns /// /// It returns an option either containing the value or a none if the engine is unknown pub fn new(engine_name: &str) -> Option { let engine: (&'static str, Box) = match engine_name.to_lowercase().as_str() { "duckduckgo" => ("duckduckgo", Box::new(super::duckduckgo::DuckDuckGo)), "searx" => ("searx", Box::new(super::searx::Searx)), _ => return None, }; Some(Self { engine: engine.1, name: engine.0, }) } /// This function converts the EngineHandler type into a tuple containing the engine name and /// the associated engine struct. pub fn into_name_engine(self) -> (&'static str, Box) { (self.name, self.engine) } }