Merge branch 'rolling' into instance

2024-11-21 21:48:21 -05:00 · 2024-01-31 09:50:16 +05:30 · 2024-01-31 09:50:16 +05:30 · 1c1f299980
commit 1c1f299980
parent 2299e6c28e 669e365913
4 changed files with 39 additions and 1 deletions
--- a/README.md
+++ b/README.md
@ -32,7 +32,7 @@
  <a href=""
    ><img
      alt="Maintenance"
-      src="https://img.shields.io/maintenance/yes/2023?style=flat-square"
+      src="https://img.shields.io/maintenance/yes/2024?style=flat-square"
    />
  </a>
  <a href="https://www.codefactor.io/repository/github/neon-mmd/websurfx">
--- a/src/cache/cacher.rs
+++ b/src/cache/cacher.rs
@ -4,6 +4,7 @@
 use error_stack::Report;
 #[cfg(feature = "memory-cache")]
 use mini_moka::sync::Cache as MokaCache;
+#[cfg(feature = "memory-cache")]
 use mini_moka::sync::ConcurrentCacheExt;

 #[cfg(feature = "memory-cache")]
--- a/src/config/parser.rs
+++ b/src/config/parser.rs
@ -98,6 +98,7 @@ impl Config {

        #[cfg(any(feature = "redis-cache", feature = "memory-cache"))]
        let parsed_cet = globals.get::<_, u16>("cache_expiry_time")?;
+        #[cfg(any(feature = "redis-cache", feature = "memory-cache"))]
        let cache_expiry_time = match parsed_cet {
            0..=59 => {
                log::error!(
--- a/src/models/engine_models.rs
+++ b/src/models/engine_models.rs
@ -86,6 +86,42 @@ pub trait SearchEngine: Sync + Send {
            .change_context(EngineError::RequestError)?)
    }

+    /// This helper function fetches/requests the json search results from the upstream search engine as a vector of bytes.
+    ///
+    /// # Arguments
+    ///
+    /// * `url` - It takes the url of the upstream search engine with the user requested search
+    /// query appended in the search parameters.
+    /// * `header_map` - It takes the http request headers to be sent to the upstream engine in
+    /// order to prevent being detected as a bot. It takes the header as a HeaderMap type.
+    /// * `client` - It takes a reference to the http `Client` which is used to send the GET
+    /// request to the upstream engine. NOTE(review): any request timeout is presumably configured
+    /// on this client by the caller, as no timeout is set here; confirm.
+    ///
+    /// # Errors
+    ///
+    /// It returns the response body as a vector of bytes if both sending the request and reading the
+    /// body succeed, otherwise the failure is reported with the `EngineError::RequestError` context.
+    async fn fetch_json_as_bytes_from_upstream(
+        &self,
+        url: &str,
+        header_map: reqwest::header::HeaderMap,
+        client: &Client,
+    ) -> Result<Vec<u8>, EngineError> {
+        // fetch the json response from the upstream search engine and copy its body into a `Vec<u8>`
+
+        Ok(client
+            .get(url)
+            .headers(header_map) // add spoofed headers to emulate human behavior
+            .send()
+            .await
+            .change_context(EngineError::RequestError)?
+            .bytes()
+            .await
+            .change_context(EngineError::RequestError)?
+            .to_vec())
+    }
+
    /// This function scrapes results from the upstream engine and puts all the scraped results like
    /// title, visiting_url (href in html),engine (from which engine it was fetched from) and description
    /// in a RawSearchResult and then adds that to HashMap whose keys are url and values are RawSearchResult