diff --git a/src/results/aggregator.rs b/src/results/aggregator.rs index 16586c0..8e92b10 100644 --- a/src/results/aggregator.rs +++ b/src/results/aggregator.rs @@ -1,18 +1,22 @@ //! This module provides the functionality to scrape and gathers all the results from the upstream //! search engines and then removes duplicate results. -use std::{collections::HashMap, time::Duration}; - -use error_stack::Report; -use rand::Rng; -use tokio::task::JoinHandle; +use std::{collections::HashMap, io::BufReader, time::Duration}; use super::{ aggregation_models::{EngineErrorInfo, SearchResult, SearchResults}, user_agent::random_user_agent, }; +use error_stack::Report; +use rand::Rng; +use regex::Regex; +use std::{fs::File, io::BufRead}; +use tokio::task::JoinHandle; -use crate::engines::engine_models::{EngineError, EngineHandler}; +use crate::{ + engines::engine_models::{EngineError, EngineHandler}, + handler::paths::{file_path, FileType}, +}; /// Aliases for long type annotations type FutureVec = Vec, Report>>>; @@ -106,7 +110,7 @@ pub async fn aggregate( log::error!("Engine Error: {:?}", error); engine_errors_info.push(EngineErrorInfo::new( error.downcast_ref::().unwrap(), - engine_name.to_string(), + engine_name, )); }; @@ -143,7 +147,22 @@ pub async fn aggregate( } } - let results = result_map.into_values().collect(); + let mut blacklist_map: HashMap = HashMap::new(); + filter_with_lists( + &mut result_map, + &mut blacklist_map, + &file_path(FileType::BlockList)?, + )?; + + filter_with_lists( + &mut blacklist_map, + &mut result_map, + &file_path(FileType::AllowList)?, + )?; + + drop(blacklist_map); + + let results: Vec = result_map.into_values().collect(); Ok(SearchResults::new( results, @@ -151,3 +170,23 @@ pub async fn aggregate( engine_errors_info, )) } + +fn filter_with_lists( + map_to_be_filtered: &mut HashMap, + resultant_map: &mut HashMap, + file_path: &str, +) -> Result<(), Box> { + for (url, search_result) in map_to_be_filtered.clone().into_iter() { + let reader = BufReader::new(File::open(file_path)?); + for line in reader.lines() { + let re = Regex::new(&line?)?; + if re.is_match(&url.to_lowercase()) + || re.is_match(&search_result.title.to_lowercase()) + || re.is_match(&search_result.description.to_lowercase()) + { + resultant_map.insert(url.clone(), map_to_be_filtered.remove(&url).unwrap()); + } + } + } + Ok(()) +}