mirror of
https://github.com/neon-mmd/websurfx.git
synced 2024-11-22 22:18:23 -05:00
77 lines
3.2 KiB
Rust
77 lines
3.2 KiB
Rust
//! This module provides helper functionality for parsing an HTML document into the internal SearchResult.
|
|
use std::collections::HashMap;
|
|
|
|
use crate::models::{aggregation_models::SearchResult, engine_models::EngineError};
|
|
use error_stack::{Report, Result};
|
|
use scraper::{html::Select, ElementRef, Html, Selector};
|
|
|
|
/// A html search result parser, based on a predefined CSS selectors.
|
|
pub struct SearchResultParser {
|
|
/// selector to locate the element which is displayed, if there were nothing found.
|
|
no_result: Selector,
|
|
/// selector to locate the element which contains one item from the search result.
|
|
results: Selector,
|
|
/// selector to locate the title relative to the search result item.
|
|
result_title: Selector,
|
|
/// selector to locate the url relative to the search result item.
|
|
result_url: Selector,
|
|
/// selector to locate the description relative to the search result item.
|
|
result_desc: Selector,
|
|
}
|
|
|
|
impl SearchResultParser {
|
|
/// Creates a new parser, if all the selectors are valid, otherwise it returns an EngineError
|
|
pub fn new(
|
|
no_result_selector: &str,
|
|
results_selector: &str,
|
|
result_title_selector: &str,
|
|
result_url_selector: &str,
|
|
result_desc_selector: &str,
|
|
) -> Result<SearchResultParser, EngineError> {
|
|
Ok(SearchResultParser {
|
|
no_result: new_selector(no_result_selector)?,
|
|
results: new_selector(results_selector)?,
|
|
result_title: new_selector(result_title_selector)?,
|
|
result_url: new_selector(result_url_selector)?,
|
|
result_desc: new_selector(result_desc_selector)?,
|
|
})
|
|
}
|
|
|
|
/// Parse the html and returns element representing the 'no result found' response.
|
|
pub fn parse_for_no_results<'a>(&'a self, document: &'a Html) -> Select<'a, 'a> {
|
|
document.select(&self.no_result)
|
|
}
|
|
|
|
/// Parse the html, and convert the results to SearchResult with the help of the builder function
|
|
pub fn parse_for_results(
|
|
&self,
|
|
document: &Html,
|
|
builder: impl Fn(&ElementRef<'_>, &ElementRef<'_>, &ElementRef<'_>) -> Option<SearchResult>,
|
|
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
|
let res = document
|
|
.select(&self.results)
|
|
.filter_map(|result| {
|
|
let title = result.select(&self.result_title).next();
|
|
let url = result.select(&self.result_url).next();
|
|
let desc = result.select(&self.result_desc).next();
|
|
match (title, url, desc) {
|
|
(Some(ref t), Some(ref u), Some(ref d)) => builder(t, u, d),
|
|
_ => None,
|
|
}
|
|
})
|
|
.map(|search_result| (search_result.url.clone(), search_result))
|
|
.collect();
|
|
Ok(res)
|
|
}
|
|
}
|
|
|
|
/// Create a Selector struct, if the given parameter is a valid css expression, otherwise convert it into an EngineError.
|
|
fn new_selector(selector: &str) -> Result<Selector, EngineError> {
|
|
Selector::parse(selector).map_err(|err| {
|
|
Report::new(EngineError::UnexpectedError).attach_printable(format!(
|
|
"invalid CSS selector: {}, err: {:?}",
|
|
selector, err
|
|
))
|
|
})
|
|
}
|