0
0
mirror of https://github.com/neon-mmd/websurfx.git synced 2024-11-24 15:08:22 -05:00
This commit is contained in:
Nikodem Rabuliński 2024-10-02 08:25:17 +00:00 committed by GitHub
commit 52398820fc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 185 additions and 1 deletions

View File

@ -17,7 +17,8 @@ reqwest = { version = "0.12.5", default-features = false, features = [
"rustls-tls",
"brotli",
"gzip",
"http2"
"http2",
"json"
] }
tokio = { version = "1.32.0", features = [
"rt-multi-thread",

View File

@ -8,6 +8,7 @@ pub mod brave;
pub mod duckduckgo;
pub mod librex;
pub mod mojeek;
pub mod qwant;
pub mod search_result_parser;
pub mod searx;
pub mod startpage;

177
src/engines/qwant.rs Normal file
View File

@ -0,0 +1,177 @@
//! The `qwant` module handles fetching results from the qwant search engine
//! by querying the upstream qwant JSON API with the user-provided query and the
//! requested page number.
use std::borrow::Cow;
use std::collections::HashMap;
use reqwest::header::HeaderMap;
use reqwest::{Client, Url};
use serde::Deserialize;
use crate::models::aggregation_models::SearchResult;
use crate::models::engine_models::{EngineError, SearchEngine};
use error_stack::{Report, Result, ResultExt};
/// Marker type for the Qwant search engine.
///
/// It carries no state; it exists so that the `SearchEngine` trait can be implemented for
/// Qwant, which lets it be handled uniformly alongside the other upstream engines.
pub struct Qwant;
/// A single web page hit as it appears in the Qwant API reply.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct QwantSearchResult {
    // NOTE: The upstream object carries more attributes than are read here (`favicon`,
    // `url_ping_suffix`, `thumbnail_url`, `source`, and `is_family_friendly`);
    // they are ignored because nothing in this module uses them.
    /// Title of the result
    title: String,
    /// Url of the result
    url: String,
    /// Description of the result
    desc: String,
}
impl From<&QwantSearchResult> for SearchResult {
fn from(value: &QwantSearchResult) -> Self {
SearchResult::new(&value.title, &value.url, &value.desc, &["qwant"])
}
}
/// A result which should be shown to the user.
///
/// The variant is selected by the `type` field of the JSON object.
#[derive(Debug, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
enum QwantItem {
    /// Results containing web pages relevant to the query
    Web {
        // NOTE: The upstream object also has `count` and `serpContextId` attributes,
        // which are not read here.
        /// List of web page search results
        items: Vec<QwantSearchResult>,
    },
    /// Any other item type (e.g. "related_searches"); these are not relevant here.
    #[serde(other)]
    Other,
}
/// The sections of a Qwant result page; only the main section is deserialized.
#[derive(Debug, Deserialize)]
struct QwantItems {
    // NOTE: The upstream object also has `headline`, `sidebar`, and `bottomline`
    // attributes, which are not read here.
    /// Results which should be shown in the main section of the page
    mainline: Vec<QwantItem>,
}
/// The `result` object of a successful Qwant API reply.
#[derive(Debug, Deserialize)]
struct QwantResult {
    // NOTE: The upstream object also has `denied`, `total`, `filters`, `lastPage`,
    // `instrumentation`, `onlyProductAds`, and `topClassification` attributes,
    // which are not read here.
    /// Entries that should be shown to the user
    items: QwantItems,
}
/// Top-level envelope of a Qwant API reply.
///
/// The variant is selected by the `status` field of the JSON object and the variant's
/// payload is read from the `data` field.
#[derive(Deserialize, Debug)]
#[serde(rename_all = "snake_case")]
#[serde(tag = "status", content = "data")]
enum QwantApiResponse {
    /// Success response
    Success {
        // NOTE: This object also contains `query` and `cache` attributes,
        // which we currently don't care about.
        /// Actual results the search produced
        result: QwantResult,
    },
    /// Error response
    Error {
        /// Machine-readable error code
        error_code: i32,
        /// List of human-readable error messages (empty when the field is absent)
        #[serde(default)]
        message: Vec<String>,
    },
}

impl From<QwantApiResponse> for Result<QwantResult, EngineError> {
    /// Converts the API envelope into a `Result`.
    ///
    /// A success envelope yields its `result` payload. An error envelope yields an
    /// `EngineError::RequestError` report with the upstream error code and messages
    /// attached, so that the cause reported by the API is not lost.
    fn from(value: QwantApiResponse) -> Self {
        match value {
            QwantApiResponse::Success { result } => Ok(result),
            QwantApiResponse::Error {
                error_code,
                message,
            } => Err(
                Report::new(EngineError::RequestError).attach_printable(format!(
                    "qwant api responded with error code {error_code}: {}",
                    message.join("; ")
                )),
            ),
        }
    }
}
#[async_trait::async_trait]
impl SearchEngine for Qwant {
async fn results(
&self,
query: &str,
page: u32,
user_agent: &str,
client: &Client,
safe_search: u8,
) -> Result<Vec<(String, SearchResult)>, EngineError> {
let results_per_page = 10;
let start_result = results_per_page * page;
let url = Url::parse_with_params(
"https://api.qwant.com/v3/search/web",
&[
("q", Cow::from(query)),
("count", results_per_page.to_string().into()),
("locale", "en_US".into()),
("offset", start_result.to_string().into()),
("safesearch", safe_search.to_string().into()),
("device", "desktop".into()),
("tgb", "2".into()),
("displayed", "true".into()),
],
)
.change_context(EngineError::UnexpectedError)?;
let header_map = HeaderMap::try_from(&HashMap::from([
("User-Agent".to_string(), user_agent.to_string()),
("Referer".to_string(), "https://qwant.com/".to_string()),
("Origin".to_string(), "https://qwant.com".to_string()),
]))
.change_context(EngineError::UnexpectedError)?;
let result: QwantApiResponse = client
.get(url)
.headers(header_map)
.send()
.await
.change_context(EngineError::RequestError)?
.json()
.await
.change_context(EngineError::RequestError)?;
let result = Result::from(result)?;
let results: Vec<_> = result
.items
.mainline
.into_iter()
.filter_map(|item| match item {
QwantItem::Web { items } => Some(items),
_ => None,
})
.flatten()
.map(|result| {
let search_result = SearchResult::from(&result);
(result.url, search_result)
})
.collect();
if results.is_empty() {
Err(Report::new(EngineError::EmptyResultSet))
} else {
Ok(results)
}
}
}

View File

@ -206,6 +206,10 @@ impl EngineHandler {
let engine = crate::engines::bing::Bing::new()?;
("bing", Box::new(engine))
}
"qwant" => {
let engine = crate::engines::qwant::Qwant;
("qwant", Box::new(engine))
}
_ => {
return Err(Report::from(EngineError::NoSuchEngineFound(
engine_name.to_string(),

View File

@ -74,4 +74,5 @@ upstream_search_engines = {
LibreX = false,
Mojeek = false,
Bing = false,
Qwant = false,
} -- select the upstream search engines from which the results should be fetched.