mirror of
https://github.com/neon-mmd/websurfx.git
synced 2024-11-24 23:18:22 -05:00
Compare commits
5 Commits
52398820fc
...
746ec10ed0
Author | SHA1 | Date | |
---|---|---|---|
|
746ec10ed0 | ||
|
8323f49133 | ||
|
afefd023e9 | ||
|
709425f60d | ||
|
fb0c2db08e |
@ -17,7 +17,8 @@ reqwest = { version = "0.12.5", default-features = false, features = [
|
||||
"rustls-tls",
|
||||
"brotli",
|
||||
"gzip",
|
||||
"http2"
|
||||
"http2",
|
||||
"json"
|
||||
] }
|
||||
tokio = { version = "1.32.0", features = [
|
||||
"rt-multi-thread",
|
||||
|
@ -8,6 +8,7 @@ pub mod brave;
|
||||
pub mod duckduckgo;
|
||||
pub mod librex;
|
||||
pub mod mojeek;
|
||||
pub mod qwant;
|
||||
pub mod search_result_parser;
|
||||
pub mod searx;
|
||||
pub mod startpage;
|
||||
|
177
src/engines/qwant.rs
Normal file
177
src/engines/qwant.rs
Normal file
@ -0,0 +1,177 @@
|
||||
//! The `qwant` module fetches results from the Qwant search engine
|
||||
//! by querying the upstream Qwant search API with the user-provided query and the
|
||||
//! requested page number.
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use reqwest::header::HeaderMap;
|
||||
use reqwest::{Client, Url};
|
||||
use serde::Deserialize;
|
||||
|
||||
use crate::models::aggregation_models::SearchResult;
|
||||
|
||||
use crate::models::engine_models::{EngineError, SearchEngine};
|
||||
|
||||
use error_stack::{Report, Result, ResultExt};
|
||||
|
||||
/// The Qwant engine type.
///
/// It is defined so that the `SearchEngine` trait can be implemented for it, which reduces
/// code duplication and makes it easy to build a vector of different search engines.
pub struct Qwant;
|
||||
|
||||
#[derive(Deserialize, Debug)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
/// Web page search result
|
||||
struct QwantSearchResult {
|
||||
// NOTE: This object also contains `favicon`, `url_ping_suffix`, `thumbnail_url`,
|
||||
// `source`, and `is_family_friendly` attributes,
|
||||
// which we currently don't care about.
|
||||
/// Title of the result
|
||||
title: String,
|
||||
/// Url of the result
|
||||
url: String,
|
||||
/// Description of the result
|
||||
desc: String,
|
||||
}
|
||||
|
||||
impl From<&QwantSearchResult> for SearchResult {
|
||||
fn from(value: &QwantSearchResult) -> Self {
|
||||
SearchResult::new(&value.title, &value.url, &value.desc, &["qwant"])
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
#[serde(tag = "type")]
|
||||
/// A result which should be shown to the user
|
||||
enum QwantItem {
|
||||
/// Results containing web pages relevant to the query
|
||||
Web {
|
||||
// NOTE: This object also contains `count` and `serpContextId` attributes,
|
||||
// which we currently don't care about.
|
||||
/// List of web page search results
|
||||
items: Vec<QwantSearchResult>,
|
||||
},
|
||||
#[serde(other)]
|
||||
/// Other item type like "related_searches", which aren't relevant.
|
||||
Other,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug)]
|
||||
struct QwantItems {
|
||||
// NOTE: This object also contains `headline`, `sidebar`, and `bottomline` attributes,
|
||||
// which we currently don't care about.
|
||||
/// Results which should be shown in the main section of the page
|
||||
mainline: Vec<QwantItem>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug)]
|
||||
struct QwantResult {
|
||||
// NOTE: This object also contains `denied`, `total`, `items`, `filters`, `lastPage`,
|
||||
// `instrumentation`, `onlyProductAds`, and `topClassification` attributes,
|
||||
// which we currently don't care about.
|
||||
/// Entries that should be shown to the user
|
||||
items: QwantItems,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
#[serde(tag = "status", content = "data")]
|
||||
enum QwantApiResponse {
|
||||
/// Success response
|
||||
Success {
|
||||
// NOTE: This object also contains `query` and `cache` attributes,
|
||||
// which we currently don't care about.
|
||||
/// Actual results the search produced
|
||||
result: QwantResult,
|
||||
},
|
||||
// TODO: Use the reported error messages
|
||||
#[allow(unused)]
|
||||
/// Error response
|
||||
Error {
|
||||
/// Machine-readable error code
|
||||
error_code: i32,
|
||||
#[serde(default)]
|
||||
/// List of human-readable error messages
|
||||
message: Vec<String>,
|
||||
},
|
||||
}
|
||||
|
||||
impl From<QwantApiResponse> for Result<QwantResult, EngineError> {
|
||||
fn from(value: QwantApiResponse) -> Self {
|
||||
match value {
|
||||
QwantApiResponse::Success { result } => Ok(result),
|
||||
QwantApiResponse::Error { .. } => Err(Report::new(EngineError::RequestError)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl SearchEngine for Qwant {
|
||||
async fn results(
|
||||
&self,
|
||||
query: &str,
|
||||
page: u32,
|
||||
user_agent: &str,
|
||||
client: &Client,
|
||||
safe_search: u8,
|
||||
) -> Result<Vec<(String, SearchResult)>, EngineError> {
|
||||
let results_per_page = 10;
|
||||
let start_result = results_per_page * page;
|
||||
|
||||
let url = Url::parse_with_params(
|
||||
"https://api.qwant.com/v3/search/web",
|
||||
&[
|
||||
("q", Cow::from(query)),
|
||||
("count", results_per_page.to_string().into()),
|
||||
("locale", "en_US".into()),
|
||||
("offset", start_result.to_string().into()),
|
||||
("safesearch", safe_search.to_string().into()),
|
||||
("device", "desktop".into()),
|
||||
("tgb", "2".into()),
|
||||
("displayed", "true".into()),
|
||||
],
|
||||
)
|
||||
.change_context(EngineError::UnexpectedError)?;
|
||||
|
||||
let header_map = HeaderMap::try_from(&HashMap::from([
|
||||
("User-Agent".to_string(), user_agent.to_string()),
|
||||
("Referer".to_string(), "https://qwant.com/".to_string()),
|
||||
("Origin".to_string(), "https://qwant.com".to_string()),
|
||||
]))
|
||||
.change_context(EngineError::UnexpectedError)?;
|
||||
|
||||
let result: QwantApiResponse = client
|
||||
.get(url)
|
||||
.headers(header_map)
|
||||
.send()
|
||||
.await
|
||||
.change_context(EngineError::RequestError)?
|
||||
.json()
|
||||
.await
|
||||
.change_context(EngineError::RequestError)?;
|
||||
|
||||
let result = Result::from(result)?;
|
||||
|
||||
let results: Vec<_> = result
|
||||
.items
|
||||
.mainline
|
||||
.into_iter()
|
||||
.filter_map(|item| match item {
|
||||
QwantItem::Web { items } => Some(items),
|
||||
_ => None,
|
||||
})
|
||||
.flatten()
|
||||
.map(|result| {
|
||||
let search_result = SearchResult::from(&result);
|
||||
(result.url, search_result)
|
||||
})
|
||||
.collect();
|
||||
|
||||
if results.is_empty() {
|
||||
Err(Report::new(EngineError::EmptyResultSet))
|
||||
} else {
|
||||
Ok(results)
|
||||
}
|
||||
}
|
||||
}
|
@ -206,6 +206,10 @@ impl EngineHandler {
|
||||
let engine = crate::engines::bing::Bing::new()?;
|
||||
("bing", Box::new(engine))
|
||||
}
|
||||
"qwant" => {
|
||||
let engine = crate::engines::qwant::Qwant;
|
||||
("qwant", Box::new(engine))
|
||||
}
|
||||
_ => {
|
||||
return Err(Report::from(EngineError::NoSuchEngineFound(
|
||||
engine_name.to_string(),
|
||||
|
@ -74,4 +74,5 @@ upstream_search_engines = {
|
||||
LibreX = false,
|
||||
Mojeek = false,
|
||||
Bing = false,
|
||||
Qwant = false,
|
||||
} -- select the upstream search engines from which the results should be fetched.
|
||||
|
Loading…
Reference in New Issue
Block a user