mirror of
https://github.com/neon-mmd/websurfx.git
synced 2024-11-24 06:58:22 -05:00
Compare commits
9 Commits
af2bd26e23
...
746ec10ed0
Author | SHA1 | Date | |
---|---|---|---|
|
746ec10ed0 | ||
|
5d59a2c7be | ||
|
ecc6875a21 | ||
|
d75693ce4e | ||
|
e7efca4a4e | ||
|
8323f49133 | ||
|
afefd023e9 | ||
|
709425f60d | ||
|
fb0c2db08e |
44
Cargo.lock
generated
44
Cargo.lock
generated
@ -59,9 +59,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "actix-governor"
|
name = "actix-governor"
|
||||||
version = "0.5.0"
|
version = "0.6.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "a2e7b88f3804e01bd4191fdb08650430bbfcb43d3d9b2890064df3551ec7d25b"
|
checksum = "0954b0f27aabd8f56bb03f2a77b412ddf3f8c034a3c27b2086c1fc75415760df"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"actix-http",
|
"actix-http",
|
||||||
"actix-web",
|
"actix-web",
|
||||||
@ -1180,9 +1180,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "error-stack"
|
name = "error-stack"
|
||||||
version = "0.4.1"
|
version = "0.5.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "27a72baa257b5e0e2de241967bc5ee8f855d6072351042688621081d66b2a76b"
|
checksum = "fe413319145d1063f080f27556fd30b1d70b01e2ba10c2a6e40d4be982ffc5d1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"rustc_version 0.4.1",
|
"rustc_version 0.4.1",
|
||||||
@ -2411,12 +2411,31 @@ version = "0.3.0"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "38bf9645c8b145698bb0b18a4637dcacbc421ea49bef2317e4fd8065a387cf21"
|
checksum = "38bf9645c8b145698bb0b18a4637dcacbc421ea49bef2317e4fd8065a387cf21"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "num-bigint"
|
||||||
|
version = "0.4.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9"
|
||||||
|
dependencies = [
|
||||||
|
"num-integer",
|
||||||
|
"num-traits",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "num-conv"
|
name = "num-conv"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9"
|
checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "num-integer"
|
||||||
|
version = "0.1.46"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f"
|
||||||
|
dependencies = [
|
||||||
|
"num-traits",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "num-traits"
|
name = "num-traits"
|
||||||
version = "0.2.19"
|
version = "0.2.19"
|
||||||
@ -3127,9 +3146,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "redis"
|
name = "redis"
|
||||||
version = "0.25.4"
|
version = "0.27.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "e0d7a6955c7511f60f3ba9e86c6d02b3c3f144f8c24b288d1f4e18074ab8bbec"
|
checksum = "a7e86f5670bd8b028edfb240f0616cad620705b31ec389d55e4f3da2c38dcd48"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arc-swap",
|
"arc-swap",
|
||||||
"async-trait",
|
"async-trait",
|
||||||
@ -3138,6 +3157,7 @@ dependencies = [
|
|||||||
"futures 0.3.30",
|
"futures 0.3.30",
|
||||||
"futures-util",
|
"futures-util",
|
||||||
"itoa 1.0.11",
|
"itoa 1.0.11",
|
||||||
|
"num-bigint",
|
||||||
"percent-encoding 2.3.1",
|
"percent-encoding 2.3.1",
|
||||||
"pin-project-lite",
|
"pin-project-lite",
|
||||||
"ryu",
|
"ryu",
|
||||||
@ -3164,9 +3184,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "regex"
|
name = "regex"
|
||||||
version = "1.10.6"
|
version = "1.11.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619"
|
checksum = "38200e5ee88914975b69f657f0801b6f6dccafd44fd9326302a4aaeecfacb1d8"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"aho-corasick 1.1.3",
|
"aho-corasick 1.1.3",
|
||||||
"memchr",
|
"memchr",
|
||||||
@ -3176,9 +3196,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "regex-automata"
|
name = "regex-automata"
|
||||||
version = "0.4.7"
|
version = "0.4.8"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df"
|
checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"aho-corasick 1.1.3",
|
"aho-corasick 1.1.3",
|
||||||
"memchr",
|
"memchr",
|
||||||
@ -3193,9 +3213,9 @@ checksum = "53a49587ad06b26609c52e423de037e7f57f20d53535d66e08c695f347df952a"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "regex-syntax"
|
name = "regex-syntax"
|
||||||
version = "0.8.4"
|
version = "0.8.5"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b"
|
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "reqwest"
|
name = "reqwest"
|
||||||
|
11
Cargo.toml
11
Cargo.toml
@ -17,7 +17,8 @@ reqwest = { version = "0.12.5", default-features = false, features = [
|
|||||||
"rustls-tls",
|
"rustls-tls",
|
||||||
"brotli",
|
"brotli",
|
||||||
"gzip",
|
"gzip",
|
||||||
"http2"
|
"http2",
|
||||||
|
"json"
|
||||||
] }
|
] }
|
||||||
tokio = { version = "1.32.0", features = [
|
tokio = { version = "1.32.0", features = [
|
||||||
"rt-multi-thread",
|
"rt-multi-thread",
|
||||||
@ -46,22 +47,22 @@ mlua = { version = "0.9.9", features = [
|
|||||||
"luajit",
|
"luajit",
|
||||||
"vendored",
|
"vendored",
|
||||||
], default-features = false }
|
], default-features = false }
|
||||||
redis = { version = "0.25.4", features = [
|
redis = { version = "0.27.2", features = [
|
||||||
"tokio-comp",
|
"tokio-comp",
|
||||||
"connection-manager",
|
"connection-manager",
|
||||||
"tcp_nodelay"
|
"tcp_nodelay"
|
||||||
], default-features = false, optional = true }
|
], default-features = false, optional = true }
|
||||||
blake3 = { version = "1.5.4", default-features = false }
|
blake3 = { version = "1.5.4", default-features = false }
|
||||||
error-stack = { version = "0.4.0", default-features = false, features = [
|
error-stack = { version = "0.5.0", default-features = false, features = [
|
||||||
"std",
|
"std",
|
||||||
] }
|
] }
|
||||||
async-trait = { version = "0.1.80", default-features = false }
|
async-trait = { version = "0.1.80", default-features = false }
|
||||||
regex = { version = "1.9.4", features = ["perf"], default-features = false }
|
regex = { version = "1.11.0", features = ["perf"], default-features = false }
|
||||||
futures = { version = "0.3.30", default-features = false, features = ["alloc"] }
|
futures = { version = "0.3.30", default-features = false, features = ["alloc"] }
|
||||||
dhat = { version = "0.3.2", optional = true, default-features = false }
|
dhat = { version = "0.3.2", optional = true, default-features = false }
|
||||||
mimalloc = { version = "0.1.43", default-features = false }
|
mimalloc = { version = "0.1.43", default-features = false }
|
||||||
async-once-cell = { version = "0.5.3", default-features = false }
|
async-once-cell = { version = "0.5.3", default-features = false }
|
||||||
actix-governor = { version = "0.5.0", default-features = false }
|
actix-governor = { version = "0.6.0", default-features = false }
|
||||||
moka = { version = "0.12.8", optional = true, default-features = false, features = [
|
moka = { version = "0.12.8", optional = true, default-features = false, features = [
|
||||||
"future",
|
"future",
|
||||||
] }
|
] }
|
||||||
|
@ -8,6 +8,7 @@ pub mod brave;
|
|||||||
pub mod duckduckgo;
|
pub mod duckduckgo;
|
||||||
pub mod librex;
|
pub mod librex;
|
||||||
pub mod mojeek;
|
pub mod mojeek;
|
||||||
|
pub mod qwant;
|
||||||
pub mod search_result_parser;
|
pub mod search_result_parser;
|
||||||
pub mod searx;
|
pub mod searx;
|
||||||
pub mod startpage;
|
pub mod startpage;
|
||||||
|
177
src/engines/qwant.rs
Normal file
177
src/engines/qwant.rs
Normal file
@ -0,0 +1,177 @@
|
|||||||
|
//! The `qwant` module fetches results from the qwant search engine by querying the upstream
|
||||||
|
//! qwant JSON API with the user provided query and the requested page number, and converts
|
||||||
|
//! the returned web results into search results.
|
||||||
|
|
||||||
|
use std::borrow::Cow;
|
||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
use reqwest::header::HeaderMap;
|
||||||
|
use reqwest::{Client, Url};
|
||||||
|
use serde::Deserialize;
|
||||||
|
|
||||||
|
use crate::models::aggregation_models::SearchResult;
|
||||||
|
|
||||||
|
use crate::models::engine_models::{EngineError, SearchEngine};
|
||||||
|
|
||||||
|
use error_stack::{Report, Result, ResultExt};
|
||||||
|
|
||||||
|
/// The Qwant engine type. It exists so that the `SearchEngine` trait can be implemented for it,
|
||||||
|
/// which reduces code duplication and makes it easy to build a vector of different search engines.
|
||||||
|
pub struct Qwant;
|
||||||
|
|
||||||
|
#[derive(Deserialize, Debug)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
/// Web page search result
|
||||||
|
struct QwantSearchResult {
|
||||||
|
// NOTE: This object also contains `favicon`, `url_ping_suffix`, `thumbnail_url`,
|
||||||
|
// `source`, and `is_family_friendly` attributes,
|
||||||
|
// which we currently don't care about.
|
||||||
|
/// Title of the result
|
||||||
|
title: String,
|
||||||
|
/// Url of the result
|
||||||
|
url: String,
|
||||||
|
/// Description of the result
|
||||||
|
desc: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<&QwantSearchResult> for SearchResult {
|
||||||
|
fn from(value: &QwantSearchResult) -> Self {
|
||||||
|
SearchResult::new(&value.title, &value.url, &value.desc, &["qwant"])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize, Debug)]
|
||||||
|
#[serde(rename_all = "snake_case")]
|
||||||
|
#[serde(tag = "type")]
|
||||||
|
/// A result which should be shown to the user
|
||||||
|
enum QwantItem {
|
||||||
|
/// Results containing web pages relevant to the query
|
||||||
|
Web {
|
||||||
|
// NOTE: This object also contains `count` and `serpContextId` attributes,
|
||||||
|
// which we currently don't care about.
|
||||||
|
/// List of web page search results
|
||||||
|
items: Vec<QwantSearchResult>,
|
||||||
|
},
|
||||||
|
#[serde(other)]
|
||||||
|
/// Other item type like "related_searches", which aren't relevant.
|
||||||
|
Other,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize, Debug)]
|
||||||
|
struct QwantItems {
|
||||||
|
// NOTE: This object also contains `headline`, `sidebar`, and `bottomline` attributes,
|
||||||
|
// which we currently don't care about.
|
||||||
|
/// Results which should be shown in the main section of the page
|
||||||
|
mainline: Vec<QwantItem>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize, Debug)]
|
||||||
|
struct QwantResult {
|
||||||
|
// NOTE: This object also contains `denied`, `total`, `items`, `filters`, `lastPage`,
|
||||||
|
// `instrumentation`, `onlyProductAds`, and `topClassification` attributes,
|
||||||
|
// which we currently don't care about.
|
||||||
|
/// Entries that should be shown to the user
|
||||||
|
items: QwantItems,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize, Debug)]
|
||||||
|
#[serde(rename_all = "snake_case")]
|
||||||
|
#[serde(tag = "status", content = "data")]
|
||||||
|
enum QwantApiResponse {
|
||||||
|
/// Success response
|
||||||
|
Success {
|
||||||
|
// NOTE: This object also contains `query` and `cache` attributes,
|
||||||
|
// which we currently don't care about.
|
||||||
|
/// Actual results the search produced
|
||||||
|
result: QwantResult,
|
||||||
|
},
|
||||||
|
// TODO: Use the reported error messages
|
||||||
|
#[allow(unused)]
|
||||||
|
/// Error response
|
||||||
|
Error {
|
||||||
|
/// Machine-readable error code
|
||||||
|
error_code: i32,
|
||||||
|
#[serde(default)]
|
||||||
|
/// List of human-readable error messages
|
||||||
|
message: Vec<String>,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<QwantApiResponse> for Result<QwantResult, EngineError> {
|
||||||
|
fn from(value: QwantApiResponse) -> Self {
|
||||||
|
match value {
|
||||||
|
QwantApiResponse::Success { result } => Ok(result),
|
||||||
|
QwantApiResponse::Error { .. } => Err(Report::new(EngineError::RequestError)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait::async_trait]
|
||||||
|
impl SearchEngine for Qwant {
|
||||||
|
async fn results(
|
||||||
|
&self,
|
||||||
|
query: &str,
|
||||||
|
page: u32,
|
||||||
|
user_agent: &str,
|
||||||
|
client: &Client,
|
||||||
|
safe_search: u8,
|
||||||
|
) -> Result<Vec<(String, SearchResult)>, EngineError> {
|
||||||
|
let results_per_page = 10;
|
||||||
|
let start_result = results_per_page * page;
|
||||||
|
|
||||||
|
let url = Url::parse_with_params(
|
||||||
|
"https://api.qwant.com/v3/search/web",
|
||||||
|
&[
|
||||||
|
("q", Cow::from(query)),
|
||||||
|
("count", results_per_page.to_string().into()),
|
||||||
|
("locale", "en_US".into()),
|
||||||
|
("offset", start_result.to_string().into()),
|
||||||
|
("safesearch", safe_search.to_string().into()),
|
||||||
|
("device", "desktop".into()),
|
||||||
|
("tgb", "2".into()),
|
||||||
|
("displayed", "true".into()),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
.change_context(EngineError::UnexpectedError)?;
|
||||||
|
|
||||||
|
let header_map = HeaderMap::try_from(&HashMap::from([
|
||||||
|
("User-Agent".to_string(), user_agent.to_string()),
|
||||||
|
("Referer".to_string(), "https://qwant.com/".to_string()),
|
||||||
|
("Origin".to_string(), "https://qwant.com".to_string()),
|
||||||
|
]))
|
||||||
|
.change_context(EngineError::UnexpectedError)?;
|
||||||
|
|
||||||
|
let result: QwantApiResponse = client
|
||||||
|
.get(url)
|
||||||
|
.headers(header_map)
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
.change_context(EngineError::RequestError)?
|
||||||
|
.json()
|
||||||
|
.await
|
||||||
|
.change_context(EngineError::RequestError)?;
|
||||||
|
|
||||||
|
let result = Result::from(result)?;
|
||||||
|
|
||||||
|
let results: Vec<_> = result
|
||||||
|
.items
|
||||||
|
.mainline
|
||||||
|
.into_iter()
|
||||||
|
.filter_map(|item| match item {
|
||||||
|
QwantItem::Web { items } => Some(items),
|
||||||
|
_ => None,
|
||||||
|
})
|
||||||
|
.flatten()
|
||||||
|
.map(|result| {
|
||||||
|
let search_result = SearchResult::from(&result);
|
||||||
|
(result.url, search_result)
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
if results.is_empty() {
|
||||||
|
Err(Report::new(EngineError::EmptyResultSet))
|
||||||
|
} else {
|
||||||
|
Ok(results)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -206,6 +206,10 @@ impl EngineHandler {
|
|||||||
let engine = crate::engines::bing::Bing::new()?;
|
let engine = crate::engines::bing::Bing::new()?;
|
||||||
("bing", Box::new(engine))
|
("bing", Box::new(engine))
|
||||||
}
|
}
|
||||||
|
"qwant" => {
|
||||||
|
let engine = crate::engines::qwant::Qwant;
|
||||||
|
("qwant", Box::new(engine))
|
||||||
|
}
|
||||||
_ => {
|
_ => {
|
||||||
return Err(Report::from(EngineError::NoSuchEngineFound(
|
return Err(Report::from(EngineError::NoSuchEngineFound(
|
||||||
engine_name.to_string(),
|
engine_name.to_string(),
|
||||||
|
@ -74,4 +74,5 @@ upstream_search_engines = {
|
|||||||
LibreX = false,
|
LibreX = false,
|
||||||
Mojeek = false,
|
Mojeek = false,
|
||||||
Bing = false,
|
Bing = false,
|
||||||
|
Qwant = false,
|
||||||
} -- select the upstream search engines from which the results should be fetched.
|
} -- select the upstream search engines from which the results should be fetched.
|
||||||
|
Loading…
Reference in New Issue
Block a user