0
0
mirror of https://github.com/neon-mmd/websurfx.git synced 2024-11-21 21:48:21 -05:00

Merge pull request #16 from neon-mmd/rolling

Release 0.6.0
This commit is contained in:
neon_arch 2023-05-09 16:26:51 +00:00 committed by GitHub
commit 16d76bf70d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 143 additions and 91 deletions

9
.dockerignore Normal file
View File

@ -0,0 +1,9 @@
# Paths excluded from the Docker build context.
# Build output and repository metadata.
target/
.git/
.github/
# The Docker tooling files themselves.
.dockerignore
Dockerfile
docker-compose.yml
# The images directory, Markdown files and the license file.
images
*.md
LICENSE

130
Cargo.lock generated
View File

@ -87,7 +87,7 @@ version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "465a6172cf69b960917811022d8f29bc0b7fa1398bc4f78b3c466673db1213b6"
dependencies = [
"quote 1.0.26",
"quote 1.0.27",
"syn 1.0.109",
]
@ -190,7 +190,7 @@ dependencies = [
"serde_urlencoded 0.7.1",
"smallvec 1.10.0",
"socket2",
"time 0.3.20",
"time 0.3.21",
"url 2.3.1",
]
@ -202,7 +202,7 @@ checksum = "2262160a7ae29e3415554a3f1fc04c764b1540c116aa524683208078b7a75bc9"
dependencies = [
"actix-router",
"proc-macro2 1.0.56",
"quote 1.0.26",
"quote 1.0.27",
"syn 1.0.109",
]
@ -480,7 +480,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e859cd57d0710d9e06c381b550c06e76992472a8c6d527aecd2fc673dcc231fb"
dependencies = [
"percent-encoding 2.2.0",
"time 0.3.20",
"time 0.3.21",
"version_check",
]
@ -606,7 +606,7 @@ dependencies = [
"matches",
"phf 0.10.1",
"proc-macro2 1.0.56",
"quote 1.0.26",
"quote 1.0.27",
"smallvec 1.10.0",
"syn 1.0.109",
]
@ -617,7 +617,7 @@ version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dfae75de57f2b2e85e8768c3ea840fd159c8f33e2b6522c7835b7abac81be16e"
dependencies = [
"quote 1.0.26",
"quote 1.0.27",
"syn 1.0.109",
]
@ -629,7 +629,7 @@ checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321"
dependencies = [
"convert_case",
"proc-macro2 1.0.56",
"quote 1.0.26",
"quote 1.0.27",
"rustc_version 0.4.0",
"syn 1.0.109",
]
@ -731,7 +731,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aa4da3c766cd7a0db8242e326e9e4e081edd567072893ed320008189715366a4"
dependencies = [
"proc-macro2 1.0.56",
"quote 1.0.26",
"quote 1.0.27",
"syn 1.0.109",
"synstructure",
]
@ -1036,7 +1036,7 @@ dependencies = [
"mac",
"markup5ever 0.11.0",
"proc-macro2 1.0.56",
"quote 1.0.26",
"quote 1.0.27",
"syn 1.0.109",
]
@ -1301,9 +1301,9 @@ dependencies = [
[[package]]
name = "js-sys"
version = "0.3.61"
version = "0.3.62"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "445dde2150c55e483f3d8416706b97ec8e8237c307e5b7b4b8dd15e6af2a0730"
checksum = "68c16e1bfd491478ab155fd8b4896b86f9ede344949b641e61501e07c2b8b4d5"
dependencies = [
"wasm-bindgen",
]
@ -1332,15 +1332,15 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "libc"
version = "0.2.142"
version = "0.2.144"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a987beff54b60ffa6d51982e1aa1146bc42f19bd26be28b0586f252fccf5317"
checksum = "2b00cc1c228a6782d0f076e7b232802e0c5689d41bb5df366f2a6b6621cfdfe1"
[[package]]
name = "linux-raw-sys"
version = "0.3.6"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b64f40e5e03e0d54f03845c8197d0291253cdbedfb1cb46b13c2c117554a9f4c"
checksum = "ece97ea872ece730aed82664c424eb4c8291e1ff2480247ccf7409044bc6479f"
[[package]]
name = "local-channel"
@ -1632,7 +1632,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
dependencies = [
"proc-macro2 1.0.56",
"quote 1.0.26",
"quote 1.0.27",
"syn 2.0.15",
]
@ -1750,7 +1750,7 @@ dependencies = [
"pest",
"pest_meta",
"proc-macro2 1.0.56",
"quote 1.0.26",
"quote 1.0.27",
"syn 2.0.15",
]
@ -1864,7 +1864,7 @@ dependencies = [
"phf_shared 0.10.0",
"proc-macro-hack",
"proc-macro2 1.0.56",
"quote 1.0.26",
"quote 1.0.27",
"syn 1.0.109",
]
@ -1909,9 +1909,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
[[package]]
name = "pkg-config"
version = "0.3.26"
version = "0.3.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160"
checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964"
[[package]]
name = "ppv-lite86"
@ -1970,9 +1970,9 @@ dependencies = [
[[package]]
name = "quote"
version = "1.0.26"
version = "1.0.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc"
checksum = "8f4f29d145265ec1c483c7c654450edde0bfe043d3938d6972630663356d9500"
dependencies = [
"proc-macro2 1.0.56",
]
@ -2314,9 +2314,9 @@ dependencies = [
[[package]]
name = "rlua-lua54-sys"
version = "0.1.3"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23ae48797c3e76fb2c205fda8f30e28416a15b9fc1d649cc7cea9ff1fb9cf028"
checksum = "93f42202b5aeb0bcc5df28436f8d963f8cbcbb898033a9e28c7ba4f299707934"
dependencies = [
"cc",
"libc",
@ -2349,9 +2349,9 @@ dependencies = [
[[package]]
name = "rustix"
version = "0.37.17"
version = "0.37.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc809f704c03a812ac71f22456c857be34185cac691a4316f27ab0f633bb9009"
checksum = "acf8729d8542766f1b2cf77eb034d52f40d375bb8b615d0b147089946e16613d"
dependencies = [
"bitflags",
"errno",
@ -2482,21 +2482,21 @@ checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
[[package]]
name = "serde"
version = "1.0.160"
version = "1.0.162"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bb2f3770c8bce3bcda7e149193a069a0f4365bda1fa5cd88e03bca26afc1216c"
checksum = "71b2f6e1ab5c2b98c05f0f35b236b22e8df7ead6ffbf51d7808da7f8817e7ab6"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.160"
version = "1.0.162"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "291a097c63d8497e00160b166a967a4a79c64f3facdd01cbd7502231688d77df"
checksum = "a2a0814352fd64b58489904a44ea8d90cb1a91dcb6b4f5ebabc32c8318e93cb6"
dependencies = [
"proc-macro2 1.0.56",
"quote 1.0.26",
"quote 1.0.27",
"syn 2.0.15",
]
@ -2681,7 +2681,7 @@ dependencies = [
"phf_generator 0.7.24",
"phf_shared 0.7.24",
"proc-macro2 1.0.56",
"quote 1.0.26",
"quote 1.0.27",
"string_cache_shared",
]
@ -2694,7 +2694,7 @@ dependencies = [
"phf_generator 0.10.0",
"phf_shared 0.10.0",
"proc-macro2 1.0.56",
"quote 1.0.26",
"quote 1.0.27",
]
[[package]]
@ -2721,7 +2721,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
dependencies = [
"proc-macro2 1.0.56",
"quote 1.0.26",
"quote 1.0.27",
"unicode-ident",
]
@ -2732,7 +2732,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a34fcf3e8b60f57e6a14301a2e916d323af98b0ea63c599441eec8558660c822"
dependencies = [
"proc-macro2 1.0.56",
"quote 1.0.26",
"quote 1.0.27",
"unicode-ident",
]
@ -2743,7 +2743,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f"
dependencies = [
"proc-macro2 1.0.56",
"quote 1.0.26",
"quote 1.0.27",
"syn 1.0.109",
"unicode-xid 0.2.4",
]
@ -2797,7 +2797,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f"
dependencies = [
"proc-macro2 1.0.56",
"quote 1.0.26",
"quote 1.0.27",
"syn 2.0.15",
]
@ -2814,9 +2814,9 @@ dependencies = [
[[package]]
name = "time"
version = "0.3.20"
version = "0.3.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd0cbfecb4d19b5ea75bb31ad904eb5b9fa13f21079c3b92017ebdf4999a5890"
checksum = "8f3403384eaacbca9923fa06940178ac13e4edb725486d70e8e15881d0c836cc"
dependencies = [
"itoa 1.0.6",
"serde",
@ -2826,15 +2826,15 @@ dependencies = [
[[package]]
name = "time-core"
version = "0.1.0"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2e153e1f1acaef8acc537e68b44906d2db6436e2b35ac2c6b42640fff91f00fd"
checksum = "7300fbefb4dadc1af235a9cef3737cea692a9d97e1b9cbcd4ebdae6f8868e6fb"
[[package]]
name = "time-macros"
version = "0.2.8"
version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd80a657e71da814b8e5d60d3374fc6d35045062245d80224748ae522dd76f36"
checksum = "372950940a5f07bf38dbe211d7283c9e6d7327df53794992d293e534c733d09b"
dependencies = [
"time-core",
]
@ -2941,7 +2941,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e"
dependencies = [
"proc-macro2 1.0.56",
"quote 1.0.26",
"quote 1.0.27",
"syn 2.0.15",
]
@ -3243,9 +3243,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "wasm-bindgen"
version = "0.2.84"
version = "0.2.85"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "31f8dcbc21f30d9b8f2ea926ecb58f6b91192c17e9d33594b3df58b2007ca53b"
checksum = "5b6cb788c4e39112fbe1822277ef6fb3c55cd86b95cb3d3c4c1c9597e4ac74b4"
dependencies = [
"cfg-if 1.0.0",
"wasm-bindgen-macro",
@ -3253,24 +3253,24 @@ dependencies = [
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.84"
version = "0.2.85"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95ce90fd5bcc06af55a641a86428ee4229e44e07033963a2290a8e241607ccb9"
checksum = "35e522ed4105a9d626d885b35d62501b30d9666283a5c8be12c14a8bdafe7822"
dependencies = [
"bumpalo",
"log",
"once_cell",
"proc-macro2 1.0.56",
"quote 1.0.26",
"syn 1.0.109",
"quote 1.0.27",
"syn 2.0.15",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-futures"
version = "0.4.34"
version = "0.4.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f219e0d211ba40266969f6dbdd90636da12f75bee4fc9d6c23d1260dadb51454"
checksum = "083abe15c5d88556b77bdf7aef403625be9e327ad37c62c4e4129af740168163"
dependencies = [
"cfg-if 1.0.0",
"js-sys",
@ -3280,38 +3280,38 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.84"
version = "0.2.85"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c21f77c0bedc37fd5dc21f897894a5ca01e7bb159884559461862ae90c0b4c5"
checksum = "358a79a0cb89d21db8120cbfb91392335913e4890665b1a7981d9e956903b434"
dependencies = [
"quote 1.0.26",
"quote 1.0.27",
"wasm-bindgen-macro-support",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.84"
version = "0.2.85"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2aff81306fcac3c7515ad4e177f521b5c9a15f2b08f4e32d823066102f35a5f6"
checksum = "4783ce29f09b9d93134d41297aded3a712b7b979e9c6f28c32cb88c973a94869"
dependencies = [
"proc-macro2 1.0.56",
"quote 1.0.26",
"syn 1.0.109",
"quote 1.0.27",
"syn 2.0.15",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.84"
version = "0.2.85"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0046fef7e28c3804e5e38bfa31ea2a0f73905319b677e57ebe37e49358989b5d"
checksum = "a901d592cafaa4d711bc324edfaff879ac700b19c3dfd60058d2b445be2691eb"
[[package]]
name = "web-sys"
version = "0.3.61"
version = "0.3.62"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e33b99f4b23ba3eec1a53ac264e35a755f00e966e0065077d6027c0f575b0b97"
checksum = "16b5f940c7edfdc6d12126d98c9ef4d1b3d470011c47c76a6581df47ad9ba721"
dependencies = [
"js-sys",
"wasm-bindgen",
@ -3319,7 +3319,7 @@ dependencies = [
[[package]]
name = "websurfx"
version = "0.1.0"
version = "0.6.0"
dependencies = [
"actix-files",
"actix-web",
@ -3328,7 +3328,7 @@ dependencies = [
"handlebars",
"log",
"md5",
"rand 0.6.5",
"rand 0.8.5",
"redis",
"reqwest 0.11.17",
"rlua",

View File

@ -1,6 +1,6 @@
[package]
name = "websurfx"
version = "0.1.0"
version = "0.6.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

26
Dockerfile Normal file
View File

@ -0,0 +1,26 @@
# Multi-stage build for websurfx using cargo-chef to cache dependency builds.

# Stage "chef": Rust toolchain plus cargo-chef, shared by the planner and builder stages.
FROM rust:latest AS chef
# We only pay the installation cost once,
# it will be cached from the second build onwards
RUN cargo install cargo-chef
# Absolute path so the working directory does not depend on the base image's
# current directory; later stages refer to it as /app.
WORKDIR /app

# Stage "planner": compute the dependency recipe from the project sources.
FROM chef AS planner
COPY . .
RUN cargo chef prepare --recipe-path recipe.json

# Stage "builder": compile dependencies first (cached), then the application.
FROM chef AS builder
COPY --from=planner /app/recipe.json recipe.json
# Build dependencies - this is the caching Docker layer!
RUN cargo chef cook --release --recipe-path recipe.json
# Build application
COPY . .
RUN cargo install --path .

# We do not need the Rust toolchain to run the binary!
FROM gcr.io/distroless/cc-debian11
# Source paths of `COPY --from` are resolved from the filesystem root of the
# named stage, not from its WORKDIR, so the project directories must be
# addressed through /app.
COPY --from=builder /app/public/ ./public/
COPY --from=builder /app/websurfx/ ./websurfx/
# Copy only the application binary; a wildcard over cargo's bin directory would
# also pull cargo-chef and the toolchain executables into the runtime image.
COPY --from=builder /usr/local/cargo/bin/websurfx /usr/local/bin/websurfx
CMD ["websurfx"]

15
docker-compose.yml Normal file
View File

@ -0,0 +1,15 @@
# Compose stack for websurfx: the `app` service plus the `redis` service it depends on.
version: "3.9"
services:
  app:
    image: websurfx:latest
    # Build the image from the Dockerfile in this directory.
    build: .
    ports:
      # HOST:CONTAINER mappings are quoted so YAML always reads them as strings
      # (unquoted xx:yy values can be parsed as base-60 numbers).
      - "8080:8080"
    # Start redis before the app container.
    depends_on:
      - redis
    links:
      - redis
  redis:
    image: redis:latest
    ports:
      - "6379:6379"

View File

@ -2,10 +2,9 @@
//! by querying the upstream duckduckgo search engine with user provided query and with a page
//! number if provided.
use std::{collections::HashMap, time::Duration};
use std::collections::HashMap;
use rand::Rng;
use reqwest::header::{HeaderMap, CONTENT_TYPE, REFERER, USER_AGENT};
use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
use scraper::{Html, Selector};
use crate::search_results_handler::aggregation_models::RawSearchResult;
@ -47,16 +46,12 @@ pub async fn results(
}
};
// Add a random delay before making the request.
let mut rng = rand::thread_rng();
let delay_secs = rng.gen_range(1, 10);
std::thread::sleep(Duration::from_secs(delay_secs));
// initializing HeaderMap and adding appropriate headers.
let mut header_map = HeaderMap::new();
header_map.insert(USER_AGENT, user_agent.parse()?);
header_map.insert(REFERER, "https://google.com/".parse()?);
header_map.insert(CONTENT_TYPE, "text/html; charset=UTF-8".parse()?);
header_map.insert(CONTENT_TYPE, "application/x-www-form-urlencoded".parse()?);
header_map.insert(COOKIE, "kl=wt-wt".parse()?);
// fetch the html from upstream duckduckgo engine
// TODO: Write better error handling code to handle no results case.

View File

@ -2,10 +2,9 @@
//! by querying the upstream searx search engine instance with user provided query and with a page
//! number if provided.
use rand::Rng;
use reqwest::header::{HeaderMap, CONTENT_TYPE, REFERER, USER_AGENT};
use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
use scraper::{Html, Selector};
use std::{collections::HashMap, time::Duration};
use std::collections::HashMap;
use crate::search_results_handler::aggregation_models::RawSearchResult;
@ -34,16 +33,12 @@ pub async fn results(
// so that upstream server receives valid page number.
let url: String = format!("https://searx.work/search?q={query}&pageno={page}");
// Add random delay before making the request.
let mut rng = rand::thread_rng();
let delay_secs = rng.gen_range(1, 10);
std::thread::sleep(Duration::from_secs(delay_secs));
// initializing headers and adding appropriate headers.
let mut header_map = HeaderMap::new();
header_map.insert(USER_AGENT, user_agent.parse()?);
header_map.insert(REFERER, "https://google.com/".parse()?);
header_map.insert(CONTENT_TYPE, "application/x-www-form-urlencoded".parse()?);
header_map.insert(COOKIE, "categories=general; language=auto; locale=en; autocomplete=duckduckgo; image_proxy=1; method=POST; safesearch=2; theme=simple; results_on_new_tab=1; doi_resolver=oadoi.org; simple_style=auto; center_alignment=1; query_in_title=1; infinite_scroll=0; disabled_engines=; enabled_engines=\"archive is__general\\054yep__general\\054curlie__general\\054currency__general\\054ddg definitions__general\\054wikidata__general\\054duckduckgo__general\\054tineye__general\\054lingva__general\\054startpage__general\\054yahoo__general\\054wiby__general\\054marginalia__general\\054alexandria__general\\054wikibooks__general\\054wikiquote__general\\054wikisource__general\\054wikiversity__general\\054wikivoyage__general\\054dictzone__general\\054seznam__general\\054mojeek__general\\054naver__general\\054wikimini__general\\054brave__general\\054petalsearch__general\\054goo__general\"; disabled_plugins=; enabled_plugins=\"searx.plugins.hostname_replace\\054searx.plugins.oa_doi_rewrite\\054searx.plugins.vim_hotkeys\"; tokens=; maintab=on; enginetab=on".parse()?);
// fetch the html from upstream searx instance engine
// TODO: Write better error handling code to handle no results case.

View File

@ -1,7 +1,10 @@
//! This module provides the functionality to scrape and gather all the results from the upstream
//! search engines and then remove duplicate results.
use std::collections::HashMap;
use std::{collections::HashMap, time::Duration};
use rand::Rng;
use tokio::join;
use super::{
aggregation_models::{RawSearchResult, SearchResult, SearchResults},
@ -39,10 +42,19 @@ pub async fn aggregate(
let user_agent: String = random_user_agent();
let mut result_map: HashMap<String, RawSearchResult> = HashMap::new();
let ddg_map_results: HashMap<String, RawSearchResult> =
duckduckgo::results(query, page, &user_agent).await?;
let searx_map_results: HashMap<String, RawSearchResult> =
searx::results(query, page, &user_agent).await?;
// Add a random delay before making the request.
let mut rng = rand::thread_rng();
let delay_secs = rng.gen_range(1..10);
std::thread::sleep(Duration::from_secs(delay_secs));
// fetch results from upstream search engines simultaneously/concurrently.
let (ddg_map_results, searx_map_results) = join!(
duckduckgo::results(query, page, &user_agent),
searx::results(query, page, &user_agent)
);
let ddg_map_results: HashMap<String, RawSearchResult> = ddg_map_results?;
let searx_map_results: HashMap<String, RawSearchResult> = searx_map_results?;
result_map.extend(ddg_map_results);