0
0
mirror of https://github.com/neon-mmd/websurfx.git synced 2024-11-25 15:38:21 -05:00

Merge pull request #16 from neon-mmd/rolling

Release 0.6.0
This commit is contained in:
neon_arch 2023-05-09 16:26:51 +00:00 committed by GitHub
commit 16d76bf70d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 143 additions and 91 deletions

9
.dockerignore Normal file
View File

@ -0,0 +1,9 @@
target/
.git/
.github/
.dockerignore
Dockerfile
docker-compose.yml
images
*.md
LICENSE

130
Cargo.lock generated
View File

@ -87,7 +87,7 @@ version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "465a6172cf69b960917811022d8f29bc0b7fa1398bc4f78b3c466673db1213b6" checksum = "465a6172cf69b960917811022d8f29bc0b7fa1398bc4f78b3c466673db1213b6"
dependencies = [ dependencies = [
"quote 1.0.26", "quote 1.0.27",
"syn 1.0.109", "syn 1.0.109",
] ]
@ -190,7 +190,7 @@ dependencies = [
"serde_urlencoded 0.7.1", "serde_urlencoded 0.7.1",
"smallvec 1.10.0", "smallvec 1.10.0",
"socket2", "socket2",
"time 0.3.20", "time 0.3.21",
"url 2.3.1", "url 2.3.1",
] ]
@ -202,7 +202,7 @@ checksum = "2262160a7ae29e3415554a3f1fc04c764b1540c116aa524683208078b7a75bc9"
dependencies = [ dependencies = [
"actix-router", "actix-router",
"proc-macro2 1.0.56", "proc-macro2 1.0.56",
"quote 1.0.26", "quote 1.0.27",
"syn 1.0.109", "syn 1.0.109",
] ]
@ -480,7 +480,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e859cd57d0710d9e06c381b550c06e76992472a8c6d527aecd2fc673dcc231fb" checksum = "e859cd57d0710d9e06c381b550c06e76992472a8c6d527aecd2fc673dcc231fb"
dependencies = [ dependencies = [
"percent-encoding 2.2.0", "percent-encoding 2.2.0",
"time 0.3.20", "time 0.3.21",
"version_check", "version_check",
] ]
@ -606,7 +606,7 @@ dependencies = [
"matches", "matches",
"phf 0.10.1", "phf 0.10.1",
"proc-macro2 1.0.56", "proc-macro2 1.0.56",
"quote 1.0.26", "quote 1.0.27",
"smallvec 1.10.0", "smallvec 1.10.0",
"syn 1.0.109", "syn 1.0.109",
] ]
@ -617,7 +617,7 @@ version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dfae75de57f2b2e85e8768c3ea840fd159c8f33e2b6522c7835b7abac81be16e" checksum = "dfae75de57f2b2e85e8768c3ea840fd159c8f33e2b6522c7835b7abac81be16e"
dependencies = [ dependencies = [
"quote 1.0.26", "quote 1.0.27",
"syn 1.0.109", "syn 1.0.109",
] ]
@ -629,7 +629,7 @@ checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321"
dependencies = [ dependencies = [
"convert_case", "convert_case",
"proc-macro2 1.0.56", "proc-macro2 1.0.56",
"quote 1.0.26", "quote 1.0.27",
"rustc_version 0.4.0", "rustc_version 0.4.0",
"syn 1.0.109", "syn 1.0.109",
] ]
@ -731,7 +731,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aa4da3c766cd7a0db8242e326e9e4e081edd567072893ed320008189715366a4" checksum = "aa4da3c766cd7a0db8242e326e9e4e081edd567072893ed320008189715366a4"
dependencies = [ dependencies = [
"proc-macro2 1.0.56", "proc-macro2 1.0.56",
"quote 1.0.26", "quote 1.0.27",
"syn 1.0.109", "syn 1.0.109",
"synstructure", "synstructure",
] ]
@ -1036,7 +1036,7 @@ dependencies = [
"mac", "mac",
"markup5ever 0.11.0", "markup5ever 0.11.0",
"proc-macro2 1.0.56", "proc-macro2 1.0.56",
"quote 1.0.26", "quote 1.0.27",
"syn 1.0.109", "syn 1.0.109",
] ]
@ -1301,9 +1301,9 @@ dependencies = [
[[package]] [[package]]
name = "js-sys" name = "js-sys"
version = "0.3.61" version = "0.3.62"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "445dde2150c55e483f3d8416706b97ec8e8237c307e5b7b4b8dd15e6af2a0730" checksum = "68c16e1bfd491478ab155fd8b4896b86f9ede344949b641e61501e07c2b8b4d5"
dependencies = [ dependencies = [
"wasm-bindgen", "wasm-bindgen",
] ]
@ -1332,15 +1332,15 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]] [[package]]
name = "libc" name = "libc"
version = "0.2.142" version = "0.2.144"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a987beff54b60ffa6d51982e1aa1146bc42f19bd26be28b0586f252fccf5317" checksum = "2b00cc1c228a6782d0f076e7b232802e0c5689d41bb5df366f2a6b6621cfdfe1"
[[package]] [[package]]
name = "linux-raw-sys" name = "linux-raw-sys"
version = "0.3.6" version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b64f40e5e03e0d54f03845c8197d0291253cdbedfb1cb46b13c2c117554a9f4c" checksum = "ece97ea872ece730aed82664c424eb4c8291e1ff2480247ccf7409044bc6479f"
[[package]] [[package]]
name = "local-channel" name = "local-channel"
@ -1632,7 +1632,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
dependencies = [ dependencies = [
"proc-macro2 1.0.56", "proc-macro2 1.0.56",
"quote 1.0.26", "quote 1.0.27",
"syn 2.0.15", "syn 2.0.15",
] ]
@ -1750,7 +1750,7 @@ dependencies = [
"pest", "pest",
"pest_meta", "pest_meta",
"proc-macro2 1.0.56", "proc-macro2 1.0.56",
"quote 1.0.26", "quote 1.0.27",
"syn 2.0.15", "syn 2.0.15",
] ]
@ -1864,7 +1864,7 @@ dependencies = [
"phf_shared 0.10.0", "phf_shared 0.10.0",
"proc-macro-hack", "proc-macro-hack",
"proc-macro2 1.0.56", "proc-macro2 1.0.56",
"quote 1.0.26", "quote 1.0.27",
"syn 1.0.109", "syn 1.0.109",
] ]
@ -1909,9 +1909,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
[[package]] [[package]]
name = "pkg-config" name = "pkg-config"
version = "0.3.26" version = "0.3.27"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160" checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964"
[[package]] [[package]]
name = "ppv-lite86" name = "ppv-lite86"
@ -1970,9 +1970,9 @@ dependencies = [
[[package]] [[package]]
name = "quote" name = "quote"
version = "1.0.26" version = "1.0.27"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc" checksum = "8f4f29d145265ec1c483c7c654450edde0bfe043d3938d6972630663356d9500"
dependencies = [ dependencies = [
"proc-macro2 1.0.56", "proc-macro2 1.0.56",
] ]
@ -2314,9 +2314,9 @@ dependencies = [
[[package]] [[package]]
name = "rlua-lua54-sys" name = "rlua-lua54-sys"
version = "0.1.3" version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23ae48797c3e76fb2c205fda8f30e28416a15b9fc1d649cc7cea9ff1fb9cf028" checksum = "93f42202b5aeb0bcc5df28436f8d963f8cbcbb898033a9e28c7ba4f299707934"
dependencies = [ dependencies = [
"cc", "cc",
"libc", "libc",
@ -2349,9 +2349,9 @@ dependencies = [
[[package]] [[package]]
name = "rustix" name = "rustix"
version = "0.37.17" version = "0.37.19"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc809f704c03a812ac71f22456c857be34185cac691a4316f27ab0f633bb9009" checksum = "acf8729d8542766f1b2cf77eb034d52f40d375bb8b615d0b147089946e16613d"
dependencies = [ dependencies = [
"bitflags", "bitflags",
"errno", "errno",
@ -2482,21 +2482,21 @@ checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
[[package]] [[package]]
name = "serde" name = "serde"
version = "1.0.160" version = "1.0.162"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bb2f3770c8bce3bcda7e149193a069a0f4365bda1fa5cd88e03bca26afc1216c" checksum = "71b2f6e1ab5c2b98c05f0f35b236b22e8df7ead6ffbf51d7808da7f8817e7ab6"
dependencies = [ dependencies = [
"serde_derive", "serde_derive",
] ]
[[package]] [[package]]
name = "serde_derive" name = "serde_derive"
version = "1.0.160" version = "1.0.162"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "291a097c63d8497e00160b166a967a4a79c64f3facdd01cbd7502231688d77df" checksum = "a2a0814352fd64b58489904a44ea8d90cb1a91dcb6b4f5ebabc32c8318e93cb6"
dependencies = [ dependencies = [
"proc-macro2 1.0.56", "proc-macro2 1.0.56",
"quote 1.0.26", "quote 1.0.27",
"syn 2.0.15", "syn 2.0.15",
] ]
@ -2681,7 +2681,7 @@ dependencies = [
"phf_generator 0.7.24", "phf_generator 0.7.24",
"phf_shared 0.7.24", "phf_shared 0.7.24",
"proc-macro2 1.0.56", "proc-macro2 1.0.56",
"quote 1.0.26", "quote 1.0.27",
"string_cache_shared", "string_cache_shared",
] ]
@ -2694,7 +2694,7 @@ dependencies = [
"phf_generator 0.10.0", "phf_generator 0.10.0",
"phf_shared 0.10.0", "phf_shared 0.10.0",
"proc-macro2 1.0.56", "proc-macro2 1.0.56",
"quote 1.0.26", "quote 1.0.27",
] ]
[[package]] [[package]]
@ -2721,7 +2721,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
dependencies = [ dependencies = [
"proc-macro2 1.0.56", "proc-macro2 1.0.56",
"quote 1.0.26", "quote 1.0.27",
"unicode-ident", "unicode-ident",
] ]
@ -2732,7 +2732,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a34fcf3e8b60f57e6a14301a2e916d323af98b0ea63c599441eec8558660c822" checksum = "a34fcf3e8b60f57e6a14301a2e916d323af98b0ea63c599441eec8558660c822"
dependencies = [ dependencies = [
"proc-macro2 1.0.56", "proc-macro2 1.0.56",
"quote 1.0.26", "quote 1.0.27",
"unicode-ident", "unicode-ident",
] ]
@ -2743,7 +2743,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f" checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f"
dependencies = [ dependencies = [
"proc-macro2 1.0.56", "proc-macro2 1.0.56",
"quote 1.0.26", "quote 1.0.27",
"syn 1.0.109", "syn 1.0.109",
"unicode-xid 0.2.4", "unicode-xid 0.2.4",
] ]
@ -2797,7 +2797,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f"
dependencies = [ dependencies = [
"proc-macro2 1.0.56", "proc-macro2 1.0.56",
"quote 1.0.26", "quote 1.0.27",
"syn 2.0.15", "syn 2.0.15",
] ]
@ -2814,9 +2814,9 @@ dependencies = [
[[package]] [[package]]
name = "time" name = "time"
version = "0.3.20" version = "0.3.21"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd0cbfecb4d19b5ea75bb31ad904eb5b9fa13f21079c3b92017ebdf4999a5890" checksum = "8f3403384eaacbca9923fa06940178ac13e4edb725486d70e8e15881d0c836cc"
dependencies = [ dependencies = [
"itoa 1.0.6", "itoa 1.0.6",
"serde", "serde",
@ -2826,15 +2826,15 @@ dependencies = [
[[package]] [[package]]
name = "time-core" name = "time-core"
version = "0.1.0" version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2e153e1f1acaef8acc537e68b44906d2db6436e2b35ac2c6b42640fff91f00fd" checksum = "7300fbefb4dadc1af235a9cef3737cea692a9d97e1b9cbcd4ebdae6f8868e6fb"
[[package]] [[package]]
name = "time-macros" name = "time-macros"
version = "0.2.8" version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd80a657e71da814b8e5d60d3374fc6d35045062245d80224748ae522dd76f36" checksum = "372950940a5f07bf38dbe211d7283c9e6d7327df53794992d293e534c733d09b"
dependencies = [ dependencies = [
"time-core", "time-core",
] ]
@ -2941,7 +2941,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e"
dependencies = [ dependencies = [
"proc-macro2 1.0.56", "proc-macro2 1.0.56",
"quote 1.0.26", "quote 1.0.27",
"syn 2.0.15", "syn 2.0.15",
] ]
@ -3243,9 +3243,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]] [[package]]
name = "wasm-bindgen" name = "wasm-bindgen"
version = "0.2.84" version = "0.2.85"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "31f8dcbc21f30d9b8f2ea926ecb58f6b91192c17e9d33594b3df58b2007ca53b" checksum = "5b6cb788c4e39112fbe1822277ef6fb3c55cd86b95cb3d3c4c1c9597e4ac74b4"
dependencies = [ dependencies = [
"cfg-if 1.0.0", "cfg-if 1.0.0",
"wasm-bindgen-macro", "wasm-bindgen-macro",
@ -3253,24 +3253,24 @@ dependencies = [
[[package]] [[package]]
name = "wasm-bindgen-backend" name = "wasm-bindgen-backend"
version = "0.2.84" version = "0.2.85"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95ce90fd5bcc06af55a641a86428ee4229e44e07033963a2290a8e241607ccb9" checksum = "35e522ed4105a9d626d885b35d62501b30d9666283a5c8be12c14a8bdafe7822"
dependencies = [ dependencies = [
"bumpalo", "bumpalo",
"log", "log",
"once_cell", "once_cell",
"proc-macro2 1.0.56", "proc-macro2 1.0.56",
"quote 1.0.26", "quote 1.0.27",
"syn 1.0.109", "syn 2.0.15",
"wasm-bindgen-shared", "wasm-bindgen-shared",
] ]
[[package]] [[package]]
name = "wasm-bindgen-futures" name = "wasm-bindgen-futures"
version = "0.4.34" version = "0.4.35"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f219e0d211ba40266969f6dbdd90636da12f75bee4fc9d6c23d1260dadb51454" checksum = "083abe15c5d88556b77bdf7aef403625be9e327ad37c62c4e4129af740168163"
dependencies = [ dependencies = [
"cfg-if 1.0.0", "cfg-if 1.0.0",
"js-sys", "js-sys",
@ -3280,38 +3280,38 @@ dependencies = [
[[package]] [[package]]
name = "wasm-bindgen-macro" name = "wasm-bindgen-macro"
version = "0.2.84" version = "0.2.85"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c21f77c0bedc37fd5dc21f897894a5ca01e7bb159884559461862ae90c0b4c5" checksum = "358a79a0cb89d21db8120cbfb91392335913e4890665b1a7981d9e956903b434"
dependencies = [ dependencies = [
"quote 1.0.26", "quote 1.0.27",
"wasm-bindgen-macro-support", "wasm-bindgen-macro-support",
] ]
[[package]] [[package]]
name = "wasm-bindgen-macro-support" name = "wasm-bindgen-macro-support"
version = "0.2.84" version = "0.2.85"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2aff81306fcac3c7515ad4e177f521b5c9a15f2b08f4e32d823066102f35a5f6" checksum = "4783ce29f09b9d93134d41297aded3a712b7b979e9c6f28c32cb88c973a94869"
dependencies = [ dependencies = [
"proc-macro2 1.0.56", "proc-macro2 1.0.56",
"quote 1.0.26", "quote 1.0.27",
"syn 1.0.109", "syn 2.0.15",
"wasm-bindgen-backend", "wasm-bindgen-backend",
"wasm-bindgen-shared", "wasm-bindgen-shared",
] ]
[[package]] [[package]]
name = "wasm-bindgen-shared" name = "wasm-bindgen-shared"
version = "0.2.84" version = "0.2.85"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0046fef7e28c3804e5e38bfa31ea2a0f73905319b677e57ebe37e49358989b5d" checksum = "a901d592cafaa4d711bc324edfaff879ac700b19c3dfd60058d2b445be2691eb"
[[package]] [[package]]
name = "web-sys" name = "web-sys"
version = "0.3.61" version = "0.3.62"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e33b99f4b23ba3eec1a53ac264e35a755f00e966e0065077d6027c0f575b0b97" checksum = "16b5f940c7edfdc6d12126d98c9ef4d1b3d470011c47c76a6581df47ad9ba721"
dependencies = [ dependencies = [
"js-sys", "js-sys",
"wasm-bindgen", "wasm-bindgen",
@ -3319,7 +3319,7 @@ dependencies = [
[[package]] [[package]]
name = "websurfx" name = "websurfx"
version = "0.1.0" version = "0.6.0"
dependencies = [ dependencies = [
"actix-files", "actix-files",
"actix-web", "actix-web",
@ -3328,7 +3328,7 @@ dependencies = [
"handlebars", "handlebars",
"log", "log",
"md5", "md5",
"rand 0.6.5", "rand 0.8.5",
"redis", "redis",
"reqwest 0.11.17", "reqwest 0.11.17",
"rlua", "rlua",

View File

@ -1,6 +1,6 @@
[package] [package]
name = "websurfx" name = "websurfx"
version = "0.1.0" version = "0.6.0"
edition = "2021" edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

26
Dockerfile Normal file
View File

@ -0,0 +1,26 @@
# Multi-stage build for websurfx.
#   chef    - Rust toolchain plus cargo-chef, shared by the next two stages.
#   planner - computes the dependency "recipe" from the project sources.
#   builder - builds the dependencies (cached layer), then the application.
#   runtime - distroless image that carries only the binary and its assets.

FROM rust:latest AS chef
# We only pay the installation cost once,
# it will be cached from the second build onwards
RUN cargo install cargo-chef
# Use an absolute path so the working directory does not depend on whatever
# WORKDIR the base image happens to set.
WORKDIR /app

FROM chef AS planner
COPY . .
RUN cargo chef prepare --recipe-path recipe.json

FROM chef AS builder
COPY --from=planner /app/recipe.json recipe.json
# Build dependencies - this is the caching Docker layer!
RUN cargo chef cook --release --recipe-path recipe.json
# Build application
COPY . .
RUN cargo install --path .

# We do not need the Rust toolchain to run the binary!
FROM gcr.io/distroless/cc-debian11
# Source paths of `COPY --from` are not resolved against the builder's WORKDIR,
# so the assets must be addressed by their absolute location under /app.
COPY --from=builder /app/public/ ./public/
COPY --from=builder /app/websurfx/ ./websurfx/
# Copy only the application binary; a `bin/*` glob would also drag the cargo
# and rustc executables and cargo-chef into the runtime image.
COPY --from=builder /usr/local/cargo/bin/websurfx /usr/local/bin/
CMD ["websurfx"]

15
docker-compose.yml Normal file
View File

@ -0,0 +1,15 @@
# Compose definition for running websurfx together with its Redis instance.
version: "3.9"
services:
  # The websurfx application, built from the Dockerfile in this directory.
  app:
    image: websurfx:latest
    build: .
    ports:
      # Quoted so YAML never interprets the HOST:CONTAINER pair as a number.
      - "8080:8080"
    # Start Redis before the application. The legacy `links` option is not
    # needed: services on the default Compose network already reach each
    # other by service name (`redis`).
    depends_on:
      - redis
  # Redis server started alongside the application.
  redis:
    image: redis:latest
    ports:
      - "6379:6379"

View File

@ -2,10 +2,9 @@
//! by querying the upstream duckduckgo search engine with user provided query and with a page //! by querying the upstream duckduckgo search engine with user provided query and with a page
//! number if provided. //! number if provided.
use std::{collections::HashMap, time::Duration}; use std::collections::HashMap;
use rand::Rng; use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
use reqwest::header::{HeaderMap, CONTENT_TYPE, REFERER, USER_AGENT};
use scraper::{Html, Selector}; use scraper::{Html, Selector};
use crate::search_results_handler::aggregation_models::RawSearchResult; use crate::search_results_handler::aggregation_models::RawSearchResult;
@ -47,16 +46,12 @@ pub async fn results(
} }
}; };
// Add a random delay before making the request.
let mut rng = rand::thread_rng();
let delay_secs = rng.gen_range(1, 10);
std::thread::sleep(Duration::from_secs(delay_secs));
// initializing HeaderMap and adding appropriate headers. // initializing HeaderMap and adding appropriate headers.
let mut header_map = HeaderMap::new(); let mut header_map = HeaderMap::new();
header_map.insert(USER_AGENT, user_agent.parse()?); header_map.insert(USER_AGENT, user_agent.parse()?);
header_map.insert(REFERER, "https://google.com/".parse()?); header_map.insert(REFERER, "https://google.com/".parse()?);
header_map.insert(CONTENT_TYPE, "text/html; charset=UTF-8".parse()?); header_map.insert(CONTENT_TYPE, "application/x-www-form-urlencoded".parse()?);
header_map.insert(COOKIE, "kl=wt-wt".parse()?);
// fetch the html from upstream duckduckgo engine // fetch the html from upstream duckduckgo engine
// TODO: Write better error handling code to handle no results case. // TODO: Write better error handling code to handle no results case.

View File

@ -2,10 +2,9 @@
//! by querying the upstream searx search engine instance with user provided query and with a page //! by querying the upstream searx search engine instance with user provided query and with a page
//! number if provided. //! number if provided.
use rand::Rng; use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
use reqwest::header::{HeaderMap, CONTENT_TYPE, REFERER, USER_AGENT};
use scraper::{Html, Selector}; use scraper::{Html, Selector};
use std::{collections::HashMap, time::Duration}; use std::collections::HashMap;
use crate::search_results_handler::aggregation_models::RawSearchResult; use crate::search_results_handler::aggregation_models::RawSearchResult;
@ -34,16 +33,12 @@ pub async fn results(
// so that upstream server receives valid page number. // so that upstream server receives valid page number.
let url: String = format!("https://searx.work/search?q={query}&pageno={page}"); let url: String = format!("https://searx.work/search?q={query}&pageno={page}");
// Add random delay before making the request.
let mut rng = rand::thread_rng();
let delay_secs = rng.gen_range(1, 10);
std::thread::sleep(Duration::from_secs(delay_secs));
// initializing headers and adding appropriate headers. // initializing headers and adding appropriate headers.
let mut header_map = HeaderMap::new(); let mut header_map = HeaderMap::new();
header_map.insert(USER_AGENT, user_agent.parse()?); header_map.insert(USER_AGENT, user_agent.parse()?);
header_map.insert(REFERER, "https://google.com/".parse()?); header_map.insert(REFERER, "https://google.com/".parse()?);
header_map.insert(CONTENT_TYPE, "application/x-www-form-urlencoded".parse()?); header_map.insert(CONTENT_TYPE, "application/x-www-form-urlencoded".parse()?);
header_map.insert(COOKIE, "categories=general; language=auto; locale=en; autocomplete=duckduckgo; image_proxy=1; method=POST; safesearch=2; theme=simple; results_on_new_tab=1; doi_resolver=oadoi.org; simple_style=auto; center_alignment=1; query_in_title=1; infinite_scroll=0; disabled_engines=; enabled_engines=\"archive is__general\\054yep__general\\054curlie__general\\054currency__general\\054ddg definitions__general\\054wikidata__general\\054duckduckgo__general\\054tineye__general\\054lingva__general\\054startpage__general\\054yahoo__general\\054wiby__general\\054marginalia__general\\054alexandria__general\\054wikibooks__general\\054wikiquote__general\\054wikisource__general\\054wikiversity__general\\054wikivoyage__general\\054dictzone__general\\054seznam__general\\054mojeek__general\\054naver__general\\054wikimini__general\\054brave__general\\054petalsearch__general\\054goo__general\"; disabled_plugins=; enabled_plugins=\"searx.plugins.hostname_replace\\054searx.plugins.oa_doi_rewrite\\054searx.plugins.vim_hotkeys\"; tokens=; maintab=on; enginetab=on".parse()?);
// fetch the html from upstream searx instance engine // fetch the html from upstream searx instance engine
// TODO: Write better error handling code to handle no results case. // TODO: Write better error handling code to handle no results case.

View File

@ -1,7 +1,10 @@
//! This module provides the functionality to scrape and gather all the results from the upstream //! This module provides the functionality to scrape and gather all the results from the upstream
//! search engines and then removes duplicate results. //! search engines and then removes duplicate results.
use std::collections::HashMap; use std::{collections::HashMap, time::Duration};
use rand::Rng;
use tokio::join;
use super::{ use super::{
aggregation_models::{RawSearchResult, SearchResult, SearchResults}, aggregation_models::{RawSearchResult, SearchResult, SearchResults},
@ -39,10 +42,19 @@ pub async fn aggregate(
let user_agent: String = random_user_agent(); let user_agent: String = random_user_agent();
let mut result_map: HashMap<String, RawSearchResult> = HashMap::new(); let mut result_map: HashMap<String, RawSearchResult> = HashMap::new();
let ddg_map_results: HashMap<String, RawSearchResult> = // Add a random delay before making the request.
duckduckgo::results(query, page, &user_agent).await?; let mut rng = rand::thread_rng();
let searx_map_results: HashMap<String, RawSearchResult> = let delay_secs = rng.gen_range(1..10);
searx::results(query, page, &user_agent).await?; std::thread::sleep(Duration::from_secs(delay_secs));
// fetch results from upstream search engines simultaneously/concurrently.
let (ddg_map_results, searx_map_results) = join!(
duckduckgo::results(query, page, &user_agent),
searx::results(query, page, &user_agent)
);
let ddg_map_results: HashMap<String, RawSearchResult> = ddg_map_results?;
let searx_map_results: HashMap<String, RawSearchResult> = searx_map_results?;
result_map.extend(ddg_map_results); result_map.extend(ddg_map_results);