mirror of
https://github.com/neon-mmd/websurfx.git
synced 2024-12-22 20:38:22 -05:00
commit
16d76bf70d
9
.dockerignore
Normal file
9
.dockerignore
Normal file
@ -0,0 +1,9 @@
|
||||
target/
|
||||
.git/
|
||||
.github/
|
||||
.dockerignore
|
||||
Dockerfile
|
||||
docker-compose.yml
|
||||
images
|
||||
*.md
|
||||
LICENSE
|
130
Cargo.lock
generated
130
Cargo.lock
generated
@ -87,7 +87,7 @@ version = "0.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "465a6172cf69b960917811022d8f29bc0b7fa1398bc4f78b3c466673db1213b6"
|
||||
dependencies = [
|
||||
"quote 1.0.26",
|
||||
"quote 1.0.27",
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
@ -190,7 +190,7 @@ dependencies = [
|
||||
"serde_urlencoded 0.7.1",
|
||||
"smallvec 1.10.0",
|
||||
"socket2",
|
||||
"time 0.3.20",
|
||||
"time 0.3.21",
|
||||
"url 2.3.1",
|
||||
]
|
||||
|
||||
@ -202,7 +202,7 @@ checksum = "2262160a7ae29e3415554a3f1fc04c764b1540c116aa524683208078b7a75bc9"
|
||||
dependencies = [
|
||||
"actix-router",
|
||||
"proc-macro2 1.0.56",
|
||||
"quote 1.0.26",
|
||||
"quote 1.0.27",
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
@ -480,7 +480,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e859cd57d0710d9e06c381b550c06e76992472a8c6d527aecd2fc673dcc231fb"
|
||||
dependencies = [
|
||||
"percent-encoding 2.2.0",
|
||||
"time 0.3.20",
|
||||
"time 0.3.21",
|
||||
"version_check",
|
||||
]
|
||||
|
||||
@ -606,7 +606,7 @@ dependencies = [
|
||||
"matches",
|
||||
"phf 0.10.1",
|
||||
"proc-macro2 1.0.56",
|
||||
"quote 1.0.26",
|
||||
"quote 1.0.27",
|
||||
"smallvec 1.10.0",
|
||||
"syn 1.0.109",
|
||||
]
|
||||
@ -617,7 +617,7 @@ version = "0.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dfae75de57f2b2e85e8768c3ea840fd159c8f33e2b6522c7835b7abac81be16e"
|
||||
dependencies = [
|
||||
"quote 1.0.26",
|
||||
"quote 1.0.27",
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
@ -629,7 +629,7 @@ checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321"
|
||||
dependencies = [
|
||||
"convert_case",
|
||||
"proc-macro2 1.0.56",
|
||||
"quote 1.0.26",
|
||||
"quote 1.0.27",
|
||||
"rustc_version 0.4.0",
|
||||
"syn 1.0.109",
|
||||
]
|
||||
@ -731,7 +731,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "aa4da3c766cd7a0db8242e326e9e4e081edd567072893ed320008189715366a4"
|
||||
dependencies = [
|
||||
"proc-macro2 1.0.56",
|
||||
"quote 1.0.26",
|
||||
"quote 1.0.27",
|
||||
"syn 1.0.109",
|
||||
"synstructure",
|
||||
]
|
||||
@ -1036,7 +1036,7 @@ dependencies = [
|
||||
"mac",
|
||||
"markup5ever 0.11.0",
|
||||
"proc-macro2 1.0.56",
|
||||
"quote 1.0.26",
|
||||
"quote 1.0.27",
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
@ -1301,9 +1301,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "js-sys"
|
||||
version = "0.3.61"
|
||||
version = "0.3.62"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "445dde2150c55e483f3d8416706b97ec8e8237c307e5b7b4b8dd15e6af2a0730"
|
||||
checksum = "68c16e1bfd491478ab155fd8b4896b86f9ede344949b641e61501e07c2b8b4d5"
|
||||
dependencies = [
|
||||
"wasm-bindgen",
|
||||
]
|
||||
@ -1332,15 +1332,15 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.142"
|
||||
version = "0.2.144"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6a987beff54b60ffa6d51982e1aa1146bc42f19bd26be28b0586f252fccf5317"
|
||||
checksum = "2b00cc1c228a6782d0f076e7b232802e0c5689d41bb5df366f2a6b6621cfdfe1"
|
||||
|
||||
[[package]]
|
||||
name = "linux-raw-sys"
|
||||
version = "0.3.6"
|
||||
version = "0.3.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b64f40e5e03e0d54f03845c8197d0291253cdbedfb1cb46b13c2c117554a9f4c"
|
||||
checksum = "ece97ea872ece730aed82664c424eb4c8291e1ff2480247ccf7409044bc6479f"
|
||||
|
||||
[[package]]
|
||||
name = "local-channel"
|
||||
@ -1632,7 +1632,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
|
||||
dependencies = [
|
||||
"proc-macro2 1.0.56",
|
||||
"quote 1.0.26",
|
||||
"quote 1.0.27",
|
||||
"syn 2.0.15",
|
||||
]
|
||||
|
||||
@ -1750,7 +1750,7 @@ dependencies = [
|
||||
"pest",
|
||||
"pest_meta",
|
||||
"proc-macro2 1.0.56",
|
||||
"quote 1.0.26",
|
||||
"quote 1.0.27",
|
||||
"syn 2.0.15",
|
||||
]
|
||||
|
||||
@ -1864,7 +1864,7 @@ dependencies = [
|
||||
"phf_shared 0.10.0",
|
||||
"proc-macro-hack",
|
||||
"proc-macro2 1.0.56",
|
||||
"quote 1.0.26",
|
||||
"quote 1.0.27",
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
@ -1909,9 +1909,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
|
||||
|
||||
[[package]]
|
||||
name = "pkg-config"
|
||||
version = "0.3.26"
|
||||
version = "0.3.27"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160"
|
||||
checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964"
|
||||
|
||||
[[package]]
|
||||
name = "ppv-lite86"
|
||||
@ -1970,9 +1970,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.26"
|
||||
version = "1.0.27"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc"
|
||||
checksum = "8f4f29d145265ec1c483c7c654450edde0bfe043d3938d6972630663356d9500"
|
||||
dependencies = [
|
||||
"proc-macro2 1.0.56",
|
||||
]
|
||||
@ -2314,9 +2314,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "rlua-lua54-sys"
|
||||
version = "0.1.3"
|
||||
version = "0.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "23ae48797c3e76fb2c205fda8f30e28416a15b9fc1d649cc7cea9ff1fb9cf028"
|
||||
checksum = "93f42202b5aeb0bcc5df28436f8d963f8cbcbb898033a9e28c7ba4f299707934"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
@ -2349,9 +2349,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "rustix"
|
||||
version = "0.37.17"
|
||||
version = "0.37.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bc809f704c03a812ac71f22456c857be34185cac691a4316f27ab0f633bb9009"
|
||||
checksum = "acf8729d8542766f1b2cf77eb034d52f40d375bb8b615d0b147089946e16613d"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"errno",
|
||||
@ -2482,21 +2482,21 @@ checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.160"
|
||||
version = "1.0.162"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bb2f3770c8bce3bcda7e149193a069a0f4365bda1fa5cd88e03bca26afc1216c"
|
||||
checksum = "71b2f6e1ab5c2b98c05f0f35b236b22e8df7ead6ffbf51d7808da7f8817e7ab6"
|
||||
dependencies = [
|
||||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.160"
|
||||
version = "1.0.162"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "291a097c63d8497e00160b166a967a4a79c64f3facdd01cbd7502231688d77df"
|
||||
checksum = "a2a0814352fd64b58489904a44ea8d90cb1a91dcb6b4f5ebabc32c8318e93cb6"
|
||||
dependencies = [
|
||||
"proc-macro2 1.0.56",
|
||||
"quote 1.0.26",
|
||||
"quote 1.0.27",
|
||||
"syn 2.0.15",
|
||||
]
|
||||
|
||||
@ -2681,7 +2681,7 @@ dependencies = [
|
||||
"phf_generator 0.7.24",
|
||||
"phf_shared 0.7.24",
|
||||
"proc-macro2 1.0.56",
|
||||
"quote 1.0.26",
|
||||
"quote 1.0.27",
|
||||
"string_cache_shared",
|
||||
]
|
||||
|
||||
@ -2694,7 +2694,7 @@ dependencies = [
|
||||
"phf_generator 0.10.0",
|
||||
"phf_shared 0.10.0",
|
||||
"proc-macro2 1.0.56",
|
||||
"quote 1.0.26",
|
||||
"quote 1.0.27",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -2721,7 +2721,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
|
||||
dependencies = [
|
||||
"proc-macro2 1.0.56",
|
||||
"quote 1.0.26",
|
||||
"quote 1.0.27",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
@ -2732,7 +2732,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a34fcf3e8b60f57e6a14301a2e916d323af98b0ea63c599441eec8558660c822"
|
||||
dependencies = [
|
||||
"proc-macro2 1.0.56",
|
||||
"quote 1.0.26",
|
||||
"quote 1.0.27",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
@ -2743,7 +2743,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f"
|
||||
dependencies = [
|
||||
"proc-macro2 1.0.56",
|
||||
"quote 1.0.26",
|
||||
"quote 1.0.27",
|
||||
"syn 1.0.109",
|
||||
"unicode-xid 0.2.4",
|
||||
]
|
||||
@ -2797,7 +2797,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f"
|
||||
dependencies = [
|
||||
"proc-macro2 1.0.56",
|
||||
"quote 1.0.26",
|
||||
"quote 1.0.27",
|
||||
"syn 2.0.15",
|
||||
]
|
||||
|
||||
@ -2814,9 +2814,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "time"
|
||||
version = "0.3.20"
|
||||
version = "0.3.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cd0cbfecb4d19b5ea75bb31ad904eb5b9fa13f21079c3b92017ebdf4999a5890"
|
||||
checksum = "8f3403384eaacbca9923fa06940178ac13e4edb725486d70e8e15881d0c836cc"
|
||||
dependencies = [
|
||||
"itoa 1.0.6",
|
||||
"serde",
|
||||
@ -2826,15 +2826,15 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "time-core"
|
||||
version = "0.1.0"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2e153e1f1acaef8acc537e68b44906d2db6436e2b35ac2c6b42640fff91f00fd"
|
||||
checksum = "7300fbefb4dadc1af235a9cef3737cea692a9d97e1b9cbcd4ebdae6f8868e6fb"
|
||||
|
||||
[[package]]
|
||||
name = "time-macros"
|
||||
version = "0.2.8"
|
||||
version = "0.2.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fd80a657e71da814b8e5d60d3374fc6d35045062245d80224748ae522dd76f36"
|
||||
checksum = "372950940a5f07bf38dbe211d7283c9e6d7327df53794992d293e534c733d09b"
|
||||
dependencies = [
|
||||
"time-core",
|
||||
]
|
||||
@ -2941,7 +2941,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e"
|
||||
dependencies = [
|
||||
"proc-macro2 1.0.56",
|
||||
"quote 1.0.26",
|
||||
"quote 1.0.27",
|
||||
"syn 2.0.15",
|
||||
]
|
||||
|
||||
@ -3243,9 +3243,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen"
|
||||
version = "0.2.84"
|
||||
version = "0.2.85"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "31f8dcbc21f30d9b8f2ea926ecb58f6b91192c17e9d33594b3df58b2007ca53b"
|
||||
checksum = "5b6cb788c4e39112fbe1822277ef6fb3c55cd86b95cb3d3c4c1c9597e4ac74b4"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"wasm-bindgen-macro",
|
||||
@ -3253,24 +3253,24 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen-backend"
|
||||
version = "0.2.84"
|
||||
version = "0.2.85"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "95ce90fd5bcc06af55a641a86428ee4229e44e07033963a2290a8e241607ccb9"
|
||||
checksum = "35e522ed4105a9d626d885b35d62501b30d9666283a5c8be12c14a8bdafe7822"
|
||||
dependencies = [
|
||||
"bumpalo",
|
||||
"log",
|
||||
"once_cell",
|
||||
"proc-macro2 1.0.56",
|
||||
"quote 1.0.26",
|
||||
"syn 1.0.109",
|
||||
"quote 1.0.27",
|
||||
"syn 2.0.15",
|
||||
"wasm-bindgen-shared",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen-futures"
|
||||
version = "0.4.34"
|
||||
version = "0.4.35"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f219e0d211ba40266969f6dbdd90636da12f75bee4fc9d6c23d1260dadb51454"
|
||||
checksum = "083abe15c5d88556b77bdf7aef403625be9e327ad37c62c4e4129af740168163"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"js-sys",
|
||||
@ -3280,38 +3280,38 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen-macro"
|
||||
version = "0.2.84"
|
||||
version = "0.2.85"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4c21f77c0bedc37fd5dc21f897894a5ca01e7bb159884559461862ae90c0b4c5"
|
||||
checksum = "358a79a0cb89d21db8120cbfb91392335913e4890665b1a7981d9e956903b434"
|
||||
dependencies = [
|
||||
"quote 1.0.26",
|
||||
"quote 1.0.27",
|
||||
"wasm-bindgen-macro-support",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen-macro-support"
|
||||
version = "0.2.84"
|
||||
version = "0.2.85"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2aff81306fcac3c7515ad4e177f521b5c9a15f2b08f4e32d823066102f35a5f6"
|
||||
checksum = "4783ce29f09b9d93134d41297aded3a712b7b979e9c6f28c32cb88c973a94869"
|
||||
dependencies = [
|
||||
"proc-macro2 1.0.56",
|
||||
"quote 1.0.26",
|
||||
"syn 1.0.109",
|
||||
"quote 1.0.27",
|
||||
"syn 2.0.15",
|
||||
"wasm-bindgen-backend",
|
||||
"wasm-bindgen-shared",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen-shared"
|
||||
version = "0.2.84"
|
||||
version = "0.2.85"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0046fef7e28c3804e5e38bfa31ea2a0f73905319b677e57ebe37e49358989b5d"
|
||||
checksum = "a901d592cafaa4d711bc324edfaff879ac700b19c3dfd60058d2b445be2691eb"
|
||||
|
||||
[[package]]
|
||||
name = "web-sys"
|
||||
version = "0.3.61"
|
||||
version = "0.3.62"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e33b99f4b23ba3eec1a53ac264e35a755f00e966e0065077d6027c0f575b0b97"
|
||||
checksum = "16b5f940c7edfdc6d12126d98c9ef4d1b3d470011c47c76a6581df47ad9ba721"
|
||||
dependencies = [
|
||||
"js-sys",
|
||||
"wasm-bindgen",
|
||||
@ -3319,7 +3319,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "websurfx"
|
||||
version = "0.1.0"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"actix-files",
|
||||
"actix-web",
|
||||
@ -3328,7 +3328,7 @@ dependencies = [
|
||||
"handlebars",
|
||||
"log",
|
||||
"md5",
|
||||
"rand 0.6.5",
|
||||
"rand 0.8.5",
|
||||
"redis",
|
||||
"reqwest 0.11.17",
|
||||
"rlua",
|
||||
|
@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "websurfx"
|
||||
version = "0.1.0"
|
||||
version = "0.6.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
26
Dockerfile
Normal file
26
Dockerfile
Normal file
@ -0,0 +1,26 @@
|
||||
FROM rust:latest AS chef
|
||||
# We only pay the installation cost once,
|
||||
# it will be cached from the second build onwards
|
||||
RUN cargo install cargo-chef
|
||||
|
||||
WORKDIR app
|
||||
|
||||
FROM chef AS planner
|
||||
COPY . .
|
||||
RUN cargo chef prepare --recipe-path recipe.json
|
||||
|
||||
FROM chef AS builder
|
||||
COPY --from=planner /app/recipe.json recipe.json
|
||||
# Build dependencies - this is the caching Docker layer!
|
||||
RUN cargo chef cook --release --recipe-path recipe.json
|
||||
|
||||
# Build application
|
||||
COPY . .
|
||||
RUN cargo install --path .
|
||||
|
||||
# We do not need the Rust toolchain to run the binary!
|
||||
FROM gcr.io/distroless/cc-debian11
|
||||
COPY --from=builder ./public/ ./public/
|
||||
COPY --from=builder ./websurfx/ ./websurfx/
|
||||
COPY --from=builder /usr/local/cargo/bin/* /usr/local/bin/
|
||||
CMD ["websurfx"]
|
15
docker-compose.yml
Normal file
15
docker-compose.yml
Normal file
@ -0,0 +1,15 @@
|
||||
version: "3.9"
|
||||
services:
|
||||
app:
|
||||
image: websurfx:latest
|
||||
build: .
|
||||
ports:
|
||||
- 8080:8080
|
||||
depends_on:
|
||||
- redis
|
||||
links:
|
||||
- redis
|
||||
redis:
|
||||
image: redis:latest
|
||||
ports:
|
||||
- 6379:6379
|
@ -2,10 +2,9 @@
|
||||
//! by querying the upstream duckduckgo search engine with user provided query and with a page
|
||||
//! number if provided.
|
||||
|
||||
use std::{collections::HashMap, time::Duration};
|
||||
use std::collections::HashMap;
|
||||
|
||||
use rand::Rng;
|
||||
use reqwest::header::{HeaderMap, CONTENT_TYPE, REFERER, USER_AGENT};
|
||||
use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
|
||||
use scraper::{Html, Selector};
|
||||
|
||||
use crate::search_results_handler::aggregation_models::RawSearchResult;
|
||||
@ -47,16 +46,12 @@ pub async fn results(
|
||||
}
|
||||
};
|
||||
|
||||
// Add a random delay before making the request.
|
||||
let mut rng = rand::thread_rng();
|
||||
let delay_secs = rng.gen_range(1, 10);
|
||||
std::thread::sleep(Duration::from_secs(delay_secs));
|
||||
|
||||
// initializing HeaderMap and adding appropriate headers.
|
||||
let mut header_map = HeaderMap::new();
|
||||
header_map.insert(USER_AGENT, user_agent.parse()?);
|
||||
header_map.insert(REFERER, "https://google.com/".parse()?);
|
||||
header_map.insert(CONTENT_TYPE, "text/html; charset=UTF-8".parse()?);
|
||||
header_map.insert(CONTENT_TYPE, "application/x-www-form-urlencoded".parse()?);
|
||||
header_map.insert(COOKIE, "kl=wt-wt".parse()?);
|
||||
|
||||
// fetch the html from upstream duckduckgo engine
|
||||
// TODO: Write better error handling code to handle no results case.
|
||||
|
@ -2,10 +2,9 @@
|
||||
//! by querying the upstream searx search engine instance with user provided query and with a page
|
||||
//! number if provided.
|
||||
|
||||
use rand::Rng;
|
||||
use reqwest::header::{HeaderMap, CONTENT_TYPE, REFERER, USER_AGENT};
|
||||
use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
|
||||
use scraper::{Html, Selector};
|
||||
use std::{collections::HashMap, time::Duration};
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::search_results_handler::aggregation_models::RawSearchResult;
|
||||
|
||||
@ -34,16 +33,12 @@ pub async fn results(
|
||||
// so that upstream server receives valid page number.
|
||||
let url: String = format!("https://searx.work/search?q={query}&pageno={page}");
|
||||
|
||||
// Add random delay before making the request.
|
||||
let mut rng = rand::thread_rng();
|
||||
let delay_secs = rng.gen_range(1, 10);
|
||||
std::thread::sleep(Duration::from_secs(delay_secs));
|
||||
|
||||
// initializing headers and adding appropriate headers.
|
||||
let mut header_map = HeaderMap::new();
|
||||
header_map.insert(USER_AGENT, user_agent.parse()?);
|
||||
header_map.insert(REFERER, "https://google.com/".parse()?);
|
||||
header_map.insert(CONTENT_TYPE, "application/x-www-form-urlencoded".parse()?);
|
||||
header_map.insert(COOKIE, "categories=general; language=auto; locale=en; autocomplete=duckduckgo; image_proxy=1; method=POST; safesearch=2; theme=simple; results_on_new_tab=1; doi_resolver=oadoi.org; simple_style=auto; center_alignment=1; query_in_title=1; infinite_scroll=0; disabled_engines=; enabled_engines=\"archive is__general\\054yep__general\\054curlie__general\\054currency__general\\054ddg definitions__general\\054wikidata__general\\054duckduckgo__general\\054tineye__general\\054lingva__general\\054startpage__general\\054yahoo__general\\054wiby__general\\054marginalia__general\\054alexandria__general\\054wikibooks__general\\054wikiquote__general\\054wikisource__general\\054wikiversity__general\\054wikivoyage__general\\054dictzone__general\\054seznam__general\\054mojeek__general\\054naver__general\\054wikimini__general\\054brave__general\\054petalsearch__general\\054goo__general\"; disabled_plugins=; enabled_plugins=\"searx.plugins.hostname_replace\\054searx.plugins.oa_doi_rewrite\\054searx.plugins.vim_hotkeys\"; tokens=; maintab=on; enginetab=on".parse()?);
|
||||
|
||||
// fetch the html from upstream searx instance engine
|
||||
// TODO: Write better error handling code to handle no results case.
|
||||
|
@ -1,7 +1,10 @@
|
||||
//! This module provides the functionality to scrape and gather all the results from the upstream
|
||||
//! search engines and then removes duplicate results.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::{collections::HashMap, time::Duration};
|
||||
|
||||
use rand::Rng;
|
||||
use tokio::join;
|
||||
|
||||
use super::{
|
||||
aggregation_models::{RawSearchResult, SearchResult, SearchResults},
|
||||
@ -14,14 +17,14 @@ use crate::engines::{duckduckgo, searx};
|
||||
/// then removes duplicate results and if two results are found to be from two or more engines
|
||||
/// then puts their names together to show the results are fetched from these upstream engines
|
||||
/// and then removes all data from the HashMap and puts into a struct of all results aggregated
|
||||
/// into a vector and also adds the query used into the struct this is necessary because
|
||||
/// into a vector and also adds the query used into the struct this is necessary because
|
||||
/// otherwise the search bar in search remains empty if searched from the query url
|
||||
///
|
||||
/// # Example:
|
||||
///
|
||||
/// If you search from the url like `https://127.0.0.1/search?q=huston` then the search bar should
|
||||
/// contain the word huston and not remain empty.
|
||||
///
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `query` - Accepts a string to query with the above upstream search engines.
|
||||
@ -29,7 +32,7 @@ use crate::engines::{duckduckgo, searx};
|
||||
///
|
||||
/// # Error
|
||||
///
|
||||
/// Returns a reqwest or scraping selector error if any error occurs in the results
|
||||
/// Returns a reqwest or scraping selector error if any error occurs in the results
|
||||
/// function in either `searx` or `duckduckgo` or both otherwise returns a `SearchResults struct`
|
||||
/// containing appropriate values.
|
||||
pub async fn aggregate(
|
||||
@ -39,10 +42,19 @@ pub async fn aggregate(
|
||||
let user_agent: String = random_user_agent();
|
||||
let mut result_map: HashMap<String, RawSearchResult> = HashMap::new();
|
||||
|
||||
let ddg_map_results: HashMap<String, RawSearchResult> =
|
||||
duckduckgo::results(query, page, &user_agent).await?;
|
||||
let searx_map_results: HashMap<String, RawSearchResult> =
|
||||
searx::results(query, page, &user_agent).await?;
|
||||
// Add a random delay before making the request.
|
||||
let mut rng = rand::thread_rng();
|
||||
let delay_secs = rng.gen_range(1..10);
|
||||
std::thread::sleep(Duration::from_secs(delay_secs));
|
||||
|
||||
// fetch results from upstream search engines simultaneously/concurrently.
|
||||
let (ddg_map_results, searx_map_results) = join!(
|
||||
duckduckgo::results(query, page, &user_agent),
|
||||
searx::results(query, page, &user_agent)
|
||||
);
|
||||
|
||||
let ddg_map_results: HashMap<String, RawSearchResult> = ddg_map_results?;
|
||||
let searx_map_results: HashMap<String, RawSearchResult> = searx_map_results?;
|
||||
|
||||
result_map.extend(ddg_map_results);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user