mirror of
https://github.com/neon-mmd/websurfx.git
synced 2024-11-22 14:08:23 -05:00
Merge branch 'rolling' into bing-search-engine
This commit is contained in:
commit
24099330e2
14
.github/dependabot.yml
vendored
Normal file
14
.github/dependabot.yml
vendored
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
version: 2
|
||||||
|
updates:
|
||||||
|
- package-ecosystem: "cargo"
|
||||||
|
directory: "/"
|
||||||
|
schedule:
|
||||||
|
interval: "monthly"
|
||||||
|
- package-ecosystem: "github-actions"
|
||||||
|
directory: "/"
|
||||||
|
schedule:
|
||||||
|
interval: "monthly"
|
||||||
|
- package-ecosystem: "docker"
|
||||||
|
directory: "/"
|
||||||
|
schedule:
|
||||||
|
interval: "monthly"
|
4
.github/workflows/contributors.yml
vendored
4
.github/workflows/contributors.yml
vendored
@ -17,13 +17,13 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0
|
uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4.1.0
|
||||||
with:
|
with:
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
ref: ${{ github.event.repository.default_branch }}
|
ref: ${{ github.event.repository.default_branch }}
|
||||||
|
|
||||||
- name: Update contributors list
|
- name: Update contributors list
|
||||||
uses: wow-actions/contributors-list@b9e91f91a51a55460fdcae64daad0cb8122cdd53 # v1.1.0
|
uses: wow-actions/contributors-list@242b53835016268d20e79eeff6f42193c02be8c8 # v1.2.0
|
||||||
with:
|
with:
|
||||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
svgPath: images/contributors_list.svg
|
svgPath: images/contributors_list.svg
|
||||||
|
2
.github/workflows/labels.yml
vendored
2
.github/workflows/labels.yml
vendored
@ -12,7 +12,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/setup-node@v2
|
- uses: actions/setup-node@v3
|
||||||
with:
|
with:
|
||||||
node-version: '14'
|
node-version: '14'
|
||||||
- uses: EddieHubCommunity/gh-action-open-source-labels@main
|
- uses: EddieHubCommunity/gh-action-open-source-labels@main
|
||||||
|
2
.github/workflows/mega-linter.yml
vendored
2
.github/workflows/mega-linter.yml
vendored
@ -32,7 +32,7 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
# Git Checkout
|
# Git Checkout
|
||||||
- name: Checkout Code
|
- name: Checkout Code
|
||||||
uses: actions/checkout@v3
|
uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
token: ${{ secrets.PAT || secrets.GITHUB_TOKEN }}
|
token: ${{ secrets.PAT || secrets.GITHUB_TOKEN }}
|
||||||
|
|
||||||
|
4
.github/workflows/rust.yml
vendored
4
.github/workflows/rust.yml
vendored
@ -25,7 +25,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
sudo apt-get update
|
sudo apt-get update
|
||||||
sudo apt-get install -y --no-install-recommends liblua5.4-dev liblua5.3-dev liblua5.2-dev liblua5.1-0-dev libluajit-5.1-dev
|
sudo apt-get install -y --no-install-recommends liblua5.4-dev liblua5.3-dev liblua5.2-dev liblua5.1-0-dev libluajit-5.1-dev
|
||||||
- uses: actions/checkout@v3
|
- uses: actions/checkout@v4
|
||||||
- run: rustup toolchain install stable --profile minimal
|
- run: rustup toolchain install stable --profile minimal
|
||||||
- uses: Swatinem/rust-cache@v2
|
- uses: Swatinem/rust-cache@v2
|
||||||
with:
|
with:
|
||||||
@ -39,7 +39,7 @@ jobs:
|
|||||||
cache-on-failure: ''
|
cache-on-failure: ''
|
||||||
cache-all-crates: ''
|
cache-all-crates: ''
|
||||||
save-if: ''
|
save-if: ''
|
||||||
- uses: actions/checkout@v3
|
- uses: actions/checkout@v4
|
||||||
- run: rustup update ${{ matrix.toolchain }} && rustup default ${{ matrix.toolchain }}
|
- run: rustup update ${{ matrix.toolchain }} && rustup default ${{ matrix.toolchain }}
|
||||||
- name: Build
|
- name: Build
|
||||||
run: cargo build --verbose
|
run: cargo build --verbose
|
||||||
|
2
.github/workflows/rust_format.yml
vendored
2
.github/workflows/rust_format.yml
vendored
@ -17,7 +17,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
sudo apt-get update
|
sudo apt-get update
|
||||||
sudo apt-get install -y --no-install-recommends liblua5.4-dev liblua5.3-dev liblua5.2-dev liblua5.1-0-dev libluajit-5.1-dev
|
sudo apt-get install -y --no-install-recommends liblua5.4-dev liblua5.3-dev liblua5.2-dev liblua5.1-0-dev libluajit-5.1-dev
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v4
|
||||||
- name: Install minimal stable with clippy and rustfmt
|
- name: Install minimal stable with clippy and rustfmt
|
||||||
uses: actions-rs/toolchain@v1
|
uses: actions-rs/toolchain@v1
|
||||||
with:
|
with:
|
||||||
|
2
.github/workflows/stale.yml
vendored
2
.github/workflows/stale.yml
vendored
@ -19,7 +19,7 @@ jobs:
|
|||||||
pull-requests: write
|
pull-requests: write
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/stale@v5
|
- uses: actions/stale@v8
|
||||||
with:
|
with:
|
||||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
stale-issue-message: 'Stale issue message'
|
stale-issue-message: 'Stale issue message'
|
||||||
|
18
.mergify.yml
Normal file
18
.mergify.yml
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
pull_request_rules:
|
||||||
|
- name: Automatic merge on approval
|
||||||
|
conditions:
|
||||||
|
- "status-success=checks/approved"
|
||||||
|
- "#approved-reviews-by>=2"
|
||||||
|
actions:
|
||||||
|
queue:
|
||||||
|
method: squash
|
||||||
|
- name: automatic update of pull requests where more 5 commits behind
|
||||||
|
conditions:
|
||||||
|
- "#commits-behind>5"
|
||||||
|
actions:
|
||||||
|
update:
|
||||||
|
- name: delete head branch after merge
|
||||||
|
conditions:
|
||||||
|
- merged
|
||||||
|
actions:
|
||||||
|
delete_head_branch: {}
|
69
Cargo.lock
generated
69
Cargo.lock
generated
@ -433,11 +433,12 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "bstr"
|
name = "bstr"
|
||||||
version = "0.2.17"
|
version = "1.6.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223"
|
checksum = "4c2f7349907b712260e64b0afe2f84692af14a454be26187d9df565c7f69266a"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"memchr",
|
"memchr",
|
||||||
|
"serde",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@ -448,9 +449,9 @@ checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "bytecount"
|
name = "bytecount"
|
||||||
version = "0.6.3"
|
version = "0.6.4"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "2c676a478f63e9fa2dd5368a42f28bba0d6c560b775f38583c8bbaa7fcd67c9c"
|
checksum = "ad152d03a2c813c80bb94fedbf3a3f02b28f793e39e7c214c8a0bcc196343de7"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "byteorder"
|
name = "byteorder"
|
||||||
@ -991,9 +992,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "errno"
|
name = "errno"
|
||||||
version = "0.3.3"
|
version = "0.3.4"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "136526188508e25c6fef639d7927dfb3e0e3084488bf202267829cf7fc23dbdd"
|
checksum = "add4f07d43996f76ef320709726a556a9d4f965d9410d8d0271132d2f8293480"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"errno-dragonfly",
|
"errno-dragonfly",
|
||||||
"libc",
|
"libc",
|
||||||
@ -1866,9 +1867,9 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "memchr"
|
name = "memchr"
|
||||||
version = "2.6.3"
|
version = "2.6.4"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c"
|
checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "memoffset"
|
name = "memoffset"
|
||||||
@ -1992,20 +1993,30 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "mlua"
|
name = "mlua"
|
||||||
version = "0.8.10"
|
version = "0.9.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "0bb37b0ba91f017aa7ca2b98ef99496827770cd635b4a932a6047c5b4bbe678e"
|
checksum = "6c3a7a7ff4481ec91b951a733390211a8ace1caba57266ccb5f4d4966704e560"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bstr",
|
"bstr",
|
||||||
"cc",
|
"mlua-sys",
|
||||||
"lua-src",
|
|
||||||
"luajit-src",
|
|
||||||
"num-traits",
|
"num-traits",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"pkg-config",
|
|
||||||
"rustc-hash",
|
"rustc-hash",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "mlua-sys"
|
||||||
|
version = "0.3.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3ec8b54eddb76093069cce9eeffb4c7b3a1a0fe66962d7bd44c4867928149ca3"
|
||||||
|
dependencies = [
|
||||||
|
"cc",
|
||||||
|
"cfg-if 1.0.0",
|
||||||
|
"lua-src",
|
||||||
|
"luajit-src",
|
||||||
|
"pkg-config",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "native-tls"
|
name = "native-tls"
|
||||||
version = "0.2.11"
|
version = "0.2.11"
|
||||||
@ -2802,9 +2813,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "reqwest"
|
name = "reqwest"
|
||||||
version = "0.11.20"
|
version = "0.11.22"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "3e9ad3fe7488d7e34558a2033d45a0c90b72d97b4f80705666fea71472e2e6a1"
|
checksum = "046cd98826c46c2ac8ddecae268eb5c2e58628688a5fc7a2643704a73faba95b"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"base64 0.21.4",
|
"base64 0.21.4",
|
||||||
"bytes 1.5.0",
|
"bytes 1.5.0",
|
||||||
@ -2827,6 +2838,7 @@ dependencies = [
|
|||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
"serde_urlencoded 0.7.1",
|
"serde_urlencoded 0.7.1",
|
||||||
|
"system-configuration",
|
||||||
"tokio 1.32.0",
|
"tokio 1.32.0",
|
||||||
"tokio-native-tls",
|
"tokio-native-tls",
|
||||||
"tower-service",
|
"tower-service",
|
||||||
@ -3320,6 +3332,27 @@ dependencies = [
|
|||||||
"libc",
|
"libc",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "system-configuration"
|
||||||
|
version = "0.5.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags 1.3.2",
|
||||||
|
"core-foundation",
|
||||||
|
"system-configuration-sys",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "system-configuration-sys"
|
||||||
|
version = "0.5.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9"
|
||||||
|
dependencies = [
|
||||||
|
"core-foundation-sys",
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tagptr"
|
name = "tagptr"
|
||||||
version = "0.2.0"
|
version = "0.2.0"
|
||||||
@ -3932,7 +3965,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "websurfx"
|
name = "websurfx"
|
||||||
version = "1.0.0"
|
version = "1.0.11"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"actix-cors",
|
"actix-cors",
|
||||||
"actix-files",
|
"actix-files",
|
||||||
@ -3956,7 +3989,7 @@ dependencies = [
|
|||||||
"rand 0.8.5",
|
"rand 0.8.5",
|
||||||
"redis",
|
"redis",
|
||||||
"regex",
|
"regex",
|
||||||
"reqwest 0.11.20",
|
"reqwest 0.11.22",
|
||||||
"rusty-hook",
|
"rusty-hook",
|
||||||
"scraper",
|
"scraper",
|
||||||
"serde",
|
"serde",
|
||||||
|
@ -1,13 +1,13 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "websurfx"
|
name = "websurfx"
|
||||||
version = "1.0.0"
|
version = "1.0.11"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
|
description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
|
||||||
repository = "https://github.com/neon-mmd/websurfx"
|
repository = "https://github.com/neon-mmd/websurfx"
|
||||||
license = "AGPL-3.0"
|
license = "AGPL-3.0"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
reqwest = {version="0.11.20",features=["json"]}
|
reqwest = {version="0.11.21",features=["json"]}
|
||||||
tokio = {version="1.32.0",features=["rt-multi-thread","macros"]}
|
tokio = {version="1.32.0",features=["rt-multi-thread","macros"]}
|
||||||
serde = {version="1.0.188",features=["derive"]}
|
serde = {version="1.0.188",features=["derive"]}
|
||||||
handlebars = { version = "4.4.0", features = ["dir_source"] }
|
handlebars = { version = "4.4.0", features = ["dir_source"] }
|
||||||
@ -19,7 +19,7 @@ serde_json = {version="1.0.105"}
|
|||||||
fake-useragent = {version="0.1.3"}
|
fake-useragent = {version="0.1.3"}
|
||||||
env_logger = {version="0.10.0"}
|
env_logger = {version="0.10.0"}
|
||||||
log = {version="0.4.20"}
|
log = {version="0.4.20"}
|
||||||
mlua = {version="0.8.10", features=["luajit", "vendored"]}
|
mlua = {version="0.9.1", features=["luajit", "vendored"]}
|
||||||
redis = {version="0.23.3", features=["tokio-comp","connection-manager"], optional = true}
|
redis = {version="0.23.3", features=["tokio-comp","connection-manager"], optional = true}
|
||||||
md5 = {version="0.7.0"}
|
md5 = {version="0.7.0"}
|
||||||
rand={version="0.8.5"}
|
rand={version="0.8.5"}
|
||||||
|
@ -35,6 +35,12 @@
|
|||||||
src="https://img.shields.io/maintenance/yes/2023?style=flat-square"
|
src="https://img.shields.io/maintenance/yes/2023?style=flat-square"
|
||||||
/>
|
/>
|
||||||
</a>
|
</a>
|
||||||
|
<a href="https://www.codefactor.io/repository/github/neon-mmd/websurfx">
|
||||||
|
<img
|
||||||
|
alt="CodeFactor"
|
||||||
|
src="https://www.codefactor.io/repository/github/neon-mmd/websurfx/badge"
|
||||||
|
/>
|
||||||
|
</a>
|
||||||
<a href="https://gitpod.io/#https://github.com/neon-mmd/websurfx">
|
<a href="https://gitpod.io/#https://github.com/neon-mmd/websurfx">
|
||||||
<img
|
<img
|
||||||
alt="Gitpod"
|
alt="Gitpod"
|
||||||
@ -106,6 +112,9 @@
|
|||||||
# Features 🚀
|
# Features 🚀
|
||||||
|
|
||||||
- 🎨 Make Websurfx uniquely yours with twelve color schemes provided by default. It also supports creation of custom themes and color schemes in a quick and easy way, so unleash your creativity!
|
- 🎨 Make Websurfx uniquely yours with twelve color schemes provided by default. It also supports creation of custom themes and color schemes in a quick and easy way, so unleash your creativity!
|
||||||
|
- 🚀 Easy to set up with Docker or on bare metal with various installation/deployment options.
|
||||||
|
- ⛔ Search filtering to filter search results based on four different levels.
|
||||||
|
- 💾 Different caching levels focusing on reliability, speed and resiliency.
|
||||||
- 🔐 Fast, private, and secure
|
- 🔐 Fast, private, and secure
|
||||||
- 🆓 100% free and open source
|
- 🆓 100% free and open source
|
||||||
- 💨 Ad-free and clean results
|
- 💨 Ad-free and clean results
|
||||||
|
@ -15,6 +15,4 @@ services:
|
|||||||
- ./websurfx/:/etc/xdg/websurfx/
|
- ./websurfx/:/etc/xdg/websurfx/
|
||||||
# Uncomment the following lines if you are using the `hybrid` or `redis` caching feature.
|
# Uncomment the following lines if you are using the `hybrid` or `redis` caching feature.
|
||||||
# redis:
|
# redis:
|
||||||
# image: redis:latest
|
# image: redis:latest
|
||||||
# ports:
|
|
||||||
# - 6379:6379
|
|
@ -217,8 +217,6 @@ services:
|
|||||||
# Uncomment the following lines if you are using the `hybrid/latest` or `redis` image.
|
# Uncomment the following lines if you are using the `hybrid/latest` or `redis` image.
|
||||||
# redis:
|
# redis:
|
||||||
# image: redis:latest
|
# image: redis:latest
|
||||||
# ports:
|
|
||||||
# - 6379:6379
|
|
||||||
```
|
```
|
||||||
|
|
||||||
Then make sure to edit the `docker-compose.yml` file as required. After that create a directory `websurfx` in the directory you have placed the `docker-compose.yml` file, and then in the new directory create two new empty files named `allowlist.txt` and `blocklist.txt`. Finally, create a new config file `config.lua` with the default configuration, which looks something like this:
|
Then make sure to edit the `docker-compose.yml` file as required. After that create a directory `websurfx` in the directory you have placed the `docker-compose.yml` file, and then in the new directory create two new empty files named `allowlist.txt` and `blocklist.txt`. Finally, create a new config file `config.lua` with the default configuration, which looks something like this:
|
||||||
|
@ -3,6 +3,7 @@
|
|||||||
|
|
||||||
use crate::handler::paths::{file_path, FileType};
|
use crate::handler::paths::{file_path, FileType};
|
||||||
|
|
||||||
|
use crate::models::engine_models::{EngineError, EngineHandler};
|
||||||
use crate::models::parser_models::{AggregatorConfig, RateLimiter, Style};
|
use crate::models::parser_models::{AggregatorConfig, RateLimiter, Style};
|
||||||
use log::LevelFilter;
|
use log::LevelFilter;
|
||||||
use mlua::Lua;
|
use mlua::Lua;
|
||||||
@ -28,7 +29,7 @@ pub struct Config {
|
|||||||
/// It stores the option of whether to enable or disable debug mode.
|
/// It stores the option of whether to enable or disable debug mode.
|
||||||
pub debug: bool,
|
pub debug: bool,
|
||||||
/// It stores all the engine names that were enabled by the user.
|
/// It stores all the engine names that were enabled by the user.
|
||||||
pub upstream_search_engines: Vec<crate::models::engine_models::EngineHandler>,
|
pub upstream_search_engines: Vec<EngineHandler>,
|
||||||
/// It stores the time (secs) which controls the server request timeout.
|
/// It stores the time (secs) which controls the server request timeout.
|
||||||
pub request_timeout: u8,
|
pub request_timeout: u8,
|
||||||
/// It stores the number of threads the app will use to run.
|
/// It stores the number of threads the app will use to run.
|
||||||
@ -111,8 +112,8 @@ impl Config {
|
|||||||
.get::<_, HashMap<String, bool>>("upstream_search_engines")?
|
.get::<_, HashMap<String, bool>>("upstream_search_engines")?
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.filter_map(|(key, value)| value.then_some(key))
|
.filter_map(|(key, value)| value.then_some(key))
|
||||||
.filter_map(|engine| crate::models::engine_models::EngineHandler::new(&engine))
|
.map(|engine| EngineHandler::new(&engine))
|
||||||
.collect(),
|
.collect::<Result<Vec<EngineHandler>, error_stack::Report<EngineError>>>()?,
|
||||||
request_timeout: globals.get::<_, u8>("request_timeout")?,
|
request_timeout: globals.get::<_, u8>("request_timeout")?,
|
||||||
threads,
|
threads,
|
||||||
rate_limiter: RateLimiter {
|
rate_limiter: RateLimiter {
|
||||||
|
@ -5,7 +5,7 @@
|
|||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
|
||||||
use reqwest::header::HeaderMap;
|
use reqwest::header::HeaderMap;
|
||||||
use scraper::{Html, Selector};
|
use scraper::Html;
|
||||||
|
|
||||||
use crate::models::aggregation_models::SearchResult;
|
use crate::models::aggregation_models::SearchResult;
|
||||||
|
|
||||||
@ -13,9 +13,29 @@ use crate::models::engine_models::{EngineError, SearchEngine};
|
|||||||
|
|
||||||
use error_stack::{Report, Result, ResultExt};
|
use error_stack::{Report, Result, ResultExt};
|
||||||
|
|
||||||
|
use super::search_result_parser::SearchResultParser;
|
||||||
|
|
||||||
/// A new DuckDuckGo engine type defined in-order to implement the `SearchEngine` trait which allows to
|
/// A new DuckDuckGo engine type defined in-order to implement the `SearchEngine` trait which allows to
|
||||||
/// reduce code duplication as well as allows to create vector of different search engines easily.
|
/// reduce code duplication as well as allows to create vector of different search engines easily.
|
||||||
pub struct DuckDuckGo;
|
pub struct DuckDuckGo {
|
||||||
|
/// The parser, used to interpret the search result.
|
||||||
|
parser: SearchResultParser,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl DuckDuckGo {
|
||||||
|
/// Creates the DuckDuckGo parser.
|
||||||
|
pub fn new() -> Result<Self, EngineError> {
|
||||||
|
Ok(Self {
|
||||||
|
parser: SearchResultParser::new(
|
||||||
|
".no-results",
|
||||||
|
".result",
|
||||||
|
".result__a",
|
||||||
|
".result__url",
|
||||||
|
".result__snippet",
|
||||||
|
)?,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[async_trait::async_trait]
|
#[async_trait::async_trait]
|
||||||
impl SearchEngine for DuckDuckGo {
|
impl SearchEngine for DuckDuckGo {
|
||||||
@ -59,58 +79,19 @@ impl SearchEngine for DuckDuckGo {
|
|||||||
&DuckDuckGo::fetch_html_from_upstream(self, &url, header_map, request_timeout).await?,
|
&DuckDuckGo::fetch_html_from_upstream(self, &url, header_map, request_timeout).await?,
|
||||||
);
|
);
|
||||||
|
|
||||||
let no_result: Selector = Selector::parse(".no-results")
|
if self.parser.parse_for_no_results(&document).next().is_some() {
|
||||||
.map_err(|_| Report::new(EngineError::UnexpectedError))
|
|
||||||
.attach_printable_lazy(|| format!("invalid CSS selector: {}", ".no-results"))?;
|
|
||||||
|
|
||||||
if document.select(&no_result).next().is_some() {
|
|
||||||
return Err(Report::new(EngineError::EmptyResultSet));
|
return Err(Report::new(EngineError::EmptyResultSet));
|
||||||
}
|
}
|
||||||
|
|
||||||
let results: Selector = Selector::parse(".result")
|
|
||||||
.map_err(|_| Report::new(EngineError::UnexpectedError))
|
|
||||||
.attach_printable_lazy(|| format!("invalid CSS selector: {}", ".result"))?;
|
|
||||||
let result_title: Selector = Selector::parse(".result__a")
|
|
||||||
.map_err(|_| Report::new(EngineError::UnexpectedError))
|
|
||||||
.attach_printable_lazy(|| format!("invalid CSS selector: {}", ".result__a"))?;
|
|
||||||
let result_url: Selector = Selector::parse(".result__url")
|
|
||||||
.map_err(|_| Report::new(EngineError::UnexpectedError))
|
|
||||||
.attach_printable_lazy(|| format!("invalid CSS selector: {}", ".result__url"))?;
|
|
||||||
let result_desc: Selector = Selector::parse(".result__snippet")
|
|
||||||
.map_err(|_| Report::new(EngineError::UnexpectedError))
|
|
||||||
.attach_printable_lazy(|| format!("invalid CSS selector: {}", ".result__snippet"))?;
|
|
||||||
|
|
||||||
// scrape all the results from the html
|
// scrape all the results from the html
|
||||||
Ok(document
|
self.parser
|
||||||
.select(&results)
|
.parse_for_results(&document, |title, url, desc| {
|
||||||
.map(|result| {
|
Some(SearchResult::new(
|
||||||
SearchResult::new(
|
title.inner_html().trim(),
|
||||||
result
|
&format!("https://{}", url.inner_html().trim()),
|
||||||
.select(&result_title)
|
desc.inner_html().trim(),
|
||||||
.next()
|
|
||||||
.unwrap()
|
|
||||||
.inner_html()
|
|
||||||
.trim(),
|
|
||||||
format!(
|
|
||||||
"https://{}",
|
|
||||||
result
|
|
||||||
.select(&result_url)
|
|
||||||
.next()
|
|
||||||
.unwrap()
|
|
||||||
.inner_html()
|
|
||||||
.trim()
|
|
||||||
)
|
|
||||||
.as_str(),
|
|
||||||
result
|
|
||||||
.select(&result_desc)
|
|
||||||
.next()
|
|
||||||
.unwrap()
|
|
||||||
.inner_html()
|
|
||||||
.trim(),
|
|
||||||
&["duckduckgo"],
|
&["duckduckgo"],
|
||||||
)
|
))
|
||||||
})
|
})
|
||||||
.map(|search_result| (search_result.url.clone(), search_result))
|
|
||||||
.collect())
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -4,4 +4,5 @@
|
|||||||
//! code. Moreover, it also provides a custom error for the upstream search engine handling code.
|
//! code. Moreover, it also provides a custom error for the upstream search engine handling code.
|
||||||
|
|
||||||
pub mod duckduckgo;
|
pub mod duckduckgo;
|
||||||
|
pub mod search_result_parser;
|
||||||
pub mod searx;
|
pub mod searx;
|
||||||
|
76
src/engines/search_result_parser.rs
Normal file
76
src/engines/search_result_parser.rs
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
//! This module provides helper functionality for parsing an HTML document into the internal `SearchResult` type.
|
||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
use crate::models::{aggregation_models::SearchResult, engine_models::EngineError};
|
||||||
|
use error_stack::{Report, Result};
|
||||||
|
use scraper::{html::Select, ElementRef, Html, Selector};
|
||||||
|
|
||||||
|
/// A html search result parser, based on a predefined CSS selectors.
|
||||||
|
pub struct SearchResultParser {
|
||||||
|
/// selector to locate the element which is displayed, if there were nothing found.
|
||||||
|
no_result: Selector,
|
||||||
|
/// selector to locate the element which contains one item from the search result.
|
||||||
|
results: Selector,
|
||||||
|
/// selector to locate the title relative to the search result item.
|
||||||
|
result_title: Selector,
|
||||||
|
/// selector to locate the url relative to the search result item.
|
||||||
|
result_url: Selector,
|
||||||
|
/// selector to locate the description relative to the search result item.
|
||||||
|
result_desc: Selector,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SearchResultParser {
|
||||||
|
/// Creates a new parser, if all the selectors are valid, otherwise it returns an EngineError
|
||||||
|
pub fn new(
|
||||||
|
no_result_selector: &str,
|
||||||
|
results_selector: &str,
|
||||||
|
result_title_selector: &str,
|
||||||
|
result_url_selector: &str,
|
||||||
|
result_desc_selector: &str,
|
||||||
|
) -> Result<SearchResultParser, EngineError> {
|
||||||
|
Ok(SearchResultParser {
|
||||||
|
no_result: new_selector(no_result_selector)?,
|
||||||
|
results: new_selector(results_selector)?,
|
||||||
|
result_title: new_selector(result_title_selector)?,
|
||||||
|
result_url: new_selector(result_url_selector)?,
|
||||||
|
result_desc: new_selector(result_desc_selector)?,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse the html and returns element representing the 'no result found' response.
|
||||||
|
pub fn parse_for_no_results<'a>(&'a self, document: &'a Html) -> Select<'a, 'a> {
|
||||||
|
document.select(&self.no_result)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse the html, and convert the results to SearchResult with the help of the builder function
|
||||||
|
pub fn parse_for_results(
|
||||||
|
&self,
|
||||||
|
document: &Html,
|
||||||
|
builder: impl Fn(&ElementRef<'_>, &ElementRef<'_>, &ElementRef<'_>) -> Option<SearchResult>,
|
||||||
|
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
||||||
|
let res = document
|
||||||
|
.select(&self.results)
|
||||||
|
.filter_map(|result| {
|
||||||
|
let title = result.select(&self.result_title).next();
|
||||||
|
let url = result.select(&self.result_url).next();
|
||||||
|
let desc = result.select(&self.result_desc).next();
|
||||||
|
match (title, url, desc) {
|
||||||
|
(Some(ref t), Some(ref u), Some(ref d)) => builder(t, u, d),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.map(|search_result| (search_result.url.clone(), search_result))
|
||||||
|
.collect();
|
||||||
|
Ok(res)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create a Selector struct, if the given parameter is a valid css expression, otherwise convert it into an EngineError.
|
||||||
|
fn new_selector(selector: &str) -> Result<Selector, EngineError> {
|
||||||
|
Selector::parse(selector).map_err(|err| {
|
||||||
|
Report::new(EngineError::UnexpectedError).attach_printable(format!(
|
||||||
|
"invalid CSS selector: {}, err: {:?}",
|
||||||
|
selector, err
|
||||||
|
))
|
||||||
|
})
|
||||||
|
}
|
@ -3,16 +3,35 @@
|
|||||||
//! number if provided.
|
//! number if provided.
|
||||||
|
|
||||||
use reqwest::header::HeaderMap;
|
use reqwest::header::HeaderMap;
|
||||||
use scraper::{Html, Selector};
|
use scraper::Html;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
use super::search_result_parser::SearchResultParser;
|
||||||
use crate::models::aggregation_models::SearchResult;
|
use crate::models::aggregation_models::SearchResult;
|
||||||
use crate::models::engine_models::{EngineError, SearchEngine};
|
use crate::models::engine_models::{EngineError, SearchEngine};
|
||||||
use error_stack::{Report, Result, ResultExt};
|
use error_stack::{Report, Result, ResultExt};
|
||||||
|
|
||||||
/// A new Searx engine type defined in-order to implement the `SearchEngine` trait which allows to
|
/// A new Searx engine type defined in-order to implement the `SearchEngine` trait which allows to
|
||||||
/// reduce code duplication as well as allows to create vector of different search engines easily.
|
/// reduce code duplication as well as allows to create vector of different search engines easily.
|
||||||
pub struct Searx;
|
pub struct Searx {
|
||||||
|
/// The parser, used to interpret the search result.
|
||||||
|
parser: SearchResultParser,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Searx {
|
||||||
|
/// creates a Searx parser
|
||||||
|
pub fn new() -> Result<Searx, EngineError> {
|
||||||
|
Ok(Self {
|
||||||
|
parser: SearchResultParser::new(
|
||||||
|
"#urls>.dialog-error>p",
|
||||||
|
".result",
|
||||||
|
"h3>a",
|
||||||
|
"h3>a",
|
||||||
|
".content",
|
||||||
|
)?,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[async_trait::async_trait]
|
#[async_trait::async_trait]
|
||||||
impl SearchEngine for Searx {
|
impl SearchEngine for Searx {
|
||||||
@ -52,13 +71,7 @@ impl SearchEngine for Searx {
|
|||||||
&Searx::fetch_html_from_upstream(self, &url, header_map, request_timeout).await?,
|
&Searx::fetch_html_from_upstream(self, &url, header_map, request_timeout).await?,
|
||||||
);
|
);
|
||||||
|
|
||||||
let no_result: Selector = Selector::parse("#urls>.dialog-error>p")
|
if let Some(no_result_msg) = self.parser.parse_for_no_results(&document).nth(1) {
|
||||||
.map_err(|_| Report::new(EngineError::UnexpectedError))
|
|
||||||
.attach_printable_lazy(|| {
|
|
||||||
format!("invalid CSS selector: {}", "#urls>.dialog-error>p")
|
|
||||||
})?;
|
|
||||||
|
|
||||||
if let Some(no_result_msg) = document.select(&no_result).nth(1) {
|
|
||||||
if no_result_msg.inner_html()
|
if no_result_msg.inner_html()
|
||||||
== "we didn't find any results. Please use another query or search in more categories"
|
== "we didn't find any results. Please use another query or search in more categories"
|
||||||
{
|
{
|
||||||
@ -66,48 +79,17 @@ impl SearchEngine for Searx {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let results: Selector = Selector::parse(".result")
|
|
||||||
.map_err(|_| Report::new(EngineError::UnexpectedError))
|
|
||||||
.attach_printable_lazy(|| format!("invalid CSS selector: {}", ".result"))?;
|
|
||||||
let result_title: Selector = Selector::parse("h3>a")
|
|
||||||
.map_err(|_| Report::new(EngineError::UnexpectedError))
|
|
||||||
.attach_printable_lazy(|| format!("invalid CSS selector: {}", "h3>a"))?;
|
|
||||||
let result_url: Selector = Selector::parse("h3>a")
|
|
||||||
.map_err(|_| Report::new(EngineError::UnexpectedError))
|
|
||||||
.attach_printable_lazy(|| format!("invalid CSS selector: {}", "h3>a"))?;
|
|
||||||
|
|
||||||
let result_desc: Selector = Selector::parse(".content")
|
|
||||||
.map_err(|_| Report::new(EngineError::UnexpectedError))
|
|
||||||
.attach_printable_lazy(|| format!("invalid CSS selector: {}", ".content"))?;
|
|
||||||
|
|
||||||
// scrape all the results from the html
|
// scrape all the results from the html
|
||||||
Ok(document
|
self.parser
|
||||||
.select(&results)
|
.parse_for_results(&document, |title, url, desc| {
|
||||||
.map(|result| {
|
url.value().attr("href").map(|url| {
|
||||||
SearchResult::new(
|
SearchResult::new(
|
||||||
result
|
title.inner_html().trim(),
|
||||||
.select(&result_title)
|
url,
|
||||||
.next()
|
desc.inner_html().trim(),
|
||||||
.unwrap()
|
&["searx"],
|
||||||
.inner_html()
|
)
|
||||||
.trim(),
|
})
|
||||||
result
|
|
||||||
.select(&result_url)
|
|
||||||
.next()
|
|
||||||
.unwrap()
|
|
||||||
.value()
|
|
||||||
.attr("href")
|
|
||||||
.unwrap(),
|
|
||||||
result
|
|
||||||
.select(&result_desc)
|
|
||||||
.next()
|
|
||||||
.unwrap()
|
|
||||||
.inner_html()
|
|
||||||
.trim(),
|
|
||||||
&["searx"],
|
|
||||||
)
|
|
||||||
})
|
})
|
||||||
.map(|search_result| (search_result.url.clone(), search_result))
|
|
||||||
.collect())
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -85,12 +85,14 @@ impl EngineErrorInfo {
|
|||||||
pub fn new(error: &EngineError, engine: &str) -> Self {
|
pub fn new(error: &EngineError, engine: &str) -> Self {
|
||||||
Self {
|
Self {
|
||||||
error: match error {
|
error: match error {
|
||||||
|
EngineError::NoSuchEngineFound(_) => "EngineNotFound".to_owned(),
|
||||||
EngineError::RequestError => "RequestError".to_owned(),
|
EngineError::RequestError => "RequestError".to_owned(),
|
||||||
EngineError::EmptyResultSet => "EmptyResultSet".to_owned(),
|
EngineError::EmptyResultSet => "EmptyResultSet".to_owned(),
|
||||||
EngineError::UnexpectedError => "UnexpectedError".to_owned(),
|
EngineError::UnexpectedError => "UnexpectedError".to_owned(),
|
||||||
},
|
},
|
||||||
engine: engine.to_owned(),
|
engine: engine.to_owned(),
|
||||||
severity_color: match error {
|
severity_color: match error {
|
||||||
|
EngineError::NoSuchEngineFound(_) => "red".to_owned(),
|
||||||
EngineError::RequestError => "green".to_owned(),
|
EngineError::RequestError => "green".to_owned(),
|
||||||
EngineError::EmptyResultSet => "blue".to_owned(),
|
EngineError::EmptyResultSet => "blue".to_owned(),
|
||||||
EngineError::UnexpectedError => "red".to_owned(),
|
EngineError::UnexpectedError => "red".to_owned(),
|
||||||
|
@ -2,12 +2,14 @@
|
|||||||
//! the upstream search engines with the search query provided by the user.
|
//! the upstream search engines with the search query provided by the user.
|
||||||
|
|
||||||
use super::aggregation_models::SearchResult;
|
use super::aggregation_models::SearchResult;
|
||||||
use error_stack::{Result, ResultExt};
|
use error_stack::{Report, Result, ResultExt};
|
||||||
use std::{collections::HashMap, fmt, time::Duration};
|
use std::{collections::HashMap, fmt, time::Duration};
|
||||||
|
|
||||||
/// A custom error type used for handle engine associated errors.
|
/// A custom error type used for handle engine associated errors.
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub enum EngineError {
|
pub enum EngineError {
|
||||||
|
/// No matching engine found
|
||||||
|
NoSuchEngineFound(String),
|
||||||
/// This variant handles all request related errors like forbidden, not found,
|
/// This variant handles all request related errors like forbidden, not found,
|
||||||
/// etc.
|
/// etc.
|
||||||
EmptyResultSet,
|
EmptyResultSet,
|
||||||
@ -24,6 +26,9 @@ pub enum EngineError {
|
|||||||
impl fmt::Display for EngineError {
|
impl fmt::Display for EngineError {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
match self {
|
match self {
|
||||||
|
EngineError::NoSuchEngineFound(engine) => {
|
||||||
|
write!(f, "No such engine with the name '{engine}' found")
|
||||||
|
}
|
||||||
EngineError::EmptyResultSet => {
|
EngineError::EmptyResultSet => {
|
||||||
write!(f, "The upstream search engine returned an empty result set")
|
write!(f, "The upstream search engine returned an empty result set")
|
||||||
}
|
}
|
||||||
@ -134,18 +139,25 @@ impl EngineHandler {
|
|||||||
/// # Returns
|
/// # Returns
|
||||||
///
|
///
|
||||||
/// It returns an option either containing the value or a none if the engine is unknown
|
/// It returns an option either containing the value or a none if the engine is unknown
|
||||||
pub fn new(engine_name: &str) -> Option<Self> {
|
pub fn new(engine_name: &str) -> Result<Self, EngineError> {
|
||||||
let engine: (&'static str, Box<dyn SearchEngine>) =
|
let engine: (&'static str, Box<dyn SearchEngine>) =
|
||||||
match engine_name.to_lowercase().as_str() {
|
match engine_name.to_lowercase().as_str() {
|
||||||
"duckduckgo" => (
|
"duckduckgo" => {
|
||||||
"duckduckgo",
|
let engine = crate::engines::duckduckgo::DuckDuckGo::new()?;
|
||||||
Box::new(crate::engines::duckduckgo::DuckDuckGo),
|
("duckduckgo", Box::new(engine))
|
||||||
),
|
}
|
||||||
"searx" => ("searx", Box::new(crate::engines::searx::Searx)),
|
"searx" => {
|
||||||
_ => return None,
|
let engine = crate::engines::searx::Searx::new()?;
|
||||||
|
("searx", Box::new(engine))
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
return Err(Report::from(EngineError::NoSuchEngineFound(
|
||||||
|
engine_name.to_string(),
|
||||||
|
)))
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
Some(Self {
|
Ok(Self {
|
||||||
engine: engine.1,
|
engine: engine.1,
|
||||||
name: engine.0,
|
name: engine.0,
|
||||||
})
|
})
|
||||||
|
@ -191,7 +191,7 @@ async fn results(
|
|||||||
let engines: Vec<EngineHandler> = cookie_value
|
let engines: Vec<EngineHandler> = cookie_value
|
||||||
.engines
|
.engines
|
||||||
.iter()
|
.iter()
|
||||||
.filter_map(|name| EngineHandler::new(name))
|
.filter_map(|name| EngineHandler::new(name).ok())
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
safe_search_level = match config.safe_search {
|
safe_search_level = match config.safe_search {
|
||||||
|
Loading…
Reference in New Issue
Block a user