0
0
mirror of https://github.com/neon-mmd/websurfx.git synced 2024-11-24 06:58:22 -05:00

Merge pull request #14 from neon-mmd/better-evasion-for-ip-blocking

Improve code to better evade bot detection and ip blocking
This commit is contained in:
neon_arch 2023-05-08 08:58:39 +00:00 committed by GitHub
commit 30b187a96e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 5 additions and 3 deletions

View File

@ -4,7 +4,7 @@
use std::collections::HashMap;
use reqwest::header::{HeaderMap, CONTENT_TYPE, REFERER, USER_AGENT};
use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
use scraper::{Html, Selector};
use crate::search_results_handler::aggregation_models::RawSearchResult;
@ -50,7 +50,8 @@ pub async fn results(
let mut header_map = HeaderMap::new();
header_map.insert(USER_AGENT, user_agent.parse()?);
header_map.insert(REFERER, "https://google.com/".parse()?);
header_map.insert(CONTENT_TYPE, "text/html; charset=UTF-8".parse()?);
header_map.insert(CONTENT_TYPE, "application/x-www-form-urlencoded".parse()?);
header_map.insert(COOKIE, "kl=wt-wt".parse()?);
// fetch the html from upstream duckduckgo engine
// TODO: Write better error handling code to handle no results case.

View File

@ -2,7 +2,7 @@
//! by querying the upstream searx search engine instance with user provided query and with a page
//! number if provided.
use reqwest::header::{HeaderMap, CONTENT_TYPE, REFERER, USER_AGENT};
use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
use scraper::{Html, Selector};
use std::collections::HashMap;
@ -38,6 +38,7 @@ pub async fn results(
header_map.insert(USER_AGENT, user_agent.parse()?);
header_map.insert(REFERER, "https://google.com/".parse()?);
header_map.insert(CONTENT_TYPE, "application/x-www-form-urlencoded".parse()?);
header_map.insert(COOKIE, "categories=general; language=auto; locale=en; autocomplete=duckduckgo; image_proxy=1; method=POST; safesearch=2; theme=simple; results_on_new_tab=1; doi_resolver=oadoi.org; simple_style=auto; center_alignment=1; query_in_title=1; infinite_scroll=0; disabled_engines=; enabled_engines=\"archive is__general\\054yep__general\\054curlie__general\\054currency__general\\054ddg definitions__general\\054wikidata__general\\054duckduckgo__general\\054tineye__general\\054lingva__general\\054startpage__general\\054yahoo__general\\054wiby__general\\054marginalia__general\\054alexandria__general\\054wikibooks__general\\054wikiquote__general\\054wikisource__general\\054wikiversity__general\\054wikivoyage__general\\054dictzone__general\\054seznam__general\\054mojeek__general\\054naver__general\\054wikimini__general\\054brave__general\\054petalsearch__general\\054goo__general\"; disabled_plugins=; enabled_plugins=\"searx.plugins.hostname_replace\\054searx.plugins.oa_doi_rewrite\\054searx.plugins.vim_hotkeys\"; tokens=; maintab=on; enginetab=on".parse()?);
// fetch the html from upstream searx instance engine
// TODO: Write better error handling code to handle no results case.