mirror of
https://github.com/neon-mmd/websurfx.git
synced 2024-11-21 21:48:21 -05:00
Improve code to evade bot detection; closes #8
This commit is contained in:
parent
ab7348ba3d
commit
0502a8f551
@ -4,7 +4,7 @@
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use reqwest::header::{HeaderMap, CONTENT_TYPE, REFERER, USER_AGENT};
|
||||
use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
|
||||
use scraper::{Html, Selector};
|
||||
|
||||
use crate::search_results_handler::aggregation_models::RawSearchResult;
|
||||
@ -50,7 +50,8 @@ pub async fn results(
|
||||
let mut header_map = HeaderMap::new();
|
||||
header_map.insert(USER_AGENT, user_agent.parse()?);
|
||||
header_map.insert(REFERER, "https://google.com/".parse()?);
|
||||
header_map.insert(CONTENT_TYPE, "text/html; charset=UTF-8".parse()?);
|
||||
header_map.insert(CONTENT_TYPE, "application/x-www-form-urlencoded".parse()?);
|
||||
header_map.insert(COOKIE, "kl=wt-wt".parse()?);
|
||||
|
||||
// fetch the html from upstream duckduckgo engine
|
||||
// TODO: Write better error-handling code for the case where no results are returned.
|
||||
|
@ -2,7 +2,7 @@
|
||||
//! by querying the upstream searx search engine instance with the user-provided query and with a page
|
||||
//! number if provided.
|
||||
|
||||
use reqwest::header::{HeaderMap, CONTENT_TYPE, REFERER, USER_AGENT};
|
||||
use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
|
||||
use scraper::{Html, Selector};
|
||||
use std::collections::HashMap;
|
||||
|
||||
@ -38,6 +38,7 @@ pub async fn results(
|
||||
header_map.insert(USER_AGENT, user_agent.parse()?);
|
||||
header_map.insert(REFERER, "https://google.com/".parse()?);
|
||||
header_map.insert(CONTENT_TYPE, "application/x-www-form-urlencoded".parse()?);
|
||||
header_map.insert(COOKIE, "categories=general; language=auto; locale=en; autocomplete=duckduckgo; image_proxy=1; method=POST; safesearch=2; theme=simple; results_on_new_tab=1; doi_resolver=oadoi.org; simple_style=auto; center_alignment=1; query_in_title=1; infinite_scroll=0; disabled_engines=; enabled_engines=\"archive is__general\\054yep__general\\054curlie__general\\054currency__general\\054ddg definitions__general\\054wikidata__general\\054duckduckgo__general\\054tineye__general\\054lingva__general\\054startpage__general\\054yahoo__general\\054wiby__general\\054marginalia__general\\054alexandria__general\\054wikibooks__general\\054wikiquote__general\\054wikisource__general\\054wikiversity__general\\054wikivoyage__general\\054dictzone__general\\054seznam__general\\054mojeek__general\\054naver__general\\054wikimini__general\\054brave__general\\054petalsearch__general\\054goo__general\"; disabled_plugins=; enabled_plugins=\"searx.plugins.hostname_replace\\054searx.plugins.oa_doi_rewrite\\054searx.plugins.vim_hotkeys\"; tokens=; maintab=on; enginetab=on".parse()?);
|
||||
|
||||
// fetch the html from upstream searx instance engine
|
||||
// TODO: Write better error-handling code for the case where no results are returned.
|
||||
|
Loading…
Reference in New Issue
Block a user