mirror of
https://git.lolcat.ca/lolcat/4get.git
synced 2024-11-08 17:43:07 -05:00
yep scraper cloudflare error handling
This commit is contained in:
parent
bcb5c4d519
commit
92d0102738
1 changed file with 38 additions and 16 deletions
|
@ -6,6 +6,9 @@ class yep{
|
||||||
|
|
||||||
include "lib/backend.php";
|
include "lib/backend.php";
|
||||||
$this->backend = new backend("yep");
|
$this->backend = new backend("yep");
|
||||||
|
|
||||||
|
include "lib/fuckhtml.php";
|
||||||
|
$this->fuckhtml = new fuckhtml();
|
||||||
}
|
}
|
||||||
|
|
||||||
public function getfilters($page){
|
public function getfilters($page){
|
||||||
|
@ -254,8 +257,10 @@ class yep{
|
||||||
["User-Agent: " . config::USER_AGENT,
|
["User-Agent: " . config::USER_AGENT,
|
||||||
"Accept: */*",
|
"Accept: */*",
|
||||||
"Accept-Language: en-US,en;q=0.5",
|
"Accept-Language: en-US,en;q=0.5",
|
||||||
"Accept-Encoding: gzip",
|
"Accept-Encoding: gzip, deflate, br, zstd",
|
||||||
|
"Connection: keep-alive",
|
||||||
"DNT: 1",
|
"DNT: 1",
|
||||||
|
"Priority: u=1",
|
||||||
"Origin: https://yep.com",
|
"Origin: https://yep.com",
|
||||||
"Referer: https://yep.com/",
|
"Referer: https://yep.com/",
|
||||||
"Connection: keep-alive",
|
"Connection: keep-alive",
|
||||||
|
@ -265,6 +270,9 @@ class yep{
|
||||||
"TE: trailers"]
|
"TE: trailers"]
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// http3 bypass
|
||||||
|
curl_setopt($curlproc, CURLOPT_HTTP_VERSION, 30);
|
||||||
|
|
||||||
curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
|
curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
|
||||||
curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
|
curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
|
||||||
curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
|
curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
|
||||||
|
@ -324,27 +332,41 @@ class yep{
|
||||||
|
|
||||||
// https://api.yep.com/fs/2/search?client=web&gl=CA&no_correct=false&q=undefined+variable+javascript&safeSearch=off&type=web
|
// https://api.yep.com/fs/2/search?client=web&gl=CA&no_correct=false&q=undefined+variable+javascript&safeSearch=off&type=web
|
||||||
$json =
|
$json =
|
||||||
json_decode(
|
$this->get(
|
||||||
$this->get(
|
$this->backend->get_ip(),
|
||||||
$this->backend->get_ip(),
|
"https://api.yep.com/fs/2/search",
|
||||||
"https://api.yep.com/fs/2/search",
|
[
|
||||||
[
|
"client" => "web",
|
||||||
"client" => "web",
|
"gl" => $country == "all" ? $country : strtoupper($country),
|
||||||
"gl" => $country == "all" ? $country : strtoupper($country),
|
"limit" => "99999",
|
||||||
"limit" => "99999",
|
"no_correct" => "false",
|
||||||
"no_correct" => "false",
|
"q" => $search,
|
||||||
"q" => $search,
|
"safeSearch" => $nsfw,
|
||||||
"safeSearch" => $nsfw,
|
"type" => "web"
|
||||||
"type" => "web"
|
]
|
||||||
]
|
|
||||||
),
|
|
||||||
true
|
|
||||||
);
|
);
|
||||||
}catch(Exception $error){
|
}catch(Exception $error){
|
||||||
|
|
||||||
throw new Exception("Failed to fetch JSON");
|
throw new Exception("Failed to fetch JSON");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// detect cloudflare page
|
||||||
|
$this->fuckhtml->load($json);
|
||||||
|
|
||||||
|
if(
|
||||||
|
count(
|
||||||
|
$this->fuckhtml
|
||||||
|
->getElementsByClassName(
|
||||||
|
"cf-wrapper",
|
||||||
|
"div"
|
||||||
|
)
|
||||||
|
) !== 0
|
||||||
|
){
|
||||||
|
|
||||||
|
throw new Exception("Blocked by Cloudflare");
|
||||||
|
}
|
||||||
|
|
||||||
|
$json = json_decode($json, true);
|
||||||
//$json = json_decode(file_get_contents("scraper/yep.json"), true);
|
//$json = json_decode(file_get_contents("scraper/yep.json"), true);
|
||||||
|
|
||||||
if($json === null){
|
if($json === null){
|
||||||
|
|
Loading…
Reference in a new issue