0
Fork 0
mirror of https://git.lolcat.ca/lolcat/4get.git synced 2025-01-15 00:10:33 -05:00

duckduckgo scraper rewrite

This commit is contained in:
lolcat 2024-12-17 00:31:15 -05:00
parent 0b3bbe0f15
commit 774f7113df
6 changed files with 1488 additions and 2234 deletions

21
api.txt
View file

@ -1,10 +1,17 @@
__ __ __ 44
/ // / ____ ____ / /_ 4444444 44
/ // /_/ __ `/ _ \/ __/ 44444444 44444 444
/__ __/ /_/ / __/ /_ 44444444 444444 444444444
/_/ \__, /\___/\__/ 44444 44444444 444444444
/____/ 444444444 4444444
4444444444 444444
4444444444444
444444444444444444
444444444444444
44444444
4444
44
+ Welcome to the 4get API documentation + + Welcome to the 4get API documentation +
+ Terms of use + Terms of use

View file

@ -119,7 +119,7 @@ class config{
// Default user agent to use for scraper requests. Sometimes ignored to get specific webpages // Default user agent to use for scraper requests. Sometimes ignored to get specific webpages
// Changing this might break things. // Changing this might break things.
const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:131.0) Gecko/20100101 Firefox/131.0"; const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0";
// Proxy pool assignments for each scraper // Proxy pool assignments for each scraper
// false = Use server's raw IP // false = Use server's raw IP

View file

@ -75,6 +75,7 @@ class backend{
break; break;
case "socks5_hostname": case "socks5_hostname":
case "socks5h":
case "socks5a": case "socks5a":
curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5_HOSTNAME); curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5_HOSTNAME);
curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port); curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port);

View file

@ -526,4 +526,85 @@ class fuckhtml{
$string $string
); );
} }
/**
 * Extract the first balanced JSON-like array/object literal found in a string.
 *
 * Scans $json byte by byte, tracking "[" / "{" nesting depth while skipping
 * anything inside single- or double-quoted strings. The returned substring
 * starts at the first unquoted "[" or "{" and ends at the byte where both
 * nesting counters drop back to zero. The result is NOT validated or decoded;
 * callers are expected to pass it to json_decode() themselves.
 *
 * @param string $json Arbitrary text (eg. javascript source) containing a literal.
 * @return string|null The extracted literal, or null if no balanced literal exists.
 */
public function extract_json($json){
	
	$len = strlen($json);
	$array_level = 0;
	$object_level = 0;
	$in_quote = null;
	$start = null;
	
	for($i=0; $i<$len; $i++){
		
		switch($json[$i]){
			
			case "[":
				if($in_quote === null){
					
					$array_level++;
					if($start === null){
						
						$start = $i;
					}
				}
				break;
			
			case "]":
				// closers seen before the literal starts are unrelated noise;
				// counting them would push the depth negative and make the
				// "balanced" check below impossible to satisfy
				if(
					$in_quote === null &&
					$start !== null
				){
					
					$array_level--;
				}
				break;
			
			case "{":
				if($in_quote === null){
					
					$object_level++;
					if($start === null){
						
						$start = $i;
					}
				}
				break;
			
			case "}":
				if(
					$in_quote === null &&
					$start !== null
				){
					
					$object_level--;
				}
				break;
			
			case "\"":
			case "'":
				// a quote is escaped only when preceded by an ODD number of
				// backslashes: in "C:\\" the final quote closes the string
				// because the backslash before it is itself escaped
				$backslashes = 0;
				for($k=$i - 1; $k >= 0 && $json[$k] === "\\"; $k--){
					
					$backslashes++;
				}
				
				if($backslashes % 2 === 0){
					
					// found a non-escaped quote
					if($in_quote === null){
						
						// open quote
						$in_quote = $json[$i];
					}elseif($in_quote === $json[$i]){
						
						// close quote
						$in_quote = null;
					}
				}
				break;
		}
		
		if(
			$start !== null &&
			$array_level === 0 &&
			$object_level === 0
		){
			
			// both counters are back to zero: $i is the closing bracket
			return substr($json, $start, $i - $start + 1);
		}
	}
	
	// no literal found, or the input ended before it was balanced
	return null;
}
} }

File diff suppressed because it is too large Load diff

1
scraper/out.txt Normal file

File diff suppressed because one or more lines are too long