1
Fork 1
mirror of https://git.lolcat.ca/lolcat/4get.git synced 2024-11-08 17:43:07 -05:00

Compare commits

..

No commits in common. "53d40c6e4e86fdd8ed86d5fb043b6c93d9c7b3ea" and "b98a39342103816fe260a9c7c9fd4adf058895c8" have entirely different histories.

3 changed files with 108 additions and 199 deletions

View file

@ -703,43 +703,6 @@ class google{
}
// reset
$this->fuckhtml->load($result_div);
}else{
// get the "Did you mean?" prompt
$taw =
$this->fuckhtml
->getElementById(
"taw"
);
if($taw){
$this->fuckhtml->load($taw);
$as =
$this->fuckhtml
->getElementsByTagName(
"a"
);
if(count($as) !== 0){
$text =
$this->fuckhtml
->getTextContent(
$as[0]
);
// @TODO implement did_you_mean
$out["spelling"] = [
"type" => "including",
"using" => $search,
"correction" => $text
];
}
}
$this->fuckhtml->load($result_div);
}
@ -932,10 +895,36 @@ class google{
// get "Related Searches" and "People also search for"
//
$relateds =
array_merge(
$this->fuckhtml
->getElementsByClassName(
$this->getstyle(
[
"align-items" => "center",
"background-color" => "#28292a",
"border-radius" => "100px",
"box-sizing" => "border-box",
"display" => "flex",
"max-height" => "none",
"min-height" => "48px",
"padding-left" => "17px",
"padding-right" => "17px",
"position" => "relative"
]
) . " " .
$this->getstyle(
[
"margin-left" => "8px",
"margin-right" => "8px"
]
),
"a"
),
$this->fuckhtml
->getElementsByClassName(
"wyccme",
"div"
)
);
foreach($relateds as $related){
@ -1365,7 +1354,7 @@ class google{
"font-size" => "12px",
"line-height" => "1.34",
"display" => "inline-block",
"font-family" => "google sans,arial,sans-serif",
"font-family" => "Google Sans,arial,sans-serif",
"padding-right" => "0",
"white-space" => "nowrap"
]
@ -1412,7 +1401,7 @@ class google{
"line-height" => "22px",
"overflow" => "hidden",
"word-break" => "break-word",
"color" => "#4d5156"
"color" => "#bdc1c6"
]
),
"div"
@ -1426,9 +1415,12 @@ class google{
->getElementsByClassName(
$this->getstyle(
[
"background-color" => "rgba(0,0,0,0.6)",
"color" => "#fff",
"fill" => "#fff"
"border-radius" => "10px",
"font-family" => "arial,sans-serif-medium,sans-serif",
"font-size" => "12px",
"line-height" => "16px",
"padding-block" => "2px",
"padding-inline" => "8px"
]
),
"div"
@ -1441,6 +1433,14 @@ class google{
->getTextContent(
$duration[0]
);
// remove duration from description
$description[0]["innerHTML"] =
str_replace(
$duration[0]["outerHTML"],
"",
$description[0]["innerHTML"]
);
}
$web["description"] =
@ -1979,7 +1979,7 @@ class google{
"font-size" => "12px",
"line-height" => "1.34",
"display" => "inline-block",
"font-family" => "google sans,arial,sans-serif",
"font-family" => "Google Sans,arial,sans-serif",
"padding-right" => "0",
"white-space" => "nowrap"
]
@ -2211,7 +2211,7 @@ class google{
->getElementsByClassName(
$this->getstyle(
[
"font-family" => "google sans,arial,sans-serif",
"font-family" => "Google Sans,arial,sans-serif",
"font-size" => "28px",
"line-height" => "36px"
]
@ -2801,22 +2801,7 @@ class google{
}
}
// get heading element
$heading =
$this->fuckhtml
->getElementsByAttributeValue(
"role",
"heading",
"div"
);
if(count($heading) === 0){
// no heading, fuck this.
continue;
}
// get thumbnail before loading heading object
// get thumbnail
$image =
$this->fuckhtml
->getElementsByAttributeName(
@ -2838,6 +2823,35 @@ class google{
];
}
// get title
$title =
$this->fuckhtml
->getElementsByClassName(
$this->getstyle(
[
"font-family" => "arial,sans-serif",
"font-size" => "16px",
"font-weight" => "400",
"line-height" => "24px"
]
),
"div"
);
if(count($title) === 0){
// ?? no title
continue;
}
$title =
$this->titledots(
$this->fuckhtml
->getTextContent(
$title[0]
)
);
// get duration
$duration_div =
$this->fuckhtml
@ -2894,38 +2908,6 @@ class google{
}
}
// load heading
$this->fuckhtml->load($heading[0]);
// get title
$title =
$this->fuckhtml
->getElementsByClassName(
$this->getstyle(
[
"font-family" => "arial,sans-serif",
"font-size" => "16px",
"font-weight" => "400",
"line-height" => "24px"
]
),
"div"
);
if(count($title) === 0){
// ?? no title
continue;
}
$title =
$this->titledots(
$this->fuckhtml
->getTextContent(
$title[0]
)
);
// get date
$date_div =
$this->fuckhtml
@ -3958,7 +3940,7 @@ class google{
for($k=0; $k<count($values_regex[1]); $k++){
$values[trim($values_regex[1][$k])] =
strtolower(trim($values_regex[2][$k]));
trim($values_regex[2][$k]);
}
$names = explode(",", $matches[1][$i]);
@ -3989,7 +3971,7 @@ class google{
foreach($this->styles[":root"] as $key => $value){
$this->css_colors[$value] = strtolower($key);
$this->css_colors[$value] = $key;
}
}
}
@ -4224,7 +4206,7 @@ class google{
throw new Exception("Failed to get HTML");
}
//$html = file_get_contents("scraper/google.html");
//$html = file_get_contents("scraper/google-video.html");
$response = $this->parsepage($html, "videos", $search, $proxy, $params);
$out = [

View file

@ -6,9 +6,6 @@ class sc{
include "lib/backend.php";
$this->backend = new backend("sc");
include "lib/fuckhtml.php";
$this->fuckhtml = new fuckhtml();
}
public function getfilters($page){
@ -28,7 +25,7 @@ class sc{
];
}
private function get($proxy, $url, $get = [], $web_req = false){
private function get($proxy, $url, $get = []){
$curlproc = curl_init();
@ -40,15 +37,9 @@ class sc{
curl_setopt($curlproc, CURLOPT_URL, $url);
curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
// use http2
curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
if($web_req === false){
curl_setopt($curlproc, CURLOPT_HTTPHEADER,
["User-Agent: " . config::USER_AGENT,
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
"Accept: application/json, text/javascript, */*; q=0.01",
"Accept-Language: en-US,en;q=0.5",
"Accept-Encoding: gzip",
"Referer: https://soundcloud.com/",
@ -57,26 +48,8 @@ class sc{
"Connection: keep-alive",
"Sec-Fetch-Dest: empty",
"Sec-Fetch-Mode: cors",
"Sec-Fetch-Site: same-site",
"Priority: u=1"]
"Sec-Fetch-Site: same-site"]
);
}else{
curl_setopt($curlproc, CURLOPT_HTTPHEADER,
["User-Agent: " . config::USER_AGENT,
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
"Accept-Language: en-US,en;q=0.5",
"Accept-Encoding: gzip",
"DNT: 1",
"Connection: keep-alive",
"Upgrade-Insecure-Requests: 1",
"Sec-Fetch-Dest: document",
"Sec-Fetch-Mode: navigate",
"Sec-Fetch-Site: cross-site",
"Priority: u=1",
"TE: trailers"]
);
}
curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
@ -327,12 +300,9 @@ class sc{
$description[] = $song["title"];
}
if(count($description) !== 0){
if(count($description) != 0){
$description = trim($count . " songs. " . implode(", ", $description));
}else{
$description = "";
}
if(
@ -426,48 +396,13 @@ class sc{
$token = apcu_fetch("sc_token");
if($token !== false){
return $token;
}
// search through all javascript components on the main page
try{
$html =
$this->get(
$proxy,
"https://soundcloud.com",
[],
true
);
}catch(Exception $error){
throw new Exception("Failed to fetch front page");
}
$this->fuckhtml->load($html);
$scripts =
$this->fuckhtml
->getElementsByTagName(
"script"
);
foreach($scripts as $script){
if(
!isset($script["attributes"]["src"]) ||
strpos($script["attributes"]["src"], "sndcdn.com") === false
){
continue;
}
if($token === false){
try{
$js =
$this->get(
$proxy,
$script["attributes"]["src"],
"https://a-v2.sndcdn.com/assets/0-a901c1e0.js",
[]
);
}catch(Exception $error){
@ -481,15 +416,16 @@ class sc{
$token
);
if(isset($token[1])){
if(!isset($token[1])){
throw new Exception("Failed to get search token");
}
apcu_store("sc_token", $token[1]);
return $token[1];
break;
}
}
throw new Exception("Did not find a Soundcloud token in the Javascript blobs");
return $token;
}
private function limitstrlen($text){

View file

@ -252,30 +252,21 @@ class yep{
curl_setopt($curlproc, CURLOPT_URL, $url);
// use http2
curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
// set ciphers
curl_setopt(
$curlproc,
CURLOPT_SSL_CIPHER_LIST,
"aes_128_gcm_sha_256,chacha20_poly1305_sha_256,aes_256_gcm_sha_384,ecdhe_ecdsa_aes_128_gcm_sha_256,ecdhe_rsa_aes_128_gcm_sha_256,ecdhe_ecdsa_chacha20_poly1305_sha_256,ecdhe_rsa_chacha20_poly1305_sha_256,ecdhe_ecdsa_aes_256_gcm_sha_384,ecdhe_rsa_aes_256_gcm_sha_384,ecdhe_ecdsa_aes_256_sha,ecdhe_ecdsa_aes_128_sha,ecdhe_rsa_aes_128_sha,ecdhe_rsa_aes_256_sha,rsa_aes_128_gcm_sha_256,rsa_aes_256_gcm_sha_384,rsa_aes_128_sha,rsa_aes_256_sha"
);
curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
curl_setopt($curlproc, CURLOPT_HTTPHEADER,
["User-Agent: " . config::USER_AGENT,
"Accept: */*",
"Accept-Language: en-US,en;q=0.5",
"Accept-Encoding: gzip, deflate, br, zstd",
"Referer: https://yep.com/",
"Origin: https://yep.com",
"Connection: keep-alive",
"DNT: 1",
"Priority: u=1",
"Origin: https://yep.com",
"Referer: https://yep.com/",
"Connection: keep-alive",
"Sec-Fetch-Dest: empty",
"Sec-Fetch-Mode: cors",
"Sec-Fetch-Site: same-site",
"Priority: u=4",
"TE: trailers"]
);