0
Fork 0
mirror of https://git.lolcat.ca/lolcat/4get.git synced 2025-01-01 00:03:55 -05:00

added yandex web and video search, removed fb search

This commit is contained in:
lolcat 2023-08-27 01:45:59 -04:00
parent 12a6278a5f
commit 1fd4c2de6d
7 changed files with 1178 additions and 78 deletions

BIN
banner/cynic.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 67 KiB

View file

@ -878,6 +878,7 @@ class frontend{
"option" => [ "option" => [
"ddg" => "DuckDuckGo", "ddg" => "DuckDuckGo",
"brave" => "Brave", "brave" => "Brave",
"yandex" => "Yandex",
//"google" => "Google", //"google" => "Google",
"mojeek" => "Mojeek", "mojeek" => "Mojeek",
"marginalia" => "Marginalia", "marginalia" => "Marginalia",
@ -903,9 +904,10 @@ class frontend{
"display" => "Scraper", "display" => "Scraper",
"option" => [ "option" => [
"yt" => "YouTube", "yt" => "YouTube",
"fb" => "Facebook videos", //"fb" => "Facebook videos",
"ddg" => "DuckDuckGo", "ddg" => "DuckDuckGo",
"brave" => "Brave"//, "brave" => "Brave",
"yandex" => "Yandex"
//"google" => "Google" //"google" => "Google"
] ]
]; ];
@ -972,11 +974,11 @@ class frontend{
include "scraper/google.php"; include "scraper/google.php";
$lib = new google(); $lib = new google();
break; break;
/*
case "fb": case "fb":
include "scraper/facebook.php"; include "scraper/facebook.php";
$lib = new facebook(); $lib = new facebook();
break; break;*/
case "mojeek": case "mojeek":
include "scraper/mojeek.php"; include "scraper/mojeek.php";

View file

@ -1182,6 +1182,28 @@ class brave{
$this->fuckhtml->load($infobox); $this->fuckhtml->load($infobox);
$div = $this->fuckhtml->getElementsByTagName("div"); $div = $this->fuckhtml->getElementsByTagName("div");
/*
Get small description
*/
$small_desc =
$this->fuckhtml
->getElementsByClassName(
"infobox-description",
$div
);
if(count($small_desc) !== 0){
$answer["description"][] = [
"type" => "quote",
"value" =>
$this->fuckhtml
->getTextContent(
$small_desc[0]
)
];
}
/* /*
Get title + url Get title + url
*/ */
@ -1292,28 +1314,25 @@ class brave{
if(count($code) === 0){ if(count($code) === 0){
$answer["description"] = $answer["description"][] = [
[ "type" => "text",
[ "value" =>
"type" => "text", $this->fuckhtml
"value" => ->getTextContent(
$this->fuckhtml $desc_tmp
->getTextContent( )
$desc_tmp ];
)
], $answer["description"][] = [
[ "type" => "quote",
"type" => "quote", "value" =>
"value" => $this->fuckhtml
$this->fuckhtml ->getTextContent(
->getTextContent( $author
$author )
)
]
]; ];
}else{ }else{
$text = [];
$i = 0; $i = 0;
foreach($code as $snippet){ foreach($code as $snippet){
@ -1344,7 +1363,7 @@ class brave{
); );
$value = $this->fuckhtml->getTextContent($tmphtml[0], false, false); $value = $this->fuckhtml->getTextContent($tmphtml[0], false, false);
$this->appendtext($value, $text, $i); $this->appendtext($value, $answer["description"], $i);
$type = null; $type = null;
switch($tag["tagName"]){ switch($tag["tagName"]){
@ -1365,10 +1384,10 @@ class brave{
$type == "title" $type == "title"
){ ){
$text[$i - 1]["value"] = rtrim($text[$i - 1]["value"]); $answer["description"][$i - 1]["value"] = rtrim($answer["description"][$i - 1]["value"]);
} }
$text[] = [ $answer["description"][] = [
"type" => $type, "type" => $type,
"value" => $value "value" => $value
]; ];
@ -1393,21 +1412,21 @@ class brave{
if(strlen($tmphtml) !== 0){ if(strlen($tmphtml) !== 0){
$value = $this->fuckhtml->getTextContent($tmphtml, false, false); $value = $this->fuckhtml->getTextContent($tmphtml, false, false);
$this->appendtext($value, $text, $i); $this->appendtext($value, $answer["description"], $i);
} }
break; break;
case "pre": case "pre":
switch($text[$i - 1]["type"]){ switch($answer["description"][$i - 1]["type"]){
case "text": case "text":
case "italic": case "italic":
$text[$i - 1]["value"] = rtrim($text[$i - 1]["value"]); $answer["description"][$i - 1]["value"] = rtrim($answer["description"][$i - 1]["value"]);
break; break;
} }
$text[] = $answer["description"][] =
[ [
"type" => "code", "type" => "code",
"value" => "value" =>
@ -1441,7 +1460,7 @@ class brave{
->getTextContent( ->getTextContent(
$elem $elem
), ),
$text, $answer["description"],
$i $i
); );
} }
@ -1451,21 +1470,19 @@ class brave{
if( if(
$i !== 0 && $i !== 0 &&
$text[$i - 1]["type"] == "text" $answer["description"][$i - 1]["type"] == "text"
){ ){
$text[$i - 1]["value"] = rtrim($text[$i - 1]["value"]); $answer["description"][$i - 1]["value"] = rtrim($answer["description"][$i - 1]["value"]);
} }
if($author){ if($author){
$text[] = [ $answer["description"][] = [
"type" => "quote", "type" => "quote",
"value" => $this->fuckhtml->getTextContent($author) "value" => $this->fuckhtml->getTextContent($author)
]; ];
} }
$answer["description"] = $text;
} }
}else{ }else{
@ -1481,22 +1498,20 @@ class brave{
if(count($description) !== 0){ if(count($description) !== 0){
$description = $answer["description"][] =
[ [
[ "type" => "text",
"type" => "text", "value" =>
"value" => $this->titledots(
$this->titledots( preg_replace(
preg_replace( '/ Wikipedia$/',
'/ Wikipedia$/', "",
"", $this->fuckhtml
$this->fuckhtml ->getTextContent(
->getTextContent( $description[0]
$description[0]
)
) )
) )
] )
]; ];
$ratings = $ratings =
@ -1514,7 +1529,7 @@ class brave{
"div" "div"
); );
$description[] = [ $answer["description"][] = [
"type" => "title", "type" => "title",
"value" => "Ratings" "value" => "Ratings"
]; ];
@ -1550,36 +1565,34 @@ class brave{
)[0] )[0]
); );
$c = count($description) - 1; $c = count($answer["description"]) - 1;
if( if(
$c !== -1 && $c !== -1 &&
$description[$c]["type"] == "text" $answer["description"][$c]["type"] == "text"
){ ){
$description[$c]["value"] .= $num . " "; $answer["description"][$c]["value"] .= $num . " ";
}else{ }else{
$description[] = [ $answer["description"][] = [
"type" => "text", "type" => "text",
"value" => $num . " " "value" => $num . " "
]; ];
} }
$description[] = [ $answer["description"][] = [
"type" => "link", "type" => "link",
"value" => $this->fuckhtml->getTextContent($href), "value" => $this->fuckhtml->getTextContent($href),
"url" => $this->fuckhtml->getTextContent($href["attributes"]["href"]) "url" => $this->fuckhtml->getTextContent($href["attributes"]["href"])
]; ];
$description[] = [ $answer["description"][] = [
"type" => "text", "type" => "text",
"value" => " (" . $votes . ")\n" "value" => " (" . $votes . ")\n"
]; ];
} }
} }
$answer["description"] = $description;
} }
} }

View file

@ -228,16 +228,16 @@ class facebook{
) )
); );
} }
/*
$html = $html =
$this->get( $this->get(
"https://www.facebook.com/watch/search/", "https://www.facebook.com/watch/search/",
$req $req
); );*/
/*
$handle = fopen("scraper/facebook.html", "r"); $handle = fopen("scraper/facebook.html", "r");
$html = fread($handle, filesize("scraper/facebook.html")); $html = fread($handle, filesize("scraper/facebook.html"));
fclose($handle);*/ fclose($handle);
preg_match_all( preg_match_all(
'/({"__bbox":.*,"sequence_number":0}})\]\]/', '/({"__bbox":.*,"sequence_number":0}})\]\]/',

View file

@ -824,8 +824,6 @@ class google{
$html = fread($handle, filesize("scraper/google.html")); $html = fread($handle, filesize("scraper/google.html"));
fclose($handle); fclose($handle);
$this->fuckhtml->load($html);
$out = [ $out = [
"status" => "ok", "status" => "ok",
"spelling" => [ "spelling" => [
@ -841,6 +839,507 @@ class google{
"news" => [], "news" => [],
"related" => [] "related" => []
]; ];
$this->parsejavascript($html);
$containers =
$this->fuckhtml
->getElementsByClassName(
$this->findstyles(
[
"background-color" => "#fff",
"margin-bottom" => "10px",
"-webkit-box-shadow" => "0 1px 6px rgba(32,33,36,0.28)",
"border-radius" => "8px"
],
self::is_class
),
"div"
);
foreach($containers as $container){
$this->fuckhtml->load($container);
$title =
$this->fuckhtml
->getElementsByClassName(
$this->findstyles(
[
"color" => "#1967d2",
"font-size" => "20px",
"line-height" => "26px"
],
self::is_class
),
"div"
);
if(count($title) !== 0){
/*
Container is a web link
*/
$web = [
"title" =>
$this->titledots(
$this->fuckhtml
->getTextContent(
$title[0]
)
),
"description" => null,
"url" =>
$this->decodeurl(
$this->fuckhtml
->getElementsByTagName("a")
[0]
["attributes"]
["href"]
),
"date" => null,
"type" => "web",
"thumb" => [
"url" => null,
"ratio" => null
],
"sublink" => [],
"table" => []
];
$container = $container["innerHTML"];
$description_container =
$this->fuckhtml
->getElementsByClassName(
$this->findstyles(
[
"padding" => "12px 16px 12px"
],
self::is_class
),
"div"
)[1];
$description =
$description_container["innerHTML"];
// get sublinks
$this->fuckhtml->load($description);
$links =
$this->fuckhtml
->getElementsByTagName("a");
$skip = true;
foreach($links as $link){
$description =
str_replace(
$link["outerHTML"],
"",
$description
);
if($skip){
$skip = false;
continue;
}
$sublink = [
"title" => null,
"description" => null,
"url" => null,
"date" => null
];
$sublink["title"] =
$this->fuckhtml
->getTextContent(
$link
);
$sublink["url"] =
$this->decodeurl(
$link
["attributes"]
["href"]
);
$web["sublink"][] = $sublink;
}
// get thumbnail before we call loadhtml again
$img =
$this->fuckhtml
->getElementsByTagName("img");
if(count($img) !== 0){
if(
isset($img[0]["attributes"]["alt"]) &&
stripos($img[0]["attributes"]["alt"], "Video for") !== false
){
// is a video thumbnail
$web["thumb"]["ratio"] = "16:9";
}else{
// is a google thumbnail
$web["thumb"]["ratio"] = "1:1";
}
$web["thumb"]["url"] =
$this->getimage(
$img[0]["attributes"]["id"]
);
}
// get table elements
$this->fuckhtml->load($description);
$levels =
$this->fuckhtml
->getElementsByClassName(
$this->findstyles(
[
"padding-bottom" => "8px"
],
self::is_class
),
"div"
);
$additional_info = [];
foreach($levels as $level){
$this->fuckhtml->load($level);
$spans =
$this->fuckhtml
->getElementsByTagName(
"span"
);
$is_rating = -2;
foreach($spans as $span){
// clean up description
$description =
str_replace(
$span["outerHTML"],
"",
$description
);
$innertext =
$this->fuckhtml
->getTextContent(
$span
);
if($innertext == ""){ continue; }
if(
strtolower($innertext)
== "rating"
){
$is_rating = -1;
continue;
}
/*
Parse rating object
*/
if($is_rating >= -1){
if($span["level"] !== 1){ continue; }
$is_rating++;
// 10/10 (123)
if($is_rating === 0){
$innertext = explode(" ", $innertext, 2);
$web["table"]["Rating"] = $innertext[0];
$web["table"]["Hits"] =
trim(
str_replace(
[
"(",
")"
],
"",
$innertext[1]
)
);
continue;
}
// US$4.99
// MYR 50.00
// $38.34
// JP¥6,480
if($is_rating === 2){
$web["table"]["Price"] = $innertext;
continue;
}
// Android / In stock
if($is_rating === 4){
$web["table"]["Support"] = $innertext;
continue;
}
// ignore the rest
continue;
}
/*
Parse standalone text
*/
$additional_info[] = $innertext;
}
}
for($i=0; $i<count($additional_info); $i++){
// @TODO
// generate better node names
$web["table"]["Info node #$i"] = $additional_info[$i];
}
$this->fuckhtml->load($description);
// get date node
$span =
$this->fuckhtml
->getElementsByTagName(
"span"
);
if(count($span) !== 0){
$description =
str_replace(
$span[0]["outerHTML"],
"",
$description
);
$span =
strtotime(
$this->fuckhtml
->getTextContent(
$span[0]
)
);
if($span){
$web["date"] = $span;
}
}
$web["description"] =
trim(
$this->fuckhtml
->getTextContent(
$description
),
" ·."
);
$out["web"][] = $web;
continue;
}
// check for container title header
$container_title =
$this->fuckhtml
->getElementsByClassName(
$this->findstyles(
[
"font-weight" => "bold",
"font-size" => "16px",
"color" => "#000",
"margin" => "0",
"padding" => "12px 16px 0 16px"
],
self::is_class
),
"div"
);
if(count($container_title) !== 0){
$container_title =
strtolower(
$this->fuckhtml
->getTextContent(
$container_title[0]
)
);
if(
$container_title == "related searches" ||
$container_title == "people also search for"
){
/*
Parse related searches
*/
$as =
$this->fuckhtml
->getElementsByTagName("a");
foreach($as as $a){
$out["related"][] =
$this->fuckhtml
->getTextContent($a);
}
}
continue;
}
/*
Parse image carousel
*/
$title_container =
$this->fuckhtml
->getElementsByClassName(
$this->findstyles(
[
"padding" => "12px 16px 12px"
],
self::is_class
),
"div"
);
if(count($title_container) !== 0){
$title_container =
strtolower(
$this->fuckhtml
->getTextContent(
$title_container[0]
)
);
if($title_container == "imagesview all"){
/*
Image carousel
*/
$pcitem =
$this->fuckhtml
->getElementsByClassName(
"pcitem",
"div"
);
foreach($pcitem as $item){
$this->fuckhtml->load($item);
$link =
$this->fuckhtml
->getElementsByTagName(
"a"
)[0];
parse_str(
parse_url(
$this->fuckhtml
->getTextContent(
$link
["attributes"]
["href"]
),
PHP_URL_QUERY
),
$link
);
if(isset($link["tbm"])){
continue;
}
$image =
$this->fuckhtml
->getElementsByTagName("img")[0];
$title =
$this->fuckhtml
->getTextContent(
$image
["attributes"]
["alt"]
);
$image =
$this->getimage(
$image
["attributes"]
["id"]
);
$out["image"][] = [
"title" => $title,
"source" => [
[
"url" => $link["imgurl"],
"width" => (int)$link["w"],
"height" => (int)$link["h"]
],
[
"url" => $image,
"width" => (int)$link["tbnw"],
"height" => (int)$link["tbnh"]
]
],
"url" => $link["imgrefurl"]
];
}
}
}
/*
Get next page
*/
$as =
$this->fuckhtml
->getElementsByTagName("a");
foreach($as as $a){
if(
isset($a["attributes"]["aria-label"]) &&
strtolower($a["attributes"]["aria-label"]) == "next page"
){
$out["npt"] =
$this->fuckhtml
->getTextContent(
$a["attributes"]["href"]
);
}
}
}
return $out;
} }
@ -1163,7 +1662,7 @@ class google{
return $time; return $time;
} }
private function loadjavascriptcrap($html){ private function parsejavascript($html){
$this->fuckhtml->load($html); $this->fuckhtml->load($html);

View file

@ -18,8 +18,6 @@ class yandex{
$curlproc = curl_init(); $curlproc = curl_init();
$search = $get["text"];
if($get !== []){ if($get !== []){
$get = http_build_query($get); $get = http_build_query($get);
$url .= "?" . $get; $url .= "?" . $get;
@ -40,7 +38,7 @@ class yandex{
"Accept-Language: en-US,en;q=0.5", "Accept-Language: en-US,en;q=0.5",
"DNT: 1", "DNT: 1",
"Cookie: yp=1716337604.sp.family%3A{$nsfw}#1685406411.szm.1:1920x1080:1920x999", "Cookie: yp=1716337604.sp.family%3A{$nsfw}#1685406411.szm.1:1920x1080:1920x999",
"Referer: https://yandex.com/images/search?text={$search}", "Referer: https://yandex.com/images/search",
"Connection: keep-alive", "Connection: keep-alive",
"Upgrade-Insecure-Requests: 1", "Upgrade-Insecure-Requests: 1",
"Sec-Fetch-Dest: document", "Sec-Fetch-Dest: document",
@ -72,6 +70,35 @@ class yandex{
switch($pagetype){ switch($pagetype){
case "web":
return [
"lang" => [
"display" => "Language",
"option" => [
"any" => "Any language",
"en" => "English",
"ru" => "Russian",
"be" => "Belorussian",
"fr" => "French",
"de" => "German",
"id" => "Indonesian",
"kk" => "Kazakh",
"tt" => "Tatar",
"tr" => "Turkish",
"uk" => "Ukrainian"
]
],
"newer" => [
"display" => "Newer than",
"option" => "_DATE"
],
"older" => [
"display" => "Older than",
"option" => "_DATE"
]
];
break;
case "images": case "images":
return return
[ [
@ -149,12 +176,214 @@ class yandex{
]; ];
break; break;
default: case "videos":
return []; return [
"nsfw" => [
"display" => "NSFW",
"option" => [
"yes" => "Yes",
"maybe" => "Maybe",
"no" => "No"
]
],
"time" => [
"display" => "Time posted",
"option" => [
"any" => "Any time",
"9" => "Recently"
]
],
"duration" => [
"display" => "Duration",
"option" => [
"any" => "Any duration",
"short" => "Short"
]
]
];
break; break;
} }
} }
/**
 * Scrape web results from Yandex's embeddable site-search endpoint.
 *
 * @param array $get Request parameters. "npt" (next-page token) is always
 *                   read; when it is empty, "s" (query), "lang", "older"
 *                   and "newer" are read as well. "older"/"newer" are
 *                   compared against false and passed to date(), so they
 *                   are presumably unix timestamps or false when unset.
 * @return array Result set with the keys "status", "spelling", "npt",
 *               "answer", "web", "image", "video", "news" and "related".
 * @throws Exception when the search page cannot be fetched
 */
public function web($get){
	
	// has captcha
	// https://yandex.com/search/touch/?text=lol&app_platform=android&appsearch_header=1&ui=webmobileapp.yandex&app_version=23070603&app_id=ru.yandex.searchplugin&search_source=yandexcom_touch_native&clid=2218567
	
	// https://yandex.com/search/site/?text=minecraft&web=1&frame=1&v=2.0&searchid=3131712
	// &within=777&from_day=26&from_month=8&from_year=2023&to_day=26&to_month=8&to_year=2023
	
	if($get["npt"]){
		
		// the stored token is the relative URL of the next page
		$url = "https://yandex.com" . $this->nextpage->get($get["npt"], "web");
		$params = [];
	}else{
		
		$search = $get["s"];
		$lang = $get["lang"];
		$older = $get["older"];
		$newer = $get["newer"];
		
		$url = "https://yandex.com/search/site/";
		$params = [
			"text" => $search,
			"web" => "1",
			"frame" => "1",
			"searchid" => "3131712"
		];
		
		if($lang != "any"){
			
			$params["lang"] = $lang;
		}
		
		// an upper bound alone still needs a lower bound: start at the epoch
		if(
			$newer === false &&
			$older !== false
		){
			
			$newer = 0;
		}
		
		if($newer !== false){
			
			$params["from_day"] = date("j", $newer);
			$params["from_month"] = date("n", $newer);
			$params["from_year"] = date("Y", $newer);
			
			// a lower bound alone ends at the current time
			if($older === false){
				
				$older = time();
			}
			
			$params["to_day"] = date("j", $older);
			$params["to_month"] = date("n", $older);
			$params["to_year"] = date("Y", $older);
		}
	}
	
	/*
	$handle = fopen("scraper/yandex.html", "r");
	$html = fread($handle, filesize("scraper/yandex.html"));
	fclose($handle);*/
	
	// first page and next page share the same fetch and the same failure mode
	try{
		$html = $this->get($url, $params, "yes");
	}catch(Exception $error){
		
		// keep the underlying failure reachable through getPrevious()
		throw new Exception("Could not get search page", 0, $error);
	}
	
	$out = [
		"status" => "ok",
		"spelling" => [
			"type" => "no_correction",
			"using" => null,
			"correction" => null
		],
		"npt" => null,
		"answer" => [],
		"web" => [],
		"image" => [],
		"video" => [],
		"news" => [],
		"related" => []
	];
	
	$this->fuckhtml->load($html);
	
	// get nextpage
	$npt =
		$this->fuckhtml
		->getElementsByClassName(
			"b-pager__next",
			"a"
		);
	
	if(
		count($npt) !== 0 &&
		isset($npt[0]["attributes"]["href"])
	){
		
		$out["npt"] =
			$this->nextpage->store(
				$this->fuckhtml
				->getTextContent(
					$npt[0]["attributes"]["href"]
				),
				"web"
			);
	}
	
	// get items
	$items =
		$this->fuckhtml
		->getElementsByClassName(
			"b-serp-item",
			"li"
		);
	
	foreach($items as $item){
		
		$this->fuckhtml->load($item);
		
		$link =
			$this->fuckhtml
			->getElementsByClassName(
				"b-serp-item__title-link",
				"a"
			);
		
		if(
			count($link) === 0 ||
			!isset($link[0]["attributes"]["href"])
		){
			
			// no title link means there is no result to point at
			continue;
		}
		
		$link = $link[0];
		
		$description =
			$this->fuckhtml
			->getElementsByClassName(
				"b-serp-item__text",
				"div"
			);
		
		if(count($description) === 0){
			
			// a missing snippet is not fatal
			$description = null;
		}else{
			
			$description =
				$this->titledots(
					$this->fuckhtml
					->getTextContent(
						$description[0]
					)
				);
		}
		
		$out["web"][] = [
			"title" =>
				$this->titledots(
					$this->fuckhtml
					->getTextContent(
						$link
					)
				),
			"description" => $description,
			"url" =>
				$this->fuckhtml
				->getTextContent(
					$link["attributes"]["href"]
				),
			"date" => null,
			"type" => "web",
			"thumb" => [
				"url" => null,
				"ratio" => null
			],
			"sublink" => [],
			"table" => []
		];
	}
	
	return $out;
}
public function image($get){ public function image($get){
if($get["npt"]){ if($get["npt"]){
@ -402,7 +631,7 @@ class yandex{
$json["type"] == "captcha" $json["type"] == "captcha"
){ ){
throw new Exception("Yandex blocked this 4get instance. Yandex blocks don't last very long, but the block timer gets reset everytime you make another unsuccessful request. Please try again in ~7 minutes."); throw new Exception("Yandex blocked this 4get instance. Please try again in ~7 minutes.");
} }
if($json === null){ if($json === null){
@ -513,6 +742,359 @@ class yandex{
return $out; return $out;
} }
/**
 * Scrape video results from Yandex's JSON video search endpoint.
 *
 * @param array $get Request parameters. "npt" (next-page token) is always
 *                   read; when it is empty, "s" (query), "nsfw", "time"
 *                   and "duration" are read as well.
 * @return array Result set with the keys "status", "npt", "video",
 *               "author", "livestream", "playlist" and "reel".
 * @throws Exception when the request fails, the response is not valid JSON,
 *                   or the response carries no "blocks" key
 */
public function video($get){
	
	if($get["npt"]){
		
		// the token holds the previous request parameters plus the nsfw flag
		// NOTE(review): tokens are stored/fetched under "web" here, not a
		// video-specific name -- confirm this is intended
		$params =
			json_decode(
				$this->nextpage->get(
					$get["npt"],
					"web"
				),
				true
			);
		
		$nsfw = $params["nsfw"];
		unset($params["nsfw"]);
	}else{
		
		$search = $get["s"];
		$nsfw = $get["nsfw"];
		$time = $get["time"];
		$duration = $get["duration"];
		
		// Sample request this payload was modelled on:
		// https://yandex.com/video/search
		// ?tmpl_version=releases/frontend/video/v1.1168.0#8d942de0f4ebc4eb6b8f3c24ffbd1f8dbc5bbe63
		// &format=json
		// &request={"blocks":[{"block":"extra-content","params":{},"version":2}, ...],"metadata":{...}}
		// &yu=4861394161661655015
		// &from=tabbar
		// &reqid=1693106278500184-6825210746979814879-balancer-l7leveler-kubr-yp-sas-7-BAL-4237
		// &suggest_reqid=486139416166165501562797413447032
		// &text=minecraft
		
		// every requested block has the same shape, only the name differs
		$blocks = [];
		
		foreach(
			[
				"extra-content",
				"i-global__params:ajax",
				"search2:ajax",
				"vital-incut",
				"content_type_search",
				"serp-controller",
				"cookies_ajax"
			] as $block_name
		){
			
			$blocks[] = [
				"block" => $block_name,
				"params" => (object)[], // must encode as {} and not []
				"version" => 2
			];
		}
		
		$params = [
			"tmpl_version" => "releases/frontend/video/v1.1168.0#8d942de0f4ebc4eb6b8f3c24ffbd1f8dbc5bbe63",
			"format" => "json",
			"request" => json_encode([
				"blocks" => $blocks,
				"metadata" => [
					"bundles" => [
						"lb" => "^G]!q<X120"
					],
					"assets" => [
						"las" => "react-with-dom=1;185.0=1;73.0=1;145.0=1;5a502a.0=1;32c342.0=1;b84ac8.0=1"
					],
					"extraContent" => [
						"names" => [
							"i-react-ajax-adapter"
						]
					]
				]
			]),
			"text" => $search
		];
		
		if($duration != "any"){
			
			$params["duration"] = $duration;
		}
		
		if($time != "any"){
			
			$params["within"] = $time;
		}
	}
	
	/*
	$handle = fopen("scraper/yandex-video.json", "r");
	$json = fread($handle, filesize("scraper/yandex-video.json"));
	fclose($handle);
	*/
	
	try{
		$json =
			$this->get(
				"https://yandex.com/video/search",
				$params,
				$nsfw
			);
	}catch(Exception $error){
		
		// keep the underlying failure reachable through getPrevious()
		throw new Exception("Could not fetch JSON", 0, $error);
	}
	
	$json = json_decode($json, true);
	
	if($json === null){
		
		throw new Exception("Could not parse JSON");
	}
	
	if(!isset($json["blocks"])){
		
		throw new Exception("Yandex blocked this 4get instance. Please try again in ~7 minutes.");
	}
	
	$out = [
		"status" => "ok",
		"npt" => null,
		"video" => [],
		"author" => [],
		"livestream" => [],
		"playlist" => [],
		"reel" => []
	];
	
	// stitch the HTML fragments of all blocks into one document
	$html = "";
	
	foreach($json["blocks"] as $block){
		
		if(isset($block["html"])){
			
			$html .= $block["html"];
		}
	}
	
	$this->fuckhtml->load($html);
	
	$div =
		$this->fuckhtml
		->getElementsByTagName("div");
	
	/*
		Get nextpage
	*/
	$npt =
		$this->fuckhtml
		->getElementsByClassName(
			"more more_direction_next i-bem",
			$div
		);
	
	if(count($npt) !== 0){
		
		// "p" is absent on the first request, so it is presumably a
		// zero-based page index: advance from the page just fetched instead
		// of always pointing at page 1
		$params["p"] = (string)((isset($params["p"]) ? (int)$params["p"] : 0) + 1);
		$params["nsfw"] = $nsfw;
		
		$out["npt"] =
			$this->nextpage->store(
				json_encode($params),
				"web"
			);
	}
	
	$items =
		$this->fuckhtml
		->getElementsByClassName(
			"serp-item",
			$div
		);
	
	foreach($items as $item){
		
		if(!isset($item["attributes"]["data-video"])){
			
			// not a video card
			continue;
		}
		
		$data =
			json_decode(
				$this->fuckhtml
				->getTextContent(
					$item["attributes"]["data-video"]
				),
				true
			);
		
		if(
			!is_array($data) ||
			!isset($data["counters"]["toHostingLoaded"]["postfix"]["href"])
		){
			
			// without metadata or a target URL there is nothing to show
			continue;
		}
		
		$hosting = $data["counters"]["toHostingLoaded"];
		
		$this->fuckhtml->load($item);
		
		$thumb_elem =
			$this->fuckhtml
			->getElementsByClassName(
				"thumb-image__image",
				"img"
			);
		
		$thumb = [
			"url" => null,
			"ratio" => null
		];
		
		if(
			count($thumb_elem) !== 0 &&
			isset($thumb_elem[0]["attributes"]["src"])
		){
			
			$src =
				$this->fuckhtml
				->getTextContent(
					$thumb_elem[0]["attributes"]["src"]
				);
			
			// only a protocol-relative prefix gets a scheme. str_replace()
			// cannot do this: its 4th argument is a count output, not a
			// limit, so it rewrote every "//" including the one in "https://"
			if(strncmp($src, "//", 2) === 0){
				
				$src = "https:" . $src;
			}
			
			$thumb = [
				"url" => $src,
				"ratio" => "16:9"
			];
		}
		
		$smallinfos =
			$this->fuckhtml
			->getElementsByClassName(
				"serp-item__sitelinks-item",
				"div"
			);
		
		$date = null;
		$views = null;
		$first = true;
		
		foreach($smallinfos as $info){
			
			// the first entry is skipped and never parsed
			if($first){
				
				$first = false;
				continue;
			}
			
			$info =
				$this->fuckhtml
				->getTextContent(
					$info
				);
			
			// anything strtotime() understands is the date, the rest a view count
			if($temp_date = strtotime($info)){
				
				$date = $temp_date;
			}else{
				
				$views = $this->parseviews($info);
			}
		}
		
		$description =
			$this->fuckhtml
			->getElementsByClassName(
				"serp-item__text serp-item__text_visibleText_always",
				"div"
			);
		
		if(count($description) === 0){
			
			$description = null;
		}else{
			
			$description =
				$this->titledots(
					$this->fuckhtml
					->getTextContent(
						$description[0]
					)
				);
		}
		
		$title = null;
		
		if(isset($data["title"])){
			
			$title =
				$this->fuckhtml
				->getTextContent(
					$this->titledots(
						$data["title"]
					)
				);
		}
		
		$out["video"][] = [
			"title" => $title,
			"description" => $description,
			"author" => [
				"name" => null,
				"url" => null,
				"avatar" => null
			],
			"date" => $date,
			"duration" =>
				isset($hosting["stredParams"]["duration"]) ?
				(int)$hosting["stredParams"]["duration"] :
				0,
			"views" => $views,
			"thumb" => $thumb,
			"url" =>
				$this->fuckhtml
				->getTextContent(
					$hosting["postfix"]["href"]
				)
		];
	}
	
	return $out;
}
/**
 * Convert a view counter such as "1.5 mln." into a plain number.
 *
 * The text is split on spaces: the first token is the numeric part and the
 * second, when present, is a magnitude suffix ("thsd.", "mln." or "bln.").
 * Any further tokens are ignored, and an unknown or missing suffix leaves
 * the number unscaled.
 *
 * @param string $text Counter text
 * @return float Number of views
 */
private function parseviews($text){
	
	$parts = explode(" ", $text);
	$num = (float)$parts[0];
	
	// a bare counter ("532") carries no magnitude suffix
	$mod = isset($parts[1]) ? $parts[1] : null;
	
	switch($mod){
		
		case "bln.": $num = $num * 1000000000; break;
		case "mln.": $num = $num * 1000000; break;
		case "thsd.": $num = $num * 1000; break;
	}
	
	return $num;
}
private function titledots($title){ private function titledots($title){
$substr = substr($title, -3); $substr = substr($title, -3);

View file

@ -70,6 +70,10 @@ $settings = [
"value" => "brave", "value" => "brave",
"text" => "Brave" "text" => "Brave"
], ],
[
"value" => "yandex",
"text" => "Yandex"
],
/*[ /*[
"value" => "google", "value" => "google",
"text" => "Google" "text" => "Google"
@ -118,10 +122,6 @@ $settings = [
"value" => "yt", "value" => "yt",
"text" => "YouTube" "text" => "YouTube"
], ],
[
"value" => "fb",
"text" => "Facebook videos"
],
[ [
"value" => "ddg", "value" => "ddg",
"text" => "DuckDuckGo" "text" => "DuckDuckGo"
@ -129,6 +129,10 @@ $settings = [
[ [
"value" => "brave", "value" => "brave",
"text" => "Brave" "text" => "Brave"
],
[
"value" => "yandex",
"text" => "Yandex"
]/*, ]/*,
[ [
"value" => "google", "value" => "google",
@ -147,8 +151,8 @@ $settings = [
[ [
"value" => "brave", "value" => "brave",
"text" => "Brave" "text" => "Brave"
],/* ],
[ /*[
"value" => "google", "value" => "google",
"text" => "Google" "text" => "Google"
],*/ ],*/