v8
This commit is contained in:
parent
9e18327df6
commit
130358a9e0
16 changed files with 1385 additions and 457 deletions
893
scraper/qwant.php
Normal file
893
scraper/qwant.php
Normal file
|
@ -0,0 +1,893 @@
|
|||
<?php
|
||||
|
||||
class qwant{
|
||||
|
||||
/**
 * Backend helper created in the constructor; the other methods call
 * assign_proxy(), get_ip(), get() and store() on it.
 *
 * Declared explicitly (and kept public, matching the visibility the former
 * dynamic property had) because creating undeclared properties is
 * deprecated as of PHP 8.2.
 */
public $backend;

/**
 * Load the backend library and create the backend instance for "qwant".
 */
public function __construct(){
	
	// include_once instead of include: constructing a second scraper in the
	// same request must not execute the file again, which would redeclare
	// the backend class and abort with a fatal error.
	include_once "lib/backend.php";
	
	$this->backend = new backend("qwant");
}
|
||||
|
||||
/**
 * Return the filter definitions available for a search page.
 *
 * Every page gets the "nsfw" and "country" filters; "web", "images",
 * "videos" and "news" append their own filters after those two. Any other
 * page name yields only the two common filters.
 *
 * @param string $page Page name ("web", "images", "videos" or "news").
 * @return array Map of filter name => ["display" => label, "option" => [value => label]].
 *               The "extendedsearch" filter deliberately has no "display" entry.
 */
public function getfilters($page){
	
	$filters = [
		"nsfw" => [
			"display" => "NSFW",
			"option" => [
				"yes" => "Yes",
				"maybe" => "Maybe",
				"no" => "No"
			]
		],
		"country" => [
			"display" => "Country",
			"option" => [
				"en_US" => "United States",
				"fr_FR" => "France", // was listed twice; one entry is enough
				"en_GB" => "Great Britain",
				"de_DE" => "Germany",
				"it_IT" => "Italy",
				"es_AR" => "Argentina",
				"en_AU" => "Australia",
				"es_ES" => "Spain (es)",
				"ca_ES" => "Spain (ca)",
				"cs_CZ" => "Czech Republic",
				"ro_RO" => "Romania",
				"el_GR" => "Greece",
				"zh_CN" => "China",
				"zh_HK" => "Hong Kong",
				"en_NZ" => "New Zealand",
				"th_TH" => "Thailand",
				"ko_KR" => "South Korea",
				"sv_SE" => "Sweden",
				"nb_NO" => "Norway",
				"da_DK" => "Denmark",
				"hu_HU" => "Hungary",
				"et_EE" => "Estonia",
				"es_MX" => "Mexico",
				"es_CL" => "Chile",
				"en_CA" => "Canada (en)",
				"fr_CA" => "Canada (fr)",
				"en_MY" => "Malaysia",
				"bg_BG" => "Bulgaria",
				"fi_FI" => "Finland",
				"pl_PL" => "Poland",
				"nl_NL" => "Netherlands",
				"pt_PT" => "Portugal",
				"de_CH" => "Switzerland (de)",
				"fr_CH" => "Switzerland (fr)",
				"it_CH" => "Switzerland (it)",
				"de_AT" => "Austria",
				"fr_BE" => "Belgium (fr)",
				"nl_BE" => "Belgium (nl)",
				"en_IE" => "Ireland",
				"he_IL" => "Israel"
			]
		]
	];
	
	// "Time posted" filter shared verbatim by the web and images pages
	$time = [
		"display" => "Time posted",
		"option" => [
			"any" => "Any time",
			"day" => "Past 24 hours",
			"week" => "Past week",
			"month" => "Past month"
		]
	];
	
	// Page-specific filters are appended in the order they should be listed
	switch($page){
		
		case "web":
			$filters["time"] = $time;
			$filters["extendedsearch"] = [
				// no display, wont show in interface
				"option" => [
					"yes" => "Yes",
					"no" => "No"
				]
			];
			break;
		
		case "images":
			$filters["time"] = $time;
			$filters["size"] = [
				"display" => "Size",
				"option" => [
					"any" => "Any size",
					"large" => "Large",
					"medium" => "Medium",
					"small" => "Small"
				]
			];
			$filters["color"] = [
				"display" => "Color",
				"option" => [
					"any" => "Any color",
					"coloronly" => "Color only",
					"monochrome" => "Monochrome",
					"black" => "Black",
					"brown" => "Brown",
					"gray" => "Gray",
					"white" => "White",
					"yellow" => "Yellow",
					"orange" => "Orange",
					"red" => "Red",
					"pink" => "Pink",
					"purple" => "Purple",
					"blue" => "Blue",
					"teal" => "Teal",
					"green" => "Green"
				]
			];
			$filters["imagetype"] = [
				"display" => "Type",
				"option" => [
					"any" => "Any type",
					"animatedgif" => "Animated GIF",
					"photo" => "Photograph",
					"transparent" => "Transparent"
				]
			];
			$filters["license"] = [
				"display" => "License",
				"option" => [
					"any" => "Any license",
					"share" => "Non-commercial reproduction and sharing",
					"sharecommercially" => "Reproduction and sharing",
					"modify" => "Non-commercial reproduction, sharing and modification",
					"modifycommercially" => "Reproduction, sharing and modification",
					"public" => "Public domain"
				]
			];
			break;
		
		case "videos":
			$filters["order"] = [
				"display" => "Order by",
				"option" => [
					"relevance" => "Relevance",
					"views" => "Views",
					"date" => "Most recent"
				]
			];
			$filters["source"] = [
				"display" => "Source",
				"option" => [
					"any" => "Any source",
					"youtube" => "YouTube",
					"dailymotion" => "Dailymotion"
				]
			];
			break;
		
		case "news":
			// news has an extra "hour" option, so it does not reuse $time
			$filters["time"] = [
				"display" => "Time posted",
				"option" => [
					"any" => "Any time",
					"hour" => "Less than 1 hour ago",
					"day" => "Past 24 hours",
					"week" => "Past week",
					"month" => "Past month"
				]
			];
			$filters["order"] = [
				"display" => "Order by",
				"option" => [
					"relevance" => "Relevance",
					"date" => "Most recent"
				]
			];
			break;
	}
	
	return $filters;
}
|
||||
|
||||
/**
 * Perform an HTTP GET request with cURL and return the raw response body.
 *
 * @param mixed  $proxy Proxy descriptor handed unchanged to backend::assign_proxy().
 * @param string $url   Target URL without a query string.
 * @param array  $get   Query parameters; appended to $url when not empty.
 * @return string|bool  Whatever curl_exec() returned for the transfer.
 * @throws Exception    With the cURL error message when the transfer fails.
 */
private function get($proxy, $url, $get = []){
	
	$headers = [
		"User-Agent: " . config::USER_AGENT,
		"Accept: application/json, text/plain, */*",
		"Accept-Language: en-US,en;q=0.5",
		"Accept-Encoding: gzip",
		"DNT: 1",
		"Connection: keep-alive",
		"Origin: https://www.qwant.com",
		"Referer: https://www.qwant.com/",
		"Sec-Fetch-Dest: empty",
		"Sec-Fetch-Mode: cors",
		"Sec-Fetch-Site: same-site",
		"TE: trailers"
	];
	
	if($get !== []){
		
		$url .= "?" . http_build_query($get);
	}
	
	$curlproc = curl_init();
	
	// try/finally so the handle is released on the failure path as well;
	// previously the exception was thrown before curl_close() was reached.
	try{
		
		curl_setopt($curlproc, CURLOPT_URL, $url);
		
		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
		curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
		
		// Bypass HTTP/2 check
		curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
		
		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
		curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
		
		$this->backend->assign_proxy($curlproc, $proxy);
		
		$data = curl_exec($curlproc);
		
		if(curl_errno($curlproc)){
			
			throw new Exception(curl_error($curlproc));
		}
		
		return $data;
		
	}finally{
		
		curl_close($curlproc);
	}
}
|
||||
|
||||
/**
 * Run a Qwant web search and convert the response to the scraper's
 * result structure.
 *
 * When $get["npt"] is set, the query parameters and proxy stored for that
 * token are loaded from the backend; otherwise a new query is built from
 * "s", "time", "country" and "nsfw". "extendedsearch" is read in both
 * cases and, when "yes", triggers a second request for the sidebar answer.
 *
 * @param array $get Request parameters as described above.
 * @return array Keys: status, spelling, npt, answer, web, image, video, news, related.
 * @throws Exception On an empty or over-long search term, a failed request,
 *                   undecodable JSON, or an error reported by the server.
 */
public function web($get){
	
	if($get["npt"]){
		
		// get next page data
		[$params, $proxy] = $this->backend->get($get["npt"], "web");
		
		$params = json_decode($params, true);
		
	}else{
		
		// get _GET data instead
		$search = $get["s"];
		
		if(strlen($search) === 0){
			
			throw new Exception("Search term is empty!");
		}
		
		if(strlen($search) > 2048){
			
			throw new Exception("Search term is too long!");
		}
		
		$proxy = $this->backend->get_ip();
		
		$params = [
			"q" => $search,
			"freshness" => $get["time"],
			"count" => 10,
			"locale" => $get["country"],
			"offset" => 0,
			"device" => "desktop",
			"tgp" => 3,
			"safesearch" => 0,
			"displayed" => "true"
		];
		
		switch($get["nsfw"]){
			
			case "yes": $params["safesearch"] = 0; break;
			case "maybe": $params["safesearch"] = 1; break;
			case "no": $params["safesearch"] = 2; break;
		}
	}
	
	// Result skeleton. It is built BEFORE the response checks because the
	// error_code 5 branch below returns it as-is; previously that branch
	// returned an undefined variable (null plus a warning).
	$out = [
		"status" => "ok",
		"spelling" => [
			"type" => "no_correction",
			"using" => null,
			"correction" => null
		],
		"npt" => null,
		"answer" => [],
		"web" => [],
		"image" => [],
		"video" => [],
		"news" => [],
		"related" => []
	];
	
	try{
		$json =
			$this->get(
				$proxy,
				"https://fdn.qwant.com/v3/search/web",
				$params
			);
		
	}catch(Exception $error){
		
		throw new Exception("Could not fetch JSON");
	}
	
	$json = json_decode($json, true);
	
	if($json === NULL){
		
		throw new Exception("Failed to decode JSON");
	}
	
	if(isset($json["data"]["message"][0])){
		
		throw new Exception("Server returned an error:\n" . $json["data"]["message"][0]);
	}
	
	if(($json["status"] ?? null) != "success"){
		
		$error_code = $json["data"]["error_code"] ?? null;
		
		// NOTE(review): error code 5 is treated as "nothing to show" and
		// answered with the empty skeleton - presumably Qwant's "no results"
		// code; confirm against the API.
		if($error_code === 5){
			
			return $out;
		}
		
		throw new Exception("Server returned an error code: " . $error_code);
	}
	
	if(!isset($json["data"]["result"]["items"]["mainline"])){
		
		throw new Exception("Server did not return a result object");
	}
	
	// data is OK, parse
	
	// get instant answer
	if(
		$get["extendedsearch"] == "yes" &&
		isset($json["data"]["result"]["items"]["sidebar"][0]["endpoint"])
	){
		
		try{
			$answer =
				$this->get(
					$proxy,
					"https://api.qwant.com/v3" .
					$json["data"]["result"]["items"]["sidebar"][0]["endpoint"],
					[]
				);
			
			$answer = json_decode($answer, true);
			
			if(
				$answer === null ||
				($answer["status"] ?? null) != "success" ||
				!isset($answer["data"]["result"])
			){
				
				throw new Exception();
			}
			
			// the thumbnail is optional; ?? avoids undefined-index warnings
			$landscape = $answer["data"]["result"]["thumbnail"]["landscape"] ?? null;
			
			// parse answer
			$out["answer"][] = [
				"title" => $answer["data"]["result"]["title"],
				"description" => [
					[
						"type" => "text",
						"value" => $this->trimdots($answer["data"]["result"]["description"])
					]
				],
				"url" => $answer["data"]["result"]["url"],
				"thumb" =>
					$landscape == null ?
					null :
					$this->unshitimage($landscape, false),
				"table" => [],
				"sublink" => []
			];
			
		}catch(Exception $error){
			
			// the instant answer is best-effort: on any failure the search
			// results are still returned, just without an answer
		}
	}
	
	// get word correction
	if(isset($json["data"]["query"]["queryContext"]["alteredQuery"])){
		
		$out["spelling"] = [
			"type" => "including",
			"using" => $json["data"]["query"]["queryContext"]["alteredQuery"],
			"correction" => $json["data"]["query"]["queryContext"]["alterationOverrideQuery"] ?? null
		];
	}
	
	// check for next page
	if(($json["data"]["result"]["lastPage"] ?? null) === false){
		
		// the stored parameters already point at the following page
		$params["offset"] = $params["offset"] + 10;
		
		$out["npt"] =
			$this->backend->store(
				json_encode($params),
				"web",
				$proxy
			);
	}
	
	// parse results
	foreach($json["data"]["result"]["items"]["mainline"] as $item){
		
		switch($item["type"]){ // ignores ads
			
			case "web":
				foreach($item["items"] as $result){
					
					if(isset($result["thumbnailUrl"])){
						
						$thumb = [
							"url" => $this->unshitimage($result["thumbnailUrl"]),
							"ratio" => "16:9"
						];
					}else{
						
						$thumb = [
							"url" => null,
							"ratio" => null
						];
					}
					
					$sublinks = [];
					
					if(isset($result["links"])){
						
						foreach($result["links"] as $link){
							
							$sublinks[] = [
								"title" => $this->trimdots($link["title"]),
								"date" => null,
								"description" => isset($link["desc"]) ? $this->trimdots($link["desc"]) : null,
								"url" => $link["url"]
							];
						}
					}
					
					$out["web"][] = [
						"title" => $this->trimdots($result["title"]),
						"description" => $this->trimdots($result["desc"]),
						"url" => $result["url"],
						"date" => null,
						"type" => "web",
						"thumb" => $thumb,
						"sublink" => $sublinks,
						"table" => []
					];
				}
				break;
			
			case "images":
				foreach($item["items"] as $image){
					
					$out["image"][] = [
						"title" => $image["title"],
						"source" => [
							[
								"url" => $image["media"],
								"width" => (int)$image["width"],
								"height" => (int)$image["height"]
							],
							[
								"url" => $this->unshitimage($image["thumbnail"]),
								"width" => $image["thumb_width"],
								"height" => $image["thumb_height"]
							]
						],
						"url" => $image["url"]
					];
				}
				break;
			
			case "videos":
				foreach($item["items"] as $video){
					
					$out["video"][] = [
						"title" => $video["title"],
						"description" => null,
						"date" => (int)$video["date"],
						// divided by 1000 - presumably milliseconds to seconds; confirm
						"duration" => $video["duration"] === null ? null : $video["duration"] / 1000,
						"views" => null,
						"thumb" =>
							$video["thumbnail"] === null ?
							[
								"url" => null,
								"ratio" => null,
							] :
							[
								"url" => $this->unshitimage($video["thumbnail"]),
								"ratio" => "16:9",
							],
						"url" => $video["url"]
					];
				}
				break;
			
			case "related_searches":
				foreach($item["items"] as $related){
					
					$out["related"][] = $related["text"];
				}
				break;
		}
	}
	
	return $out;
}
|
||||
|
||||
|
||||
/**
 * Run a Qwant image search and convert the response to the scraper's
 * result structure.
 *
 * With $get["npt"] set, the stored query and proxy are loaded from the
 * backend; otherwise the query is built from "s", "country", "time",
 * "size", "color", "imagetype", "license" and "nsfw".
 *
 * @param array $get Request parameters as described above.
 * @return array Keys: status, npt, image.
 * @throws Exception On an empty search term, a failed request,
 *                   undecodable JSON, or a non-success API status.
 */
public function image($get){
	
	if(!$get["npt"]){
		
		$term = $get["s"];
		
		if(strlen($term) === 0){
			
			throw new Exception("Search term is empty!");
		}
		
		$proxy = $this->backend->get_ip();
		
		$params = [
			"t" => "images",
			"q" => $term,
			"count" => 125,
			"locale" => $get["country"],
			"offset" => 0, // advanced by 125 for every following page
			"device" => "desktop",
			"tgp" => 3
		];
		
		// filters are only sent when they differ from the "any" default
		if($get["time"] != "any"){
			
			$params["freshness"] = $get["time"];
		}
		
		foreach(["size", "color", "imagetype", "license"] as $filter){
			
			$choice = $get[$filter];
			
			if($choice != "any"){
				
				$params[$filter] = $choice;
			}
		}
		
		// map the nsfw setting to the safesearch level
		$nsfw = $get["nsfw"];
		
		if($nsfw == "yes"){
			
			$params["safesearch"] = 0;
			
		}elseif($nsfw == "maybe"){
			
			$params["safesearch"] = 1;
			
		}elseif($nsfw == "no"){
			
			$params["safesearch"] = 2;
		}
		
	}else{
		
		// next page: restore the stored query and the proxy used for it
		$stored = $this->backend->get($get["npt"], "images");
		
		$proxy = $stored[1];
		$params = json_decode($stored[0], true);
	}
	
	try{
		
		$response =
			$this->get(
				$proxy,
				"https://api.qwant.com/v3/search/images",
				$params
			);
		
	}catch(Exception $err){
		
		throw new Exception("Failed to get JSON");
	}
	
	$json = json_decode($response, true);
	
	if($json === null){
		
		throw new Exception("Failed to decode JSON");
	}
	
	if($json["status"] != "success"){
		
		throw new Exception("Qwant returned an API error");
	}
	
	$out = [
		"status" => "ok",
		"npt" => null,
		"image" => []
	];
	
	$result = $json["data"]["result"];
	
	// more pages available: store the query advanced by one page
	if($result["lastPage"] === false){
		
		$params["offset"] += 125;
		
		$out["npt"] =
			$this->backend->store(
				json_encode($params),
				"images",
				$proxy
			);
	}
	
	foreach($result["items"] as $entry){
		
		// first source is the full image, second the unwrapped thumbnail
		$full = [
			"url" => $entry["media"],
			"width" => $entry["width"],
			"height" => $entry["height"]
		];
		
		$preview = [
			"url" => $this->unshitimage($entry["thumbnail"]),
			"width" => $entry["thumb_width"],
			"height" => $entry["thumb_height"]
		];
		
		$out["image"][] = [
			"title" => $this->trimdots($entry["title"]),
			"source" => [$full, $preview],
			"url" => $entry["url"]
		];
	}
	
	return $out;
}
|
||||
|
||||
/**
 * Run a Qwant video search (first 50 results, no pagination) and convert
 * the response to the scraper's result structure.
 *
 * NOTE(review): only "s", "country" and "nsfw" are read here; the "order"
 * and "source" filters offered by getfilters("videos") are not sent.
 *
 * @param array $get Request parameters: "s", "country", "nsfw".
 * @return array Keys: status, npt, video, author, livestream, playlist, reel.
 * @throws Exception On an empty search term, a failed request,
 *                   unparsable JSON, or a non-success API status.
 */
public function video($get){
	
	$term = $get["s"];
	
	if(strlen($term) === 0){
		
		throw new Exception("Search term is empty!");
	}
	
	$query = [
		"t" => "videos",
		"q" => $term,
		"count" => 50,
		"locale" => $get["country"],
		"offset" => 0, // pagination is intentionally not implemented
		"device" => "desktop",
		"tgp" => 3
	];
	
	// map the nsfw setting to the safesearch level
	$nsfw = $get["nsfw"];
	
	if($nsfw == "yes"){
		
		$query["safesearch"] = 0;
		
	}elseif($nsfw == "maybe"){
		
		$query["safesearch"] = 1;
		
	}elseif($nsfw == "no"){
		
		$query["safesearch"] = 2;
	}
	
	try{
		
		// the proxy lookup stays inside the try so its failures are reported
		// the same way as a failed request
		$response =
			$this->get(
				$this->backend->get_ip(),
				"https://api.qwant.com/v3/search/videos",
				$query
			);
		
	}catch(Exception $error){
		
		throw new Exception("Could not fetch JSON");
	}
	
	$json = json_decode($response, true);
	
	if($json === null){
		
		throw new Exception("Could not parse JSON");
	}
	
	if($json["status"] != "success"){
		
		throw new Exception("Qwant returned an API error");
	}
	
	$out = [
		"status" => "ok",
		"npt" => null,
		"video" => [],
		"author" => [],
		"livestream" => [],
		"playlist" => [],
		"reel" => []
	];
	
	foreach($json["data"]["result"]["items"] as $entry){
		
		// default to "no thumbnail" and fill it in when one is provided
		$thumb = [
			"url" => null,
			"ratio" => null
		];
		
		if(!empty($entry["thumbnail"])){
			
			$thumb = [
				"url" => $this->unshitimage($entry["thumbnail"], false),
				"ratio" => "16:9"
			];
		}
		
		// a duration of 0 is reported as unknown
		$length = (int)$entry["duration"];
		
		if($length === 0){
			
			$length = null;
		}
		
		$out["video"][] = [
			"title" => $entry["title"],
			"description" => $this->limitstrlen($entry["desc"]),
			"author" => [
				"name" => $entry["channel"],
				"url" => null,
				"avatar" => null
			],
			"date" => (int)$entry["date"],
			"duration" => $length,
			"views" => null,
			"thumb" => $thumb,
			// drop the "?syndication=..." suffix from the video URL
			"url" => preg_replace("/\?syndication=.+/", "", $entry["url"])
		];
	}
	
	return $out;
}
|
||||
|
||||
/**
 * Run a Qwant news search (first 50 results, no pagination) and convert
 * the response to the scraper's result structure.
 *
 * NOTE(review): only "s", "country" and "nsfw" are read here; the "time"
 * and "order" filters offered by getfilters("news") are not sent.
 *
 * @param array $get Request parameters: "s", "country", "nsfw".
 * @return array Keys: status, npt, news.
 * @throws Exception On an empty search term, a failed request,
 *                   unparsable JSON, or a non-success API status.
 */
public function news($get){
	
	$term = $get["s"];
	
	if(strlen($term) === 0){
		
		throw new Exception("Search term is empty!");
	}
	
	$query = [
		"t" => "news",
		"q" => $term,
		"count" => 50,
		"locale" => $get["country"],
		"offset" => 0, // pagination is intentionally not implemented
		"device" => "desktop",
		"tgp" => 3
	];
	
	// map the nsfw setting to the safesearch level
	$nsfw = $get["nsfw"];
	
	if($nsfw == "yes"){
		
		$query["safesearch"] = 0;
		
	}elseif($nsfw == "maybe"){
		
		$query["safesearch"] = 1;
		
	}elseif($nsfw == "no"){
		
		$query["safesearch"] = 2;
	}
	
	try{
		
		// the proxy lookup stays inside the try so its failures are reported
		// the same way as a failed request
		$response =
			$this->get(
				$this->backend->get_ip(),
				"https://api.qwant.com/v3/search/news",
				$query
			);
		
	}catch(Exception $error){
		
		throw new Exception("Could not fetch JSON");
	}
	
	$json = json_decode($response, true);
	
	if($json === null){
		
		throw new Exception("Could not parse JSON");
	}
	
	if($json["status"] != "success"){
		
		throw new Exception("Qwant returned an API error");
	}
	
	$out = [
		"status" => "ok",
		"npt" => null,
		"news" => []
	];
	
	foreach($json["data"]["result"]["items"] as $article){
		
		// default to "no thumbnail"; use the first media entry's big picture if present
		$thumb = [
			"url" => null,
			"ratio" => null
		];
		
		if(!empty($article["media"][0]["pict_big"]["url"])){
			
			$thumb = [
				"url" => $this->unshitimage($article["media"][0]["pict_big"]["url"], false),
				"ratio" => "16:9"
			];
		}
		
		$out["news"][] = [
			"title" => $article["title"],
			"author" => $article["press_name"],
			"description" => $this->trimdots($article["desc"]),
			"date" => (int)$article["date"],
			"thumb" => $thumb,
			"url" => $article["url"]
		];
	}
	
	return $out;
}
|
||||
|
||||
/**
 * Shorten a text to its first wrapped line.
 *
 * The text is word-wrapped at 300 characters and everything after the
 * first line break (including one already present in the text) is dropped.
 *
 * @param string $text Text to shorten.
 * @return string First line of the wrapped text.
 */
private function limitstrlen($text){
	
	$wrapped = wordwrap($text, 300, "\n");
	$lines = explode("\n", $wrapped);
	
	return $lines[0];
}
|
||||
|
||||
/**
 * Strip leading and trailing dots and spaces from a text.
 *
 * @param string|null $text Text to clean; null is accepted because callers
 *                          pass optional API fields straight through.
 * @return string The trimmed text, or "" for null.
 */
private function trimdots($text){
	
	// trim(null) yields "" but is deprecated since PHP 8.1, so the same
	// result is returned explicitly
	if($text === null){
		
		return "";
	}
	
	return trim($text, ". ");
}
|
||||
|
||||
/**
 * Unwrap a Qwant thumbnail-proxy URL into the URL of the proxied image.
 *
 * Proxy URLs carry the real image URL in their "u" query parameter, e.g.
 * https://s1.qwant.com/thumbr/0x0/8/d/f6de4deb2c2b12f55d8bdcaae576f9f62fd58a05ec0feeac117b354d1bf5c2/th.jpg?u=https%3A%2F%2Fwww.bing.com%2Fth%3Fid%3DOIP.vvDWsagzxjoKKP_rOqhwrQAAAA%26w%3D160%26h%3D160%26c%3D7%26pid%3D5.1&q=0&b=1&p=0&a=0
 *
 * When $is_bing is true the target is additionally reduced to
 * "https://<host>/th?id=<id>", dropping every other query parameter.
 *
 * Instead of emitting warnings and returning null or a broken URL, input
 * that does not have the expected shape falls back to the best URL
 * available: the target from "u" if there is one, otherwise $url itself.
 *
 * @param string $url     Thumbnail URL as returned by Qwant.
 * @param bool   $is_bing Whether the proxied image is a Bing thumbnail.
 * @return string Unwrapped image URL.
 */
private function unshitimage($url, $is_bing = true){
	
	$query = parse_url((string)$url, PHP_URL_QUERY);
	
	if(!is_string($query)){
		
		// no query string at all: not a proxy URL, nothing to unwrap
		return $url;
	}
	
	parse_str($query, $parts);
	
	if(!isset($parts["u"]) || !is_string($parts["u"])){
		
		return $url;
	}
	
	$target = $parts["u"];
	
	if(!$is_bing){
		
		return $target;
	}
	
	$parse = parse_url($target);
	
	if(!isset($parse["host"], $parse["query"])){
		
		// the target is not of the form host/path?query
		return $target;
	}
	
	parse_str($parse["query"], $bing);
	
	if(!isset($bing["id"]) || !is_string($bing["id"])){
		
		return $target;
	}
	
	return "https://" . $parse["host"] . "/th?id=" . urlencode($bing["id"]);
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue