captcha and imgur, findthatmeme, yep imagesearch
This commit is contained in:
parent
fa9dc4d6ef
commit
3aa0180774
26 changed files with 1710 additions and 63 deletions
249
scraper/imgur.php
Normal file
249
scraper/imgur.php
Normal file
|
@ -0,0 +1,249 @@
|
|||
<?php
|
||||
|
||||
/**
 * Imgur image search scraper.
 *
 * Fetches imgur.com search result pages, extracts the posts found in the
 * returned HTML and converts them into the image result array returned by
 * image(). Pagination state is persisted through the nextpage helper.
 */
class imgur{
	
	/**
	 * Helper from lib/nextpage.php used to store and retrieve the state
	 * needed to request the following page of results.
	 * Declared explicitly: dynamic properties are deprecated since PHP 8.2.
	 */
	public $nextpage;
	
	/**
	 * HTML parsing helper from lib/fuckhtml.php.
	 * Declared explicitly: dynamic properties are deprecated since PHP 8.2.
	 */
	public $fuckhtml;
	
	/**
	 * Loads the helper libraries and instantiates them.
	 */
	public function __construct(){
		
		include "lib/nextpage.php";
		$this->nextpage = new nextpage("imgur");
		
		include "lib/fuckhtml.php";
		$this->fuckhtml = new fuckhtml();
	}
	
	/**
	 * Returns the filters supported by this scraper.
	 *
	 * @param mixed $page Requested page type; not used by this scraper,
	 *                    the same filters are returned regardless.
	 * @return array Filter name => ["display" => label, "option" => [value => label]]
	 */
	public function getfilters($page){
		
		return [
			"sort" => [ // first path segment of the search URL: /score/
				"display" => "Sort by",
				"option" => [
					"score" => "Highest scoring",
					"relevance" => "Most relevant",
					"time" => "Newest first"
				]
			],
			"time" => [ // second path segment of the search URL: /score/day/
				"display" => "Time posted",
				"option" => [
					"all" => "All time",
					"day" => "Today",
					"week" => "This week",
					"month" => "This month",
					"year" => "This year"
				]
			],
			"format" => [ // sent as the q_type query parameter
				"display" => "Format",
				"option" => [
					"any" => "Any format",
					"jpg" => "JPG",
					"png" => "PNG",
					"gif" => "GIF",
					"anigif" => "Animated GIF",
					"album" => "Albums"
				]
			],
			"size" => [ // sent as the q_size_px query parameter
				"display" => "Size",
				"option" => [
					"any" => "Any size",
					"small" => "Small (500px or less)",
					"med" => "Medium (500px to 2000px)",
					"big" => "Big (2000px to 5000px)",
					"lrg" => "Large (5000px to 10000px)",
					"huge" => "Huge (10000px and above)"
				]
			]
		];
	}
	
	/**
	 * Performs an HTTP GET request and returns the response body.
	 *
	 * @param string $url Base URL to request.
	 * @param array  $get Query parameters; when non-empty they are appended
	 *                    to the URL after a leading "scrolled" flag.
	 * @return string|bool Response body as returned by curl_exec().
	 * @throws Exception When cURL reports an error; the message is the cURL error text.
	 */
	private function get($url, $get = []){
		
		$curlproc = curl_init();
		
		if($get !== []){
			
			$get = http_build_query($get);
			$url .= "?scrolled&" . $get;
		}
		
		curl_setopt($curlproc, CURLOPT_URL, $url);
		
		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
		curl_setopt($curlproc, CURLOPT_HTTPHEADER,
			["User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0",
			"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
			"Accept-Language: en-US,en;q=0.5",
			"Accept-Encoding: gzip",
			"DNT: 1",
			"Referer: https://imgur.com/search/",
			"Connection: keep-alive",
			"Sec-Fetch-Dest: empty",
			"Sec-Fetch-Mode: cors",
			"Sec-Fetch-Site: same-origin",
			"TE: trailers",
			"X-Requested-With: XMLHttpRequest"]
		);
		
		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
		curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
		
		$data = curl_exec($curlproc);
		
		if(curl_errno($curlproc)){
			
			// read the message before releasing the handle, then release
			// it so the error path does not leave the handle open
			$message = curl_error($curlproc);
			curl_close($curlproc);
			
			throw new Exception($message);
		}
		
		curl_close($curlproc);
		return $data;
	}
	
	/**
	 * Runs an image search, or continues one from a next-page token.
	 *
	 * @param array $get Request parameters. Reads "npt" (next-page token);
	 *                   when it is falsy, also reads "s" (search terms),
	 *                   "sort", "time", "format" and "size".
	 * @return array ["status" => "ok", "npt" => token|null, "image" => list of results]
	 *               where each result has "title", "source" (two URL entries
	 *               with null width/height) and "url".
	 * @throws Exception When the stored next-page state cannot be decoded,
	 *                   or when the search page cannot be fetched.
	 */
	public function image($get){
		
		if($get["npt"]){
			
			$filter =
				json_decode(
					$this->nextpage->get(
						$get["npt"],
						"images"
					),
					true
				);
			
			if(!is_array($filter)){
				
				// without this guard every field below would silently
				// become null and a malformed search URL would be requested
				throw new Exception("Failed to decode next page token");
			}
			
			// pull our bookkeeping fields back out; what remains in
			// $filter is the query string sent to imgur
			$search = $filter["s"];
			unset($filter["s"]);
			
			$sort = $filter["sort"];
			unset($filter["sort"]);
			
			$time = $filter["time"];
			unset($filter["time"]);
			
			$format = $filter["format"];
			unset($filter["format"]);
			
			$size = $filter["size"];
			unset($filter["size"]);
			
			$page = $filter["page"];
			unset($filter["page"]);
		}else{
			
			$search = $get["s"];
			$sort = $get["sort"];
			$time = $get["time"];
			$format = $get["format"];
			$size = $get["size"];
			$page = 0;
			
			$filter = [
				"q" => $search
			];
			
			if($format != "any"){
				
				$filter["q_type"] = $format;
			}
			
			if($size != "any"){
				
				$filter["q_size_px"] = $size;
				$filter["q_size_is_mpx"] = "off";
			}
		}
		
		$out = [
			"status" => "ok",
			"npt" => null,
			"image" => []
		];
		
		try{
			$html =
				$this->get(
					"https://imgur.com/search/$sort/$time/page/$page",
					$filter
				);
			
		}catch(Exception $error){
			
			// keep the original failure attached for debugging
			throw new Exception("Failed to fetch HTML", 0, $error);
		}
		
		$this->fuckhtml->load($html);
		
		$posts =
			$this->fuckhtml
			->getElementsByClassName(
				"post",
				"div"
			);
		
		foreach($posts as $post){
			
			$this->fuckhtml->load($post);
			
			$images =
				$this->fuckhtml
				->getElementsByTagName("img");
			
			if(!isset($images[0]["attributes"]["src"])){
				
				// post without a usable thumbnail: nothing to report
				continue;
			}
			
			$image = $images[0];
			
			// drop the last 5 characters of the thumbnail src and prepend the
			// scheme; presumably src is protocol-relative and ends in a
			// one-letter size suffix plus extension (e.g. "b.jpg") -- TODO confirm
			$image_url = "https:" . substr($this->fuckhtml->getTextContent($image["attributes"]["src"]), 0, -5);
			
			$title =
				isset($image["attributes"]["alt"]) ?
				$this->fuckhtml->getTextContent($image["attributes"]["alt"]) :
				"";
			
			$links =
				$this->fuckhtml
				->getElementsByClassName(
					"image-list-link",
					"a"
				);
			
			if(!isset($links[0]["attributes"]["href"])){
				
				// post without a link back to imgur: skip it
				continue;
			}
			
			$out["image"][] = [
				"title" => $title,
				"source" => [
					[
						"url" => $image_url . ".jpg",
						"width" => null,
						"height" => null
					],
					[
						"url" => $image_url . "m.jpg",
						"width" => null,
						"height" => null
					]
				],
				"url" =>
					"https://imgur.com" .
					$this->fuckhtml
					->getTextContent(
						$links[0]["attributes"]["href"]
					)
			];
		}
		
		if(isset($out["image"][0])){
			
			// store nextpage
			$filter["s"] = $search;
			$filter["sort"] = $sort;
			$filter["time"] = $time;
			$filter["format"] = $format;
			$filter["size"] = $size;
			$filter["page"] = $page + 1;
			
			$out["npt"] =
				$this->nextpage->store(
					json_encode($filter),
					"images"
				);
		}
		
		return $out;
	}
}
|
Loading…
Add table
Add a link
Reference in a new issue