From 033e4cb959e2b7204d8953311f37a2e41339c681 Mon Sep 17 00:00:00 2001
From: lolcat
Date: Sat, 11 Jan 2025 23:07:58 -0500
Subject: [PATCH] added vsco scraper

---
 README.md        |   6 +-
 data/config.php  |   1 +
 lib/frontend.php |   1 +
 scraper/vsco.php | 315 +++++++++++++++++++++++++++++++++++++++++++++++
 settings.php     |   4 +
 5 files changed, 324 insertions(+), 3 deletions(-)
 create mode 100644 scraper/vsco.php

diff --git a/README.md b/README.md
index 4ab1b23..8f483fb 100644
--- a/README.md
+++ b/README.md
@@ -43,9 +43,9 @@ tl;dr 4get is the best way to browse for shit.
 | Yep | Solofield | Solofield | | | Ghostery |
 | Greppr | Pinterest | | | | Yep |
 | Crowdview | 500px | | | | Marginalia |
-| Mwmbl | Imgur | | | | YouTube |
-| Mojeek | FindThatMeme | | | | Soundcloud |
-| Solofield | | | | | |
+| Mwmbl | VSCO | | | | YouTube |
+| Mojeek | Imgur | | | | Soundcloud |
+| Solofield | FindThatMeme | | | | |
 | Marginalia | | | | | |
 | wiby | | | | | |
 | Curlie | | | | | |
diff --git a/data/config.php b/data/config.php
index 028a232..46b83e5 100644
--- a/data/config.php
+++ b/data/config.php
@@ -144,6 +144,7 @@ class config{
 	const PROXY_YEP = false;
 	const PROXY_PINTEREST = false;
 	const PROXY_FIVEHPX = false;
+	const PROXY_VSCO = false;
 	const PROXY_SEZNAM = false;
 	const PROXY_NAVER = false;
 	const PROXY_GREPPR = false;
diff --git a/lib/frontend.php b/lib/frontend.php
index a335360..6b439e2 100644
--- a/lib/frontend.php
+++ b/lib/frontend.php
@@ -971,6 +971,7 @@ class frontend{
 					"solofield" => "Solofield",
 					"pinterest" => "Pinterest",
 					"fivehpx" => "500px",
+					"vsco" => "VSCO",
 					"imgur" => "Imgur",
 					"ftm" => "FindThatMeme"
 				]
diff --git a/scraper/vsco.php b/scraper/vsco.php
new file mode 100644
index 0000000..8a7f057
--- /dev/null
+++ b/scraper/vsco.php
@@ -0,0 +1,315 @@
+backend = new backend("vsco");
+	}
+
+	/**
+	 * List the filters this scraper supports for a page type.
+	 *
+	 * @param mixed $page Page type; ignored.
+	 * @return array Always empty.
+	 */
+	public function getfilters($page){
+
+		return [];
+	}
+
+	/**
+	 * Perform a GET request and return the raw response body.
+	 *
+	 * Without a bearer token the request carries browser-style navigation
+	 * headers; with one it carries API-style headers, including the token
+	 * and a Referer built from $get["query"].
+	 *
+	 * @param mixed $proxy Proxy handed to the backend's assign_proxy().
+	 * @param string $url Target URL without a query string.
+	 * @param array $get Query parameters appended to $url.
+	 * @param string|null $bearer Bearer token, or null for a plain page fetch.
+	 * @return string|false Result of curl_exec().
+	 * @throws Exception Carrying curl's error message when the transfer fails.
+	 */
+	private function get($proxy, $url, $get = [], $bearer = null){
+
+		$curlproc = curl_init();
+
+		if($get !== []){
+			$get_tmp = http_build_query($get);
+			$url .= "?" . $get_tmp;
+		}
+
+		curl_setopt($curlproc, CURLOPT_URL, $url);
+
+		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+
+		if($bearer === null){
+
+			curl_setopt($curlproc, CURLOPT_HTTPHEADER,
+				["User-Agent: " . config::USER_AGENT,
+				"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+				"Accept-Language: en-US,en;q=0.5",
+				"Accept-Encoding: gzip",
+				"DNT: 1",
+				"Sec-GPC: 1",
+				"Connection: keep-alive",
+				"Upgrade-Insecure-Requests: 1",
+				"Sec-Fetch-Dest: document",
+				"Sec-Fetch-Mode: navigate",
+				"Sec-Fetch-Site: same-origin",
+				"Sec-Fetch-User: ?1",
+				"Priority: u=0, i",
+				"TE: trailers"]
+			);
+		}else{
+
+			curl_setopt($curlproc, CURLOPT_HTTPHEADER,
+				["User-Agent: " . config::USER_AGENT,
+				"Accept: */*",
+				"Accept-Language: en-US",
+				"Accept-Encoding: gzip",
+				"Referer: https://vsco.co/search/images/" . urlencode($get["query"]),
+				"authorization: Bearer " . $bearer,
+				"content-type: application/json",
+				"x-client-build: 1",
+				"x-client-platform: web",
+				"DNT: 1",
+				"Sec-GPC: 1",
+				"Connection: keep-alive",
+				"Sec-Fetch-Dest: empty",
+				"Sec-Fetch-Mode: cors",
+				"Sec-Fetch-Site: same-origin",
+				"Priority: u=0",
+				"TE: trailers"]
+			);
+		}
+
+		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
+		curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+		// http2 bypass
+		curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
+
+		$this->backend->assign_proxy($curlproc, $proxy);
+
+		$data = curl_exec($curlproc);
+
+		if(curl_errno($curlproc)){
+
+			// read the message before the handle is released
+			$error = curl_error($curlproc);
+			curl_close($curlproc);
+
+			throw new Exception($error);
+		}
+
+		curl_close($curlproc);
+		return $data;
+	}
+
+	/**
+	 * Search VSCO for images.
+	 *
+	 * A fresh search scrapes a bearer token from the feed page and starts
+	 * at page 0. When $get["npt"] is set, the query, page and token are
+	 * restored from the stored next-page token instead.
+	 *
+	 * @param array $get Request parameters; reads "s" and "npt".
+	 * @return array Result with "status", "npt" and "image" keys.
+	 * @throws Exception When the search term is empty, a request fails or
+	 *                   a response cannot be used.
+	 */
+	public function image($get){
+
+		if($get["npt"]){
+
+			[$data, $proxy] =
+				$this->backend->get(
+					$get["npt"], "images"
+				);
+
+			$data = json_decode($data, true);
+
+			// a corrupt token must not reach the request below
+			if(
+				!isset($data["pagination"]["page"]) ||
+				!isset($data["bearer"])
+			){
+
+				throw new Exception("Failed to decode next page token");
+			}
+
+		}else{
+
+			$search = $get["s"];
+			if(strlen($search) === 0){
+
+				throw new Exception("Search term is empty!");
+			}
+
+			$proxy = $this->backend->get_ip();
+
+			// get bearer token
+			try{
+
+				$html =
+					$this->get(
+						$proxy,
+						"https://vsco.co/feed"
+					);
+
+			}catch(Exception $error){
+
+				throw new Exception("Failed to fetch feed page");
+			}
+
+			// NOTE(review): A-z also matches [ \ ] ^ _ ` -- confirm intended
+			preg_match(
+				'/"tkn":"([A-z0-9]+)"/',
+				$html,
+				$bearer
+			);
+
+			if(!isset($bearer[1])){
+
+				throw new Exception("Failed to grep bearer token");
+			}
+
+			$data = [
+				"pagination" => [
+					"query" => $search,
+					"page" => 0,
+					"size" => 100
+				],
+				"bearer" => $bearer[1]
+			];
+		}
+
+		try{
+
+			$json =
+				$this->get(
+					$proxy,
+					"https://vsco.co/api/2.0/search/images",
+					$data["pagination"],
+					$data["bearer"]
+				);
+		}catch(Exception $error){
+
+			throw new Exception("Failed to fetch JSON");
+		}
+
+		$json = json_decode($json, true);
+
+		if($json === null){
+
+			throw new Exception("Failed to decode JSON");
+		}
+
+		$out = [
+			"status" => "ok",
+			"npt" => null,
+			"image" => []
+		];
+
+		if(!isset($json["results"])){
+
+			throw new Exception("Failed to access results object");
+		}
+
+		foreach($json["results"] as $image){
+
+			$responsive_url = $image["responsive_url"] ?? "";
+			$image_domain = parse_url("https://" . $responsive_url, PHP_URL_HOST);
+
+			// skip entries without a usable location: explode()
+			// below rejects an empty separator
+			if(!is_string($image_domain) || $image_domain === ""){
+
+				continue;
+			}
+
+			$thumbnail = explode($image_domain, $responsive_url, 2)[1];
+
+			if(substr($thumbnail, 0, 3) != "/1/"){
+
+				$thumbnail =
+					preg_replace(
+						'/^\/[^\/]+/',
+						"",
+						$thumbnail
+					);
+			}
+
+			$thumbnail = "https://img.vsco.co/cdn-cgi/image/width=480,height=360" . $thumbnail;
+
+			// missing dimensions become 0, which image_ratio() tolerates
+			$width = (int)($image["dimensions"]["width"] ?? 0);
+			$height = (int)($image["dimensions"]["height"] ?? 0);
+			$size = $this->image_ratio($width, $height);
+
+			$out["image"][] = [
+				"title" => $image["description"] ?? null,
+				"source" => [
+					[
+						"url" => "https://" . $responsive_url,
+						"width" => $width,
+						"height" => $height
+					],
+					[
+						"url" => $thumbnail,
+						"width" => $size[0],
+						"height" => $size[1]
+					]
+				],
+				"url" => "https://" . $image["grid"]["domain"] . "/media/" . $image["imageId"]
+			];
+		}
+
+		// get NPT
+		$max_page = ceil(($json["total"] ?? 0) / 100);
+		$data["pagination"]["page"]++;
+
+		if($max_page > $data["pagination"]["page"]){
+
+			$out["npt"] =
+				$this->backend->store(
+					json_encode($data),
+					"images",
+					$proxy
+				);
+		}
+
+		return $out;
+	}
+
+	/**
+	 * Fit a size into a 480x360 box while keeping its aspect ratio.
+	 *
+	 * @param int $width Source width.
+	 * @param int $height Source height.
+	 * @return array [width, height] of the scaled size; the box itself
+	 *               when either input is not positive.
+	 */
+	private function image_ratio($width, $height){
+
+		// a zero dimension would make the division below throw
+		if($width <= 0 || $height <= 0){
+
+			return [480, 360];
+		}
+
+		$ratio = min(480 / $width, 360 / $height);
+
+		return [
+			floor($width * $ratio),
+			floor($height * $ratio)
+		];
+	}
+}
diff --git a/settings.php b/settings.php
index 6b3f774..b150e10 100644
--- a/settings.php
+++ b/settings.php
@@ -235,6 +235,10 @@ $settings = [
 				"value" => "fivehpx",
 				"text" => "500px"
 			],
+			[
+				"value" => "vsco",
+				"text" => "VSCO"
+			],
 			[
 				"value" => "imgur",
 				"text" => "Imgur"