1
Fork 1
mirror of https://git.lolcat.ca/lolcat/4get.git synced 2024-09-13 17:28:08 -04:00

Compare commits

...

7 commits

Author SHA1 Message Date
lolcat
458bd8c1b5 ok i think i unfucked it 2024-09-02 00:03:09 -04:00
lolcat
c6cf9afb6e solofield fix 2024-09-01 23:59:10 -04:00
lolcat
435913a1e5 solofield thumbnail fix 2024-09-01 20:59:10 -04:00
lolcat
e4f94c43db fucking forgot something again dementia dementia dementia 2024-09-01 20:36:09 -04:00
lolcat
c9eec8260b added solofield 2024-09-01 20:35:21 -04:00
lolcat
063397dbd1 err 2024-09-01 10:52:46 -04:00
lolcat
1a5a653be3 added ghostery search 2024-09-01 10:52:28 -04:00
7 changed files with 1021 additions and 4 deletions

View file

@ -1,4 +1,4 @@
[![ko-fi](https://ko-fi.com/img/githubbutton_sm.svg)](https://ko-fi.com/W7W2OZK5H)
## <a href="https://4get.ca/donate">Donate to the project here!</a>
# 4get search
**4get** is a proxy search engine that doesn't suck.
@ -37,11 +37,13 @@ tl;dr the best way to actually browse for shit.
| Google | Google | Yandex | Startpage | | Google |
| Startpage | Startpage | Google | Qwant | | Startpage |
| Qwant | Qwant | Startpage | Mojeek | | Kagi |
| Yep | Yep | Qwant | | | Qwant |
| Ghostery | Yep | Qwant | | | Qwant |
| Yep | Solofield | Solofield | | | Ghostery |
| Greppr | Imgur | | | | Yep |
| Crowdview | FindThatMeme | | | | Marginalia |
| Mwmbl | | | | | YouTube |
| Mojeek | | | | | Soundcloud |
| Solofield | | | | | |
| Marginalia | | | | | |
| wiby | | | | | |
| Curlie | | | | | |

View file

@ -20,7 +20,8 @@ class autocomplete{
"yt" => "https://suggestqueries-clients6.youtube.com/complete/search?client=youtube&q={searchTerms}",
"sc" => "",
"startpage" => "https://www.startpage.com/suggestions?q={searchTerms}&format=opensearch&segment=startpage.defaultffx&lui=english",
"kagi" => "https://kagi.com/api/autosuggest?q={searchTerms}"
"kagi" => "https://kagi.com/api/autosuggest?q={searchTerms}",
"ghostery" => "https://ghosterysearch.com/suggest?q={searchTerms}"
];
/*

View file

@ -131,10 +131,12 @@ class config{
const PROXY_GOOGLE = false;
const PROXY_STARTPAGE = false;
const PROXY_QWANT = false;
const PROXY_GHOSTERY = false;
const PROXY_MARGINALIA = false;
const PROXY_MOJEEK = false;
const PROXY_SC = false; // soundcloud
const PROXY_SPOTIFY = false;
const PROXY_SOLOFIELD = false;
const PROXY_WIBY = false;
const PROXY_CURLIE = false;
const PROXY_YT = false; // youtube

View file

@ -941,11 +941,13 @@ class frontend{
"google" => "Google",
"startpage" => "Startpage",
"qwant" => "Qwant",
"ghostery" => "Ghostery",
"yep" => "Yep",
"greppr" => "Greppr",
"crowdview" => "Crowdview",
"mwmbl" => "Mwmbl",
"mojeek" => "Mojeek",
"solofield" => "Solofield",
"marginalia" => "Marginalia",
"wiby" => "wiby",
"curlie" => "Curlie"
@ -964,6 +966,7 @@ class frontend{
"startpage" => "Startpage",
"qwant" => "Qwant",
"yep" => "Yep",
"solofield" => "Solofield",
//"pinterest" => "Pinterest",
"imgur" => "Imgur",
"ftm" => "FindThatMeme"
@ -982,7 +985,8 @@ class frontend{
"yandex" => "Yandex",
"google" => "Google",
"startpage" => "Startpage",
"qwant" => "Qwant"
"qwant" => "Qwant",
"solofield" => "Solofield"
]
];
break;

320
scraper/ghostery.php Normal file
View file

@ -0,0 +1,320 @@
<?php
/**
 * Scraper for Ghostery Search (https://ghosterysearch.com).
 *
 * Only the "web" page type is supported: getfilters() returns no filters
 * for anything else and web() is the only search method.
 */
class ghostery{
	
	// Declared explicitly (and kept public, as the former dynamic properties
	// were) so that PHP 8.2+ does not emit dynamic-property deprecations.
	
	/** @var backend helper used for proxy selection and next-page token storage */
	public $backend;
	
	/** @var fuckhtml HTML parser used to walk the result page */
	public $fuckhtml;
	
	/**
	 * Loads the helper libraries and instantiates them for this scraper.
	 */
	public function __construct(){
		
		include "lib/backend.php";
		$this->backend = new backend("ghostery");
		
		include "lib/fuckhtml.php";
		$this->fuckhtml = new fuckhtml();
	}
	
	/**
	 * Returns the filters available for a page type.
	 *
	 * @param string $page page type; only "web" has filters
	 * @return array filter definitions keyed by filter name, or [] for
	 *               unsupported page types
	 */
	public function getfilters($page){
		
		if($page != "web"){
			
			return [];
		}
		
		return [
			"country" => [
				"display" => "Country",
				"option" => [
					"any" => "All regions",
					"AR" => "Argentina",
					"AU" => "Australia",
					"AT" => "Austria",
					"BE" => "Belgium",
					"BR" => "Brazil",
					"CA" => "Canada",
					"CL" => "Chile",
					"DK" => "Denmark",
					"FI" => "Finland",
					"FR" => "France",
					"DE" => "Germany",
					"HK" => "Hong Kong",
					"IN" => "India",
					"ID" => "Indonesia",
					"IT" => "Italy",
					"JP" => "Japan",
					"KR" => "Korea",
					"MY" => "Malaysia",
					"MX" => "Mexico",
					"NL" => "Netherlands",
					"NZ" => "New Zealand",
					"NO" => "Norway",
					"CN" => "People's Republic of China",
					"PL" => "Poland",
					"PT" => "Portugal",
					"PH" => "Republic of the Philippines",
					"RU" => "Russia",
					"SA" => "Saudi Arabia",
					"ZA" => "South Africa",
					"ES" => "Spain",
					"SE" => "Sweden",
					"CH" => "Switzerland",
					"TW" => "Taiwan",
					"TR" => "Turkey",
					"GB" => "United Kingdom",
					"US" => "United States"
				]
			]
		];
	}
	
	/**
	 * Performs a GET request against Ghostery and returns the raw body.
	 *
	 * @param mixed  $proxy   proxy descriptor handed to backend::assign_proxy()
	 * @param string $url     target URL
	 * @param array  $get     query parameters appended to $url when non-empty
	 * @param string $country country code sent in the "ctry" cookie; "any"
	 *                        is sent as "--"
	 * @return string response body
	 * @throws Exception carrying the curl error message when the transfer fails
	 */
	private function get($proxy, $url, $get = [], $country = "any"){
		
		// $country now has a default: an optional parameter placed before a
		// required one is deprecated since PHP 8.0. Existing four-argument
		// calls behave exactly as before.
		
		$curlproc = curl_init();
		
		if($get !== []){
			
			$url .= "?" . http_build_query($get);
		}
		
		curl_setopt($curlproc, CURLOPT_URL, $url);
		
		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
		curl_setopt($curlproc, CURLOPT_HTTPHEADER,
			["User-Agent: " . config::USER_AGENT,
			"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
			"Accept-Language: en-US,en;q=0.5",
			"Accept-Encoding: gzip",
			"Referer: https://ghosterysearch.com",
			"DNT: 1",
			"Sec-GPC: 1",
			"Connection: keep-alive",
			"Cookie: ctry=" . ($country == "any" ? "--" : $country) . "; noads=true",
			"Upgrade-Insecure-Requests: 1",
			"Sec-Fetch-Dest: document",
			"Sec-Fetch-Mode: navigate",
			"Sec-Fetch-Site: same-origin",
			"Sec-Fetch-User: ?1",
			"Priority: u=0, i"]
		);
		
		// http2 bypass
		curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
		
		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
		curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
		
		$this->backend->assign_proxy($curlproc, $proxy);
		
		$data = curl_exec($curlproc);
		
		if(curl_errno($curlproc)){
			
			// read the message before closing so the handle is released on
			// the failure path too
			$error = curl_error($curlproc);
			curl_close($curlproc);
			
			throw new Exception($error);
		}
		
		curl_close($curlproc);
		return $data;
	}
	
	/**
	 * Runs a web search, or continues one from a next-page token.
	 *
	 * @param array $get request parameters; reads "npt" (next-page token),
	 *                   "s" (search terms) and "country" (country filter)
	 * @return array result structure with "status", "spelling", "npt",
	 *               "answer", "web", "image", "video", "news", "related"
	 * @throws Exception when the page cannot be fetched or the result
	 *                   section is missing
	 */
	public function web($get){
		
		if($get["npt"]){
			
			[$query, $proxy] = $this->backend->get($get["npt"], "web");
			
			parse_str($query, $query);
			
			// the country travels inside the token as "c"; it is a cookie
			// value, not a query parameter, so take it back out
			$country = isset($query["c"]) ? $query["c"] : "any";
			unset($query["c"]);
			
			$url = "https://ghosterysearch.com/search?" . http_build_query($query);
			$params = [];
			
		}else{
			
			$proxy = $this->backend->get_ip();
			$country = $get["country"];
			
			$url = "https://ghosterysearch.com/search";
			$params = [
				"q" => $get["s"]
			];
		}
		
		try{
			
			$html =
				$this->get(
					$proxy,
					$url,
					$params,
					$country
				);
			
		}catch(Exception $error){
			
			throw new Exception("Failed to fetch search page");
		}
		
		$out = [
			"status" => "ok",
			"spelling" => [
				"type" => "no_correction",
				"using" => null,
				"correction" => null
			],
			"npt" => null,
			"answer" => [],
			"web" => [],
			"image" => [],
			"video" => [],
			"news" => [],
			"related" => []
		];
		
		$this->fuckhtml->load($html);
		
		$results_wrapper =
			$this->fuckhtml
			->getElementsByClassName(
				"results",
				"section"
			);
		
		if(count($results_wrapper) === 0){
			
			throw new Exception("Failed to grep result section");
		}
		
		$this->fuckhtml->load($results_wrapper[0]);
		
		// get search results
		$results =
			$this->fuckhtml
			->getElementsByClassName(
				"result",
				"li"
			);
		
		if(count($results) === 0){
			
			return $out;
		}
		
		foreach($results as $result){
			
			$this->fuckhtml->load($result);
			
			$a =
				$this->fuckhtml
				->getElementsByClassName(
					"url",
					"a"
				);
			
			if(count($a) === 0){
				
				continue;
			}
			
			$heading =
				$this->fuckhtml
				->getElementsByTagName(
					"h2"
				);
			
			if(count($heading) === 0){
				
				// a result without a heading has no title to show
				continue;
			}
			
			$paragraph =
				$this->fuckhtml
				->getElementsByTagName(
					"p"
				);
			
			$description = null;
			
			if(count($paragraph) !== 0){
				
				$description =
					$this->titledots(
						$this->fuckhtml
						->getTextContent(
							$paragraph[0]
						)
					);
			}
			
			$out["web"][] = [
				"title" =>
					$this->titledots(
						$this->fuckhtml
						->getTextContent(
							$heading[0]
						)
					),
				"description" => $description,
				"url" =>
					$this->fuckhtml
					->getTextContent(
						$a[0]
						["attributes"]
						["href"]
					),
				"date" => null,
				"type" => "web",
				"thumb" => [
					"url" => null,
					"ratio" => null
				],
				"sublink" => [],
				"table" => []
			];
		}
		
		// back to the whole document: pagination lives outside the results
		$this->fuckhtml->load($html);
		
		// get pagination token
		$pagination_wrapper =
			$this->fuckhtml
			->getElementsByClassName(
				"pagination",
				"div"
			);
		
		if(count($pagination_wrapper) !== 0){
			
			// found next page!
			$this->fuckhtml->load($pagination_wrapper[0]);
			
			$links =
				$this->fuckhtml
				->getElementsByTagName(
					"a"
				);
			
			if(count($links) !== 0){
				
				// the last link of the pagination block is taken as "next"
				$q =
					parse_url(
						$this->fuckhtml
						->getTextContent(
							$links[count($links) - 1]
							["attributes"]
							["href"]
						),
						PHP_URL_QUERY
					);
				
				// parse_url() yields null/false when there is no usable query
				if(is_string($q) && $q !== ""){
					
					// store the country this request actually used, rather
					// than re-reading $get["country"]
					// NOTE(review): assumes follow-up page requests carry only
					// the token, so $get["country"] would fall back to the
					// default there - confirm against the frontend
					$out["npt"] =
						$this->backend
						->store(
							$q . "&c=" . $country,
							"web",
							$proxy
						);
				}
			}
		}
		
		return $out;
	}
	
	/**
	 * Strips whitespace, NUL, dots and ellipsis characters from both ends
	 * of a string.
	 *
	 * trim() works on bytes, so handing it the multi-byte "…" strips the
	 * bytes E2/80/A6 individually and can cut other UTF-8 characters in
	 * half. A unicode-aware regex is used instead.
	 *
	 * @param string $title text to clean
	 * @return string cleaned text
	 */
	private function titledots($title){
		
		$title = (string)$title;
		
		$trimmed =
			preg_replace(
				'/^[ .\t\n\r\0\x0B…]+|[ .\t\n\r\0\x0B…]+$/u',
				"",
				$title
			);
		
		if($trimmed === null){
			
			// not valid UTF-8: fall back to the byte-safe part of the mask
			return trim($title, " .\t\n\r\0\x0B");
		}
		
		return $trimmed;
	}
}

668
scraper/solofield.php Normal file
View file

@ -0,0 +1,668 @@
<?php
/**
 * Scraper for Solofield (https://solofield.net): web, image and video search.
 */
class solofield{
	
	// Declared explicitly (and kept public, as the former dynamic properties
	// were) so that PHP 8.2+ does not emit dynamic-property deprecations.
	
	/** @var backend helper used for proxy selection and next-page token storage */
	public $backend;
	
	/** @var fuckhtml HTML parser used to walk the result pages */
	public $fuckhtml;
	
	/**
	 * Loads the helper libraries and instantiates them for this scraper.
	 */
	public function __construct(){
		
		include "lib/backend.php";
		$this->backend = new backend("solofield");
		
		include "lib/fuckhtml.php";
		$this->fuckhtml = new fuckhtml();
	}
	
	/**
	 * Returns the filters available for a page type.
	 *
	 * @param string $page page type (unused: every page gets the same filter)
	 * @return array filter definitions keyed by filter name
	 */
	public function getfilters($page){
		
		return [
			"nsfw" => [
				"display" => "NSFW",
				"option" => [
					"yes" => "Yes",
					"no" => "No",
				]
			]
		];
	}
	
	/**
	 * Performs a GET request against Solofield and returns the raw body.
	 *
	 * @param mixed  $proxy proxy descriptor handed to backend::assign_proxy()
	 * @param string $url   target URL
	 * @param array  $get   query parameters appended to $url when non-empty
	 * @return string response body
	 * @throws Exception carrying the curl error message when the transfer fails
	 */
	private function get($proxy, $url, $get = []){
		
		$curlproc = curl_init();
		
		if($get !== []){
			
			$url .= "?" . http_build_query($get);
		}
		
		curl_setopt($curlproc, CURLOPT_URL, $url);
		
		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
		curl_setopt($curlproc, CURLOPT_HTTPHEADER,
			["User-Agent: " . config::USER_AGENT,
			"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
			"Accept-Language: en-US,en;q=0.5",
			"Accept-Encoding: gzip",
			"Referer: https://solofield.net",
			"DNT: 1",
			"Connection: keep-alive",
			"Cookie: cross-site-cookie=name; lno=35842050",
			"Upgrade-Insecure-Requests: 1",
			"Sec-Fetch-Dest: document",
			"Sec-Fetch-Mode: navigate",
			"Sec-Fetch-Site: same-origin",
			"Sec-Fetch-User: ?1"]
		);
		
		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
		curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
		
		$this->backend->assign_proxy($curlproc, $proxy);
		
		$data = curl_exec($curlproc);
		
		if(curl_errno($curlproc)){
			
			// read the message before closing so the handle is released on
			// the failure path too
			$error = curl_error($curlproc);
			curl_close($curlproc);
			
			throw new Exception($error);
		}
		
		curl_close($curlproc);
		return $data;
	}
	
	/**
	 * Runs a web search, or continues one from a next-page token.
	 *
	 * @param array $get request parameters; reads "npt" (next-page token),
	 *                   "s" (search terms) and "nsfw" ("yes"/"no")
	 * @return array result structure with "status", "spelling", "npt",
	 *               "answer", "web", "image", "video", "news", "related"
	 * @throws Exception when the page cannot be fetched or parsed
	 */
	public function web($get){
		
		if($get["npt"]){
			
			[$query, $proxy] = $this->backend->get($get["npt"], "web");
			
			$url = "https://solofield.net/search?" . $query;
			$params = [];
			
		}else{
			
			$proxy = $this->backend->get_ip();
			
			$url = "https://solofield.net/search";
			$params = [
				"q" => $get["s"],
				"ie" => "UTF-8",
				"oe" => "UTF-8",
				"hl" => "ja", // changing this doesn't do anything
				"lr" => "lang_ja", // same here
				//"ls" => "", // ??
				"f" => ($get["nsfw"] == "yes" ? "off" : "on")
			];
		}
		
		try{
			
			$html =
				$this->get(
					$proxy,
					$url,
					$params
				);
			
		}catch(Exception $error){
			
			throw new Exception("Failed to fetch search page");
		}
		
		$out = [
			"status" => "ok",
			"spelling" => [
				"type" => "no_correction",
				"using" => null,
				"correction" => null
			],
			"npt" => null,
			"answer" => [],
			"web" => [],
			"image" => [],
			"video" => [],
			"news" => [],
			"related" => []
		];
		
		// check for errors and load the result div
		if($this->error_and_load($html)){
			
			return $out;
		}
		
		$items =
			$this->fuckhtml
			->getElementsByClassName(
				"g0",
				"li"
			);
		
		foreach($items as $item){
			
			$this->fuckhtml->load($item);
			
			$title_tag =
				$this->fuckhtml
				->getElementsByClassName(
					"r",
					"h3"
				);
			
			if(count($title_tag) === 0){
				
				continue;
			}
			
			// the result link sits inside the title heading
			$this->fuckhtml->load($title_tag[0]);
			
			$anchors =
				$this->fuckhtml
				->getElementsByTagName(
					"a"
				);
			
			if(
				count($anchors) === 0 ||
				!isset($anchors[0]["attributes"]["href"])
			){
				
				// nothing to link to
				continue;
			}
			
			$link =
				$this->fuckhtml
				->getTextContent(
					$anchors[0]
					["attributes"]
					["href"]
				);
			
			// back to the whole item for thumbnail and description
			$this->fuckhtml->load($item);
			
			$thumb = [
				"ratio" => null,
				"url" => null
			];
			
			$webshot =
				$this->fuckhtml
				->getElementsByClassName(
					"webshot",
					"img"
				);
			
			if(count($webshot) !== 0){
				
				$uri =
					$this->fuckhtml
					->getTextContent(
						$webshot[0]
						["attributes"]
						["src"]
					);
				
				// "now_printing" images are treated as "no thumbnail"
				// (presumably a placeholder - TODO confirm)
				if(stripos($uri, "now_printing") === false){
					
					$thumb = [
						"ratio" => "1:1",
						"url" => "https://solofield.net" . $uri
					];
				}
			}
			
			$snippet =
				$this->fuckhtml
				->getElementsByClassName(
					"s",
					"div"
				);
			
			$out["web"][] = [
				"title" =>
					$this->fuckhtml
					->getTextContent(
						$title_tag[0]
					),
				"description" =>
					count($snippet) === 0 ?
					null :
					$this->fuckhtml
					->getTextContent(
						$snippet[0]
					),
				"url" => $link,
				"date" => null,
				"type" => "web",
				"thumb" => $thumb,
				"sublink" => [],
				"table" => []
			];
		}
		
		// get next page
		$this->get_npt($html, $proxy, $out, "web");
		
		return $out;
	}
	
	/**
	 * Runs an image search. There is no pagination.
	 *
	 * @param array $get request parameters; reads "s" (search terms) and
	 *                   "nsfw" ("yes"/"no")
	 * @return array result structure with "status", "npt" and "image"
	 * @throws Exception when the page cannot be fetched or parsed
	 */
	public function image($get){
		
		// no pagination
		$proxy = $this->backend->get_ip();
		
		try{
			
			$html =
				$this->get(
					$proxy,
					"https://solofield.net/isearch",
					[
						"q" => $get["s"],
						"ie" => "UTF-8",
						"oe" => "UTF-8",
						"hl" => "ja", // changing this doesn't do anything
						//"lr" => "lang_ja", // same here
						"ls" => "", // ??
						"f" => ($get["nsfw"] == "yes" ? "off" : "on")
					]
				);
			
		}catch(Exception $error){
			
			// same message as web() and video()
			throw new Exception("Failed to fetch search page");
		}
		
		$out = [
			"status" => "ok",
			"npt" => null,
			"image" => []
		];
		
		// check for errors and load the result div
		if($this->error_and_load($html)){
			
			return $out;
		}
		
		$images =
			$this->fuckhtml
			->getElementsByTagName(
				"li"
			);
		
		foreach($images as $image){
			
			$this->fuckhtml->load($image);
			
			$img =
				$this->fuckhtml
				->getElementsByTagName(
					"img"
				);
			
			$anchors =
				$this->fuckhtml
				->getElementsByTagName(
					"a"
				);
			
			if(
				count($img) === 0 ||
				count($anchors) === 0
			){
				
				// ?? invalid
				continue;
			}
			
			$img = $img[0];
			
			// the item's text is split on "x" into width and height
			$size =
				explode(
					"x",
					$this->fuckhtml
					->getTextContent(
						$image
					),
					2
				);
			
			$width = (int)trim($size[0]);
			
			// 0 when the text holds no "x" separator
			$height =
				isset($size[1]) ?
				(int)trim($size[1]) :
				0;
			
			$out["image"][] = [
				"title" => null,
				"source" => [
					[
						"url" =>
							"https://solofield.net/" .
							$this->fuckhtml
							->getTextContent(
								$img["attributes"]["src"]
							),
						"width" => $width,
						"height" => $height
					]
				],
				"url" =>
					$this->fuckhtml
					->getTextContent(
						$anchors[0]
						["attributes"]
						["href"]
					)
			];
		}
		
		return $out;
	}
	
	/**
	 * Runs a video search, or continues one from a next-page token.
	 *
	 * @param array $get request parameters; reads "npt" (next-page token),
	 *                   "s" (search terms) and "nsfw" ("yes"/"no")
	 * @return array result structure with "status", "npt", "video",
	 *               "author", "livestream", "playlist", "reel"
	 * @throws Exception when the page cannot be fetched or parsed
	 */
	public function video($get){
		
		if($get["npt"]){
			
			[$query, $proxy] = $this->backend->get($get["npt"], "videos");
			
			$url = "https://solofield.net/vsearch?" . $query;
			$params = [];
			
		}else{
			
			$proxy = $this->backend->get_ip();
			
			$url = "https://solofield.net/vsearch";
			$params = [
				"q" => $get["s"],
				"ie" => "UTF-8",
				"oe" => "UTF-8",
				"hl" => "ja", // changing this doesn't do anything
				//"lr" => "lang_ja", // same here
				"ls" => "", // ??
				"f" => ($get["nsfw"] == "yes" ? "off" : "on")
			];
		}
		
		try{
			
			$html =
				$this->get(
					$proxy,
					$url,
					$params
				);
			
		}catch(Exception $error){
			
			throw new Exception("Failed to fetch search page");
		}
		
		$out = [
			"status" => "ok",
			"npt" => null,
			"video" => [],
			"author" => [],
			"livestream" => [],
			"playlist" => [],
			"reel" => []
		];
		
		// check for errors and load the result div
		if($this->error_and_load($html)){
			
			return $out;
		}
		
		$items =
			$this->fuckhtml
			->getElementsByTagName(
				"li"
			);
		
		foreach($items as $item){
			
			$this->fuckhtml->load($item);
			
			$as =
				$this->fuckhtml
				->getElementsByTagName(
					"a"
				);
			
			if(count($as) === 0){
				
				continue;
			}
			
			$thumb = [
				"ratio" => null,
				"url" => null
			];
			
			$img =
				$this->fuckhtml
				->getElementsByTagName(
					"img"
				);
			
			if(count($img) !== 0){
				
				// decode the attribute like every other src/href read here
				$thumb = [
					"ratio" => "16:9",
					"url" =>
						"https://solofield.net/" .
						$this->fuckhtml
						->getTextContent(
							$img[0]
							["attributes"]
							["src"]
						)
				];
			}
			
			$date = null;
			
			$date_span =
				$this->fuckhtml
				->getElementsByAttributeValue(
					"style",
					"font-size: 10px;",
					"span"
				);
			
			if(count($date_span) !== 0){
				
				$date =
					$this->unfuckdate(
						$this->fuckhtml
						->getTextContent(
							$date_span[0]
						)
					);
			}
			
			$duration = null;
			
			$center_td =
				$this->fuckhtml
				->getElementsByAttributeValue(
					"align",
					"center",
					"td"
				);
			
			if(count($center_td) === 2){
				
				// extract the cell text first, then convert it to seconds
				// (the calls used to be nested the other way around, which
				// handed the element itself to hms2int())
				$duration =
					$this->hms2int(
						trim(
							$this->fuckhtml
							->getTextContent(
								$center_td[0]
							)
						)
					);
			}
			
			// the title is read from the second anchor; fall back to the
			// first one when the item only has a single link
			$title_anchor = isset($as[1]) ? $as[1] : $as[0];
			
			$out["video"][] = [
				"title" =>
					$this->fuckhtml
					->getTextContent(
						$title_anchor
					),
				"description" => null,
				"author" => [
					"name" => null,
					"url" => null,
					"avatar" => null
				],
				"date" => $date,
				"duration" => $duration,
				"views" => null,
				"thumb" => $thumb,
				"url" =>
					$this->fuckhtml
					->getTextContent(
						$as[0]
						["attributes"]
						["href"]
					)
			];
		}
		
		// get next page
		$this->get_npt($html, $proxy, $out, "videos");
		
		return $out;
	}
	
	/**
	 * Looks for a "Next" pagination link and stores its query string as the
	 * next-page token in $out["npt"].
	 *
	 * @param string $html  full page HTML
	 * @param mixed  $proxy proxy to bind the token to
	 * @param array  $out   result structure, modified in place
	 * @param string $type  page type passed to backend::store()
	 * @return void
	 */
	private function get_npt($html, $proxy, &$out, $type){
		
		// get next page
		$this->fuckhtml->load($html);
		
		$pjs =
			$this->fuckhtml
			->getElementById(
				"pjs"
			);
		
		if(!$pjs){
			
			return;
		}
		
		$alnk =
			$this->fuckhtml
			->getElementsByClassName(
				"alnk",
				"span"
			);
		
		foreach($alnk as $lnk){
			
			$label =
				$this->fuckhtml
				->getTextContent(
					$lnk
				);
			
			if(stripos($label, "Next") === false){
				
				continue;
			}
			
			$this->fuckhtml->load($lnk);
			
			$anchors =
				$this->fuckhtml
				->getElementsByTagName(
					"a"
				);
			
			if(
				count($anchors) === 0 ||
				!isset($anchors[0]["attributes"]["href"])
			){
				
				// "Next" label without a link
				continue;
			}
			
			// decode entities first so "&amp;" in the href does not end up
			// inside the stored query string
			$query =
				parse_url(
					$this->fuckhtml
					->getTextContent(
						$anchors[0]
						["attributes"]
						["href"]
					),
					PHP_URL_QUERY
				);
			
			if(!is_string($query) || $query === ""){
				
				// parse_url() found no usable query string
				continue;
			}
			
			$out["npt"] =
				$this->backend->store(
					$query,
					$type,
					$proxy
				);
		}
	}
	
	/**
	 * Validates the response and loads the result list into the parser.
	 *
	 * @param string $html full page HTML
	 * @return bool true when the page has a "nosearch" div (nothing to
	 *              parse), false when the "list" div was loaded
	 * @throws Exception on an empty body, or when neither div is present
	 */
	private function error_and_load($html){
		
		if(strlen($html) === 0){
			
			throw new Exception("Solofield blocked the request IP");
		}
		
		$this->fuckhtml->load($html);
		
		$list =
			$this->fuckhtml
			->getElementById(
				"list",
				"div"
			);
		
		if($list !== false){
			
			$this->fuckhtml->load($list);
			return false;
		}
		
		$nosearch =
			$this->fuckhtml
			->getElementById(
				"nosearch",
				"div"
			);
		
		if($nosearch){
			
			return true;
		}
		
		throw new Exception("Failed to grep search list");
	}
	
	/**
	 * Converts a "label: date" string into a unix timestamp.
	 *
	 * Everything after the first colon is taken, every run of non-digits
	 * becomes a dash and the result is given to strtotime().
	 *
	 * @param string $date text such as "Date: 2024/09/01"
	 * @return int|null timestamp, or null when there is no colon or
	 *                  strtotime() cannot parse the value
	 */
	private function unfuckdate($date){
		
		$parts = explode(":", $date, 2);
		
		if(count($parts) !== 2){
			
			return null;
		}
		
		// trim dashes on both sides: a space after the colon would leave a
		// leading "-", which strtotime() reads as a negative (signed) year
		$timestamp =
			strtotime(
				trim(
					preg_replace(
						'/[^0-9]+/',
						"-",
						$parts[1]
					),
					"-"
				)
			);
		
		if($timestamp === false){
			
			return null;
		}
		
		return $timestamp;
	}
	
	/**
	 * Converts "h:m:s", "m:s" or "s" into a number of seconds.
	 *
	 * @param string $time duration string
	 * @return int duration in seconds
	 */
	private function hms2int($time){
		
		$seconds = 0;
		
		// at most three fields; each one shifts the running total by a
		// factor of 60
		foreach(explode(":", $time, 3) as $part){
			
			$seconds = ($seconds * 60) + (int)$part;
		}
		
		return $seconds;
	}
}

View file

@ -91,6 +91,10 @@ $settings = [
"value" => "qwant",
"text" => "Qwant"
],
[
"value" => "ghostery",
"text" => "Ghostery"
],
[
"value" => "yep",
"text" => "Yep"
@ -137,6 +141,10 @@ $settings = [
"value" => "qwant",
"text" => "Qwant"
],
[
"value" => "ghostery",
"text" => "Ghostery"
],
[
"value" => "yep",
"text" => "Yep"
@ -157,6 +165,10 @@ $settings = [
"value" => "mojeek",
"text" => "Mojeek"
],
[
"value" => "solofield",
"text" => "Solofield"
],
[
"value" => "marginalia",
"text" => "Marginalia"
@ -203,6 +215,10 @@ $settings = [
"value" => "yep",
"text" => "Yep"
],
[
"value" => "solofield",
"text" => "Solofield"
],
/*[
"value" => "pinterest",
"text" => "Pinterest"
@ -249,6 +265,10 @@ $settings = [
"value" => "qwant",
"text" => "Qwant"
],
[
"value" => "solofield",
"text" => "Solofield"
]
]
],
[