1
Fork 0
mirror of https://git.lolcat.ca/lolcat/4get.git synced 2024-12-03 23:42:16 -05:00

Compare commits

...

5 commits

Author SHA1 Message Date
lolcat
08ed77f947 fucking .php again 2024-08-09 10:35:18 -04:00
lolcat
883a650f84 implement SSL check for botretards 2024-08-09 10:06:08 -04:00
lolcat
fbac3eeb8d fixed mwmbl, results are slightly better but wtf did they do to the sublinks my gawd 2024-08-08 03:29:29 -04:00
lolcat
36993013e5 fixed google piece of shit website i hate it so much 2024-08-02 21:25:39 -04:00
lolcat
beb08f46e2 fixed greppr 2024-08-02 19:40:44 -04:00
11 changed files with 545 additions and 218 deletions

View file

@ -19,7 +19,8 @@ class autocomplete{
"marginalia" => "https://search.marginalia.nu/suggest/?partial={searchTerms}", "marginalia" => "https://search.marginalia.nu/suggest/?partial={searchTerms}",
"yt" => "https://suggestqueries-clients6.youtube.com/complete/search?client=youtube&q={searchTerms}", "yt" => "https://suggestqueries-clients6.youtube.com/complete/search?client=youtube&q={searchTerms}",
"sc" => "", "sc" => "",
"startpage" => "https://www.startpage.com/suggestions?q={searchTerms}&format=opensearch&segment=startpage.defaultffx&lui=english" "startpage" => "https://www.startpage.com/suggestions?q={searchTerms}&format=opensearch&segment=startpage.defaultffx&lui=english",
"kagi" => "https://kagi.com/api/autosuggest?q={searchTerms}"
]; ];
/* /*

View file

@ -63,6 +63,14 @@ class config{
//"via" //"via"
]; ];
// Block SSL ciphers used by CLI tools used for botting
// Basically a primitive version of Cloudflare's browser integrity check
// ** If curl can still access the site (with spoofed headers), please make sure you use the new apache2 config **
// https://git.lolcat.ca/lolcat/4get/src/branch/master/docs/apache2.md
const DISALLOWED_SSL = [
// "TLS_AES_256_GCM_SHA384" // used by WGET and CURL
];
// Maximal number of searches per captcha key/pass issued. Counter gets // Maximal number of searches per captcha key/pass issued. Counter gets
// reset on every APCU cache clear (should happen once a day). // reset on every APCU cache clear (should happen once a day).
// Only useful when BOT_PROTECTION is NOT set to 0 // Only useful when BOT_PROTECTION is NOT set to 0
@ -111,7 +119,7 @@ class config{
// Default user agent to use for scraper requests. Sometimes ignored to get specific webpages // Default user agent to use for scraper requests. Sometimes ignored to get specific webpages
// Changing this might break things. // Changing this might break things.
const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:128.0) Gecko/20100101 Firefox/128.0"; const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:129.0) Gecko/20100101 Firefox/129.0";
// Proxy pool assignments for each scraper // Proxy pool assignments for each scraper
// false = Use server's raw IP // false = Use server's raw IP

195
docs/apache2-example.md Normal file
View file

@ -0,0 +1,195 @@
# Sample Apache2 configuration
This is the apache2 configuration file used on the 4get.ca official instance, in hopes that it's useful to you!
Looking for the apache2 guide? <a href="https://git.lolcat.ca/lolcat/4get/src/branch/master/docs/apache2.md">Go here</a>.
```xml
<VirtualHost *:443>
ServerName www.4get.ca
SSLEngine On
SSLCertificateFile /etc/letsencrypt/live/4get.ca/fullchain.pem
SSLCertificateKeyFile /etc/letsencrypt/live/4get.ca/privkey.pem
SSLCertificateChainFile /etc/letsencrypt/live/4get.ca/chain.pem
RedirectMatch 301 ^(.*)$ https://4get.ca$1
</VirtualHost>
<VirtualHost *:443>
ServerName 4get.ca
ServerAdmin will@lolcat.ca
DocumentRoot /var/www/4get
SSLEngine On
SSLOptions +StdEnvVars
#ErrorLog ${APACHE_LOG_DIR}/error.log
AddOutputFilterByType DEFLATE application/json
AddOutputFilterByType DEFLATE application/javascript
AddOutputFilterByType DEFLATE application/x-javascript
AddOutputFilterByType DEFLATE text/html
AddOutputFilterByType DEFLATE text/plain
AddOutputFilterByType DEFLATE text/css
SSLCertificateFile /etc/letsencrypt/live/4get.ca/fullchain.pem
SSLCertificateKeyFile /etc/letsencrypt/live/4get.ca/privkey.pem
SSLCertificateChainFile /etc/letsencrypt/live/4get.ca/chain.pem
<Directory /var/www/4get>
Options -MultiViews
AllowOverride All
Require all granted
RewriteEngine On
RewriteCond %{REQUEST_FILENAME} !-d
RewriteCond %{REQUEST_FILENAME} !-f
RewriteRule ^([^\.]+)$ $1.php [NC,L]
</Directory>
# deny access to private resources
<Directory /var/www/4get/data/>
Order Deny,allow
Deny from all
</Directory>
</VirtualHost>
<VirtualHost *:443>
ServerName www.lolcat.ca
SSLEngine On
SSLCertificateFile /etc/letsencrypt/live/4get.ca/fullchain.pem
SSLCertificateKeyFile /etc/letsencrypt/live/4get.ca/privkey.pem
SSLCertificateChainFile /etc/letsencrypt/live/4get.ca/chain.pem
RedirectMatch 301 ^(.*)$ https://lolcat.ca$1
</VirtualHost>
<VirtualHost *:443>
ServerName lolcat.ca
ServerAdmin will@lolcat.ca
DocumentRoot /var/www/lolcat
SSLEngine On
SSLOptions +StdEnvVars
#ErrorLog ${APACHE_LOG_DIR}/error.log
AddOutputFilterByType DEFLATE application/json
AddOutputFilterByType DEFLATE application/javascript
AddOutputFilterByType DEFLATE application/x-javascript
AddOutputFilterByType DEFLATE text/html
AddOutputFilterByType DEFLATE text/plain
AddOutputFilterByType DEFLATE text/css
SSLCertificateFile /etc/letsencrypt/live/4get.ca/fullchain.pem
SSLCertificateKeyFile /etc/letsencrypt/live/4get.ca/privkey.pem
SSLCertificateChainFile /etc/letsencrypt/live/4get.ca/chain.pem
<Directory /var/www/lolcat>
Options -MultiViews
AllowOverride All
Require all granted
RewriteEngine On
RewriteCond %{REQUEST_FILENAME} !-d
RewriteCond %{REQUEST_FILENAME} !-f
RewriteRule ^([^\.]+)$ $1.php [NC,L]
</Directory>
</VirtualHost>
<VirtualHost *:443>
ServerName www.nyym.co
SSLEngine On
SSLCertificateFile /etc/letsencrypt/live/nyym.co/fullchain.pem
SSLCertificateKeyFile /etc/letsencrypt/live/nyym.co/privkey.pem
SSLCertificateChainFile /etc/letsencrypt/live/nyym.co/chain.pem
RedirectMatch 301 ^(.*)$ https://nyym.co$1
</VirtualHost>
<VirtualHost *:443>
ServerName nyym.co
ServerAdmin will@lolcat.ca
DocumentRoot /var/www/nyym
SSLEngine On
SSLOptions +StdEnvVars
#ErrorLog ${APACHE_LOG_DIR}/error.log
AddOutputFilterByType DEFLATE application/json
AddOutputFilterByType DEFLATE application/javascript
AddOutputFilterByType DEFLATE application/x-javascript
AddOutputFilterByType DEFLATE text/html
AddOutputFilterByType DEFLATE text/plain
AddOutputFilterByType DEFLATE text/css
SSLCertificateFile /etc/letsencrypt/live/nyym.co/fullchain.pem
SSLCertificateKeyFile /etc/letsencrypt/live/nyym.co/privkey.pem
SSLCertificateChainFile /etc/letsencrypt/live/nyym.co/chain.pem
<Directory /var/www/nyym>
Options -MultiViews
AllowOverride All
Require all granted
RewriteEngine On
RewriteCond %{REQUEST_FILENAME} !-d
RewriteCond %{REQUEST_FILENAME} !-f
RewriteRule ^([^\.]+)$ $1.php [NC,L]
</Directory>
</VirtualHost>
<VirtualHost *:443>
ServerName git.lolcat.ca
SSLEngine On
SSLOptions +StdEnvVars
#ErrorLog ${APACHE_LOG_DIR}/error.log
AddOutputFilterByType DEFLATE application/json
AddOutputFilterByType DEFLATE application/javascript
AddOutputFilterByType DEFLATE application/x-javascript
AddOutputFilterByType DEFLATE text/html
AddOutputFilterByType DEFLATE text/plain
AddOutputFilterByType DEFLATE text/css
SSLCertificateFile /etc/letsencrypt/live/4get.ca/fullchain.pem
SSLCertificateKeyFile /etc/letsencrypt/live/4get.ca/privkey.pem
SSLCertificateChainFile /etc/letsencrypt/live/4get.ca/chain.pem
ProxyPreserveHost On
ProxyRequests off
AllowEncodedSlashes NoDecode
ProxyPass / http://localhost:3000/ nocanon
</VirtualHost>
<VirtualHost *:443>
ServerName live.lolcat.ca
ServerAdmin will@lolcat.ca
DocumentRoot /var/www/live
SSLEngine On
SSLOptions +StdEnvVars
#ErrorLog ${APACHE_LOG_DIR}/error.log
AddOutputFilterByType DEFLATE application/json
AddOutputFilterByType DEFLATE application/javascript
AddOutputFilterByType DEFLATE application/x-javascript
AddOutputFilterByType DEFLATE text/html
AddOutputFilterByType DEFLATE text/plain
AddOutputFilterByType DEFLATE text/css
SSLCertificateFile /etc/letsencrypt/live/4get.ca/fullchain.pem
SSLCertificateKeyFile /etc/letsencrypt/live/4get.ca/privkey.pem
SSLCertificateChainFile /etc/letsencrypt/live/4get.ca/chain.pem
</VirtualHost>
```

View file

@ -74,7 +74,7 @@ Now, edit the following file: `/etc/apache2/sites-available/000-default.conf`, r
DocumentRoot /var/www/4get DocumentRoot /var/www/4get
Options +MultiViews Options -MultiViews
RewriteEngine On RewriteEngine On
RewriteCond %{REQUEST_FILENAME} !-d RewriteCond %{REQUEST_FILENAME} !-d
RewriteCond %{REQUEST_FILENAME} !-f RewriteCond %{REQUEST_FILENAME} !-f
@ -92,47 +92,56 @@ To make the above snippet work, please refer to our <a href="https://git.lolcat.
## default-ssl.conf ## default-ssl.conf
Now, edit the file `/etc/apache2/sites-available/default-ssl.conf`, remove everything and, again, add each rule while modifying the relevant fields: Now, edit the file `/etc/apache2/sites-available/default-ssl.conf`, remove everything and, again, add each rule while modifying the relevant fields:
This ruleset will redirect all clients that specify an unknown `Host` to the domain of our choice. I recommend you uncomment the `ErrorLog` directive while setting things up in case a problem occurs with PHP. Don't worry about the invalid SSL paths, we will generate our certificates later; Just make sure you specify the right domains in there: First, append the following redirect rule to point traffic from `www.4get.ca` to `4get.ca`:
```xml ```xml
<VirtualHost *:443> <VirtualHost *:443>
ServerName www.4get.ca
SSLEngine On
SSLCertificateFile /etc/letsencrypt/live/4get.ca/fullchain.pem
SSLCertificateKeyFile /etc/letsencrypt/live/4get.ca/privkey.pem
SSLCertificateChainFile /etc/letsencrypt/live/4get.ca/chain.pem
RedirectMatch 301 ^(.*)$ https://4get.ca$1 RedirectMatch 301 ^(.*)$ https://4get.ca$1
</VirtualHost>
```
This ruleset tells apache2 where 4get is located (`/var/www/4get`), ensures that `4get.ca/settings` resolves to `4get.ca/settings.php` internally and that we deny access to `/data/*`, which may contain files you might want to keep private. `SSLOptions +StdEnvVars` makes it so that PHP can see whether the connection uses HTTPS, and which cipher was used. This is useful for basic bot protection.
Make sure to replace `4get.ca` with your own domain under the `SSLCertificate*` directives!
```xml
<VirtualHost *:443>
ServerName 4get.ca
ServerAdmin will@lolcat.ca ServerAdmin will@lolcat.ca
DocumentRoot /var/www/4get
SSLEngine On
SSLOptions +StdEnvVars
#ErrorLog ${APACHE_LOG_DIR}/error.log #ErrorLog ${APACHE_LOG_DIR}/error.log
SSLEngine on
<FilesMatch "\.(?:cgi|shtml|phtml|php)$">
SSLOptions +StdEnvVars
</FilesMatch>
<Directory /usr/lib/cgi-bin>
SSLOptions +StdEnvVars
</Directory>
AddOutputFilterByType DEFLATE application/json AddOutputFilterByType DEFLATE application/json
AddOutputFilterByType DEFLATE application/javascript AddOutputFilterByType DEFLATE application/javascript
AddOutputFilterByType DEFLATE application/x-javascript AddOutputFilterByType DEFLATE application/x-javascript
AddOutputFilterByType DEFLATE text/html AddOutputFilterByType DEFLATE text/html
AddOutputFilterByType DEFLATE text/plain AddOutputFilterByType DEFLATE text/plain
AddOutputFilterByType DEFLATE text/css AddOutputFilterByType DEFLATE text/css
SSLCertificateFile /etc/letsencrypt/live/4get.ca/fullchain.pem SSLCertificateFile /etc/letsencrypt/live/4get.ca/fullchain.pem
SSLCertificateKeyFile /etc/letsencrypt/live/4get.ca/privkey.pem SSLCertificateKeyFile /etc/letsencrypt/live/4get.ca/privkey.pem
</VirtualHost> SSLCertificateChainFile /etc/letsencrypt/live/4get.ca/chain.pem
```
<Directory /var/www/4get>
This ruleset tells apache2 where 4get is located (`/var/www/4get`), ensures that `4get.ca/settings` resolves to `4get.ca/settings.php` internally and that we deny access to `/data/*`, which may contain files you might want to keep private. Options -MultiViews
```xml AllowOverride All
<VirtualHost *:443> Require all granted
ServerName 4get.ca
RewriteEngine On
DocumentRoot /var/www/4get RewriteCond %{REQUEST_FILENAME} !-d
RewriteCond %{REQUEST_FILENAME} !-f
Options +MultiViews RewriteRule ^([^\.]+)$ $1.php [NC,L]
RewriteEngine On </Directory>
RewriteCond %{REQUEST_FILENAME} !-d
RewriteCond %{REQUEST_FILENAME} !-f
RewriteRule ^([^\.]+)$ $1.php [NC,L]
# deny access to private resources # deny access to private resources
<Directory /var/www/4get/data/> <Directory /var/www/4get/data/>
@ -142,28 +151,7 @@ This ruleset tells apache2 where 4get is located (`/var/www/4get`), ensures that
</VirtualHost> </VirtualHost>
``` ```
Don't forget to specify your other services here! Here's an example of a ruleset I use for `lolcat.ca`: By default, the first rule dictates where traffic should be redirected to in case the client specifies an unknown domain name. Don't forget your webserver's other rules! For a complete real-world example, please <a href="https://git.lolcat.ca/lolcat/4get/src/branch/master/docs/apache2-example.md">check out my real-world config file I use on 4get.ca</a>.
```xml
<VirtualHost *:443>
ServerName lolcat.ca
DocumentRoot /var/www/lolcat
Options +MultiViews
RewriteEngine On
RewriteCond %{REQUEST_FILENAME} !-d
RewriteCond %{REQUEST_FILENAME} !-f
RewriteRule ^([^\.]+)$ $1.php [NC,L]
</VirtualHost>
```
... Alongside its redirect rules.
```xml
<VirtualHost *:443>
ServerName www.lolcat.ca
RedirectMatch 301 ^(.*)$ https://lolcat.ca$1
</VirtualHost>
```
## security.conf ## security.conf
If you enabled the `headers` module, you can head over to `/etc/apache2/conf-enabled/security.conf` and edit: If you enabled the `headers` module, you can head over to `/etc/apache2/conf-enabled/security.conf` and edit:

View file

@ -8,10 +8,10 @@ Welcome! This guide assumes that you have a working 4get instance. This will hel
3. The captcha imagesets are located in `data/captcha/your_image_set/*.png` 3. The captcha imagesets are located in `data/captcha/your_image_set/*.png`
4. The captcha font is located in `data/fonts/captcha.ttf` 4. The captcha font is located in `data/fonts/captcha.ttf`
# Cloudflare bypass # Cloudflare bypass (TLS check)
**Note: this only allows you to bypass the browser integrity checks. Captchas & javascript challenges will not be bypassed.** **Note: this only allows you to bypass the browser integrity checks. Captchas & javascript challenges will not be bypassed.**
Configuring this lets you fetch images sitting behind Cloudflare and allows you to scrape the **Yep** search engine. Following these instructions might make your package manager unhappy. Configuring this lets you fetch images sitting behind Cloudflare and allows you to scrape the **Yep** & the **Mwmbl** search engines. Please be aware that APT will fight against you and will re-install the openSSL-version of curl constantly when updating.
First, follow these instructions. Only install the Firefox modules: First, follow these instructions. Only install the Firefox modules:

View file

@ -89,6 +89,7 @@ class frontend{
$user_agent = ""; $user_agent = "";
$bad_header = false; $bad_header = false;
// block bots that present X-Forwarded-For, Via, etc
foreach($headers_raw as $headerkey => $headervalue){ foreach($headers_raw as $headerkey => $headervalue){
$headerkey = strtolower($headerkey); $headerkey = strtolower($headerkey);
@ -106,12 +107,27 @@ class frontend{
} }
} }
// SSL check
// Flags the client as a bot when the TLS cipher it negotiated is listed in
// config::DISALLOWED_SSL. Relies on the webserver exporting the SSL
// environment to PHP (apache2: "SSLOptions +StdEnvVars").
$bad_ssl = false;

// apache2 exports the flag as "HTTPS" and $_SERVER keys are case-sensitive;
// the lowercase key is still honored in case a setup provides it that way
$https = "off";

if(isset($_SERVER["HTTPS"])){
	
	$https = $_SERVER["HTTPS"];
}elseif(isset($_SERVER["https"])){
	
	$https = $_SERVER["https"];
}

if(
	$https == "on" &&
	isset($_SERVER["SSL_CIPHER"]) &&
	// compare against the cipher blocklist, not the header blocklist
	in_array($_SERVER["SSL_CIPHER"], config::DISALLOWED_SSL)
){
	
	$bad_ssl = true;
}
if(
$bad_header === true ||
$bad_ssl === true ||
$user_agent == "" ||
// user agent check
preg_match( preg_match(
config::HEADER_REGEX, config::HEADER_REGEX,
$user_agent $user_agent
) || )
$bad_header === true
){ ){
// bot detected !! // bot detected !!

View file

@ -799,128 +799,147 @@ class google{
$title = "Notice"; $title = "Notice";
} }
$description = []; $div =
$as =
$this->fuckhtml $this->fuckhtml
->getElementsByTagName( ->getElementsByTagName(
"a" "div"
); );
if(count($as) !== 0){ // probe for related searches div, if found, ignore it cause its shit
$probe =
$this->fuckhtml
->getElementsByAttributeValue(
"role",
"list",
$div
);
// also probe for children
if(count($probe) === 0){
$first = true; $probe =
foreach($as as $a){
$text_link =
$this->fuckhtml
->getTextContent(
$a
);
if(stripos($text_link, "repeat the search") !== false){
$last_page = true;
break 2;
}
$parts =
explode(
$a["outerHTML"],
$card["innerHTML"],
2
);
$card["innerHTML"] = $parts[1];
$value =
preg_replace(
'/ +/',
" ",
$this->fuckhtml
->getTextContent(
$parts[0],
false,
false
)
);
if(strlen(trim($value)) !== 0){
$description[] = [
"type" => "text",
"value" => $value
];
if($first){
$description[0]["value"] =
ltrim($description[0]["value"]);
}
}
$first = false;
$description[] = [
"type" => "link",
"url" =>
$this->fuckhtml
->getTextContent(
$a["attributes"]
["href"]
),
"value" => $text_link
];
}
$text =
$this->fuckhtml $this->fuckhtml
->getTextContent( ->getElementsByClassName(
$card["innerHTML"], $this->getstyle(
false, [
false "flex-shrink" => "0",
"-moz-box-flex" => "0",
"flex-grow" => "0",
"overflow" => "hidden"
]
),
$div
); );
if(strlen(trim($text)) !== 0){
$description[] = [
"type" => "text",
"value" =>
rtrim(
$text
)
];
}
}else{
// @TODO: Check if this ever gets populated without giving me garbage
/*
$text =
$this->fuckhtml
->getTextContent(
$card
);
if($text != ""){
$description[] = [
"type" => "text",
"value" => $text
];
}*/
} }
if(count($description) !== 0){ if(count($probe) === 0){
$out["answer"][] = [ $description = [];
"title" => $title,
"description" => $description, $as =
"url" => null, $this->fuckhtml
"thumb" => null, ->getElementsByTagName(
"table" => [], "a"
"sublink" => [] );
];
if(count($as) !== 0){
$first = true;
foreach($as as $a){
$text_link =
$this->fuckhtml
->getTextContent(
$a
);
if(stripos($text_link, "repeat the search") !== false){
$last_page = true;
break 2;
}
$parts =
explode(
$a["outerHTML"],
$card["innerHTML"],
2
);
$card["innerHTML"] = $parts[1];
$value =
preg_replace(
'/ +/',
" ",
$this->fuckhtml
->getTextContent(
$parts[0],
false,
false
)
);
if(strlen(trim($value)) !== 0){
$description[] = [
"type" => "text",
"value" => $value
];
if($first){
$description[0]["value"] =
ltrim($description[0]["value"]);
}
}
$first = false;
$description[] = [
"type" => "link",
"url" =>
$this->fuckhtml
->getTextContent(
$a["attributes"]
["href"]
),
"value" => $text_link
];
}
$text =
$this->fuckhtml
->getTextContent(
$card["innerHTML"],
false,
false
);
if(strlen(trim($text)) !== 0){
$description[] = [
"type" => "text",
"value" =>
rtrim(
$text
)
];
}
}
if(count($description) !== 0){
$out["answer"][] = [
"title" => $title,
"description" => $description,
"url" => null,
"thumb" => null,
"table" => [],
"sublink" => []
];
}
} }
} }
@ -2451,6 +2470,7 @@ class google{
$this->getstyle( $this->getstyle(
[ [
"outline-offset" => "-1px", "outline-offset" => "-1px",
"outline-width" => "1px",
"display" => "flex", "display" => "flex",
"flex-direction" => "column", "flex-direction" => "column",
"flex-grow" => "1" "flex-grow" => "1"

View file

@ -298,9 +298,8 @@ class greppr{
$description = $description =
$this->fuckhtml $this->fuckhtml
->getElementsByFuzzyAttributeValue( ->getElementsByClassName(
"style", "highlightedDesc",
"color:#777777;",
"p" "p"
); );
@ -310,9 +309,11 @@ class greppr{
}else{ }else{
$description = $description =
$this->fuckhtml $this->limitstrlen(
->getTextContent( $this->fuckhtml
$description[0] ->getTextContent(
$description[0]
)
); );
} }
@ -325,7 +326,7 @@ class greppr{
$date = $date =
strtotime( strtotime(
explode( explode(
"Added:", ":",
$this->fuckhtml $this->fuckhtml
->getTextContent( ->getTextContent(
$date[count($date) - 1]["innerHTML"] $date[count($date) - 1]["innerHTML"]
@ -426,4 +427,9 @@ class greppr{
return $tokens; return $tokens;
} }
private function limitstrlen($text){
	
	// Shorten $text to roughly 300 characters without cutting a word:
	// wrap it on word boundaries at 300 columns, then keep the first line only.
	$wrapped = wordwrap($text, 300, "\n");
	$lines = explode("\n", $wrapped);
	
	return $lines[0];
}
} }

View file

@ -27,18 +27,24 @@ class mwmbl{
curl_setopt($curlproc, CURLOPT_URL, $url); curl_setopt($curlproc, CURLOPT_URL, $url);
// use http2
curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
curl_setopt($curlproc, CURLOPT_HTTPHEADER, curl_setopt($curlproc, CURLOPT_HTTPHEADER,
["User-Agent: " . config::USER_AGENT, ["User-Agent: " . config::USER_AGENT,
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
"Accept-Language: en-US,en;q=0.5", "Accept-Language: en-US,en;q=0.5",
"Accept-Encoding: gzip", "Accept-Encoding: gzip",
"Referer: https://beta.mwmbl.org/",
"DNT: 1", "DNT: 1",
"Sec-GPC: 1",
"Connection: keep-alive", "Connection: keep-alive",
"Upgrade-Insecure-Requests: 1", "Upgrade-Insecure-Requests: 1",
"Sec-Fetch-Dest: document", "Sec-Fetch-Dest: document",
"Sec-Fetch-Mode: navigate", "Sec-Fetch-Mode: navigate",
"Sec-Fetch-Site: none", "Sec-Fetch-Site: same-origin",
"Priority: u=0, i",
"Sec-Fetch-User: ?1"] "Sec-Fetch-User: ?1"]
); );
@ -72,14 +78,14 @@ class mwmbl{
try{ try{
$html = $this->get( $html = $this->get(
$this->backend->get_ip(), // no next page! $this->backend->get_ip(), // no next page!
"https://mwmbl.org/app/home/", "https://beta.mwmbl.org/",
[ [
"q" => $search "q" => $search
] ]
); );
}catch(Exception $error){ }catch(Exception $error){
throw new Exception("Failed to fetch HTML"); throw new Exception("Failed to fetch HTML. If you're getting a timeout, make sure you have curl-impersonate setup.");
} }
$out = [ $out = [
@ -115,6 +121,68 @@ class mwmbl{
$this->fuckhtml $this->fuckhtml
->getElementsByTagName("p"); ->getElementsByTagName("p");
$sublinks = [];
$mores =
$this->fuckhtml
->getElementsByClassName(
"result-link-more",
"div"
);
foreach($mores as $more){
$this->fuckhtml->load($more);
$as =
$this->fuckhtml
->getElementsByClassName(
"more",
"a"
);
if(count($as) === 0){
// ?? invalid
continue;
}
$sublinks[] = [
"title" =>
$this->titledots(
$this->fuckhtml
->getTextContent(
$this->fuckhtml
->getElementsByClassName(
"more-title",
"span"
)[0]
)
),
"description" =>
$this->titledots(
$this->fuckhtml
->getTextContent(
$this->fuckhtml
->getElementsByClassName(
"more-extract",
"span"
)[0]
)
),
"url" =>
$this->fuckhtml
->getTextContent(
$as[0]
["attributes"]
["href"]
)
];
}
// reset
$this->fuckhtml->load($result);
$out["web"][] = [ $out["web"][] = [
"title" => "title" =>
$this->titledots( $this->titledots(
@ -153,7 +221,7 @@ class mwmbl{
"url" => null, "url" => null,
"ratio" => null "ratio" => null
], ],
"sublink" => [], "sublink" => $sublinks,
"table" => [] "table" => []
]; ];
} }

View file

@ -13,7 +13,7 @@ class pinterest{
return []; return [];
} }
private function get($url, $get = []){ private function get($proxy, $url, $get = []){
$curlproc = curl_init(); $curlproc = curl_init();
@ -45,7 +45,7 @@ class pinterest{
curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30); curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
curl_setopt($curlproc, CURLOPT_TIMEOUT, 30); curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
$this->proxy->assign_proxy($curlproc); $this->backend->assign_proxy($curlproc, $proxy);
$data = curl_exec($curlproc); $data = curl_exec($curlproc);
@ -60,45 +60,63 @@ class pinterest{
public function image($get){ public function image($get){
$search = $get["s"]; if($get["npt"]){
$out = [ // @TODO
"status" => "ok", // post data for next page
"npt" => null, $data = [
"image" => [] "source_url" => "/search/pins/?q=" . urlencode($search) . "&rs=typed",
]; "data" =>
json_encode(
$filter = [ [
"source_url" => "/search/pins/?q=" . urlencode($search), // {"options":{"applied_filters":null,"appliedProductFilters":"---","article":null,"auto_correction_disabled":false,"corpus":null,"customized_rerank_type":null,"domains":null,"filters":null,"journey_depth":null,"page_size":null,"price_max":null,"price_min":null,"query_pin_sigs":null,"query":"higurashi","redux_normalize_feed":true,"rs":"typed","scope":"pins","selected_one_bar_modules":null,"source_id":null,"source_module_id":null,"top_pin_id":null,"bookmarks":["Y2JVSG81V2sxcmNHRlpWM1J5VFVad1ZsWlVRbXhpVmtreVZsZHpOV0pIU2tkV2FscFhVbXhhVkZreU1WSmtNREZWVjIxR1RrMXNTbEJXYlhSaFVtMVdjMVZ1U2xaaWEzQnpXVlJPVTJWV1pISlhhM1JYVm10V05sVldVbE5XVjBwMVVXMUdWVll6VFhoVWJYaFhWMVp3Ums1V1RsTmlSbGt5Vm10YWFtVkdWbkpOU0dSUFZsZG9XRmxzWkc5VlZscHlWbGhrYkdKR1NubFdWelZQWVVaYWRHVkVRbFppUmtwVVZrUktWMlJIVWtWV2JHaHBVakZLU0Zkc1pEUmtNVnBZVW10b2FsSXdXbkJXYlRWRFpHeGFSMWRzVG1oaGVrWllXV3RvVTFVeFpFaFZiRUpoVm5wRk1GbHFSbXRYVjA1R1YyczFWMVpHV2pSWFZtaDNVakZrY2sxWVRsaGlhM0JXV1ZSR1MyRkdiRlZTYm1SVVVteHdXbGxWVlRGVk1VbDVWRmhrVjAxdVVuWlVhMXBTWlVaT2MxcEhSbE5TTWswMVdtdGFWMU5YU2paVmJYaFRUVmhDUjFZeU5YZFVNVkY0VjJ0b1ZXRnJOVlpVVmxwTFVURndXR042VmxOV2ExcGFXVlZWTlZVeFNYZE5WRTVYVWtWYVZGWkhNVTlXTVU1WllVWk9hR1ZyV2s1WFZ6QXhZakpPVjFWWWFHRlNWbkJRVm14U1IwMUdXWGxOVkVKVlRWWnNORll5TURWV1YwVjVWV3hDV21FeGNETmFSVnByVjFkS1IyTkhhR2xYUjJkM1ZtdGFhMlF4VVhsVGJGcE9Wa1p3YjFwWGVFdFZWbFp4VW14YWJGWnRVbHBaTUdoTFZHMUtTR1ZJYUZkV2VrWjJWMVphU21ReVJYcGpSbFpwVW10d1RGZHJVa0pPVms1SFZHNVNUbFl3V2xoVmJYUldaVVpaZUZremFGUk5hM0JYVkZaYVYyRkZNSGxWYkVKYVlrWlZlRnBGV210WFIwNUpVMnMxVTFaR1dscFdWekI0VFVaV1IxTllaR3BUUlhCb1dWUkdWbVZHVm5SbFJuQnNZbFpKTWxSVlVYaFBSVGxGV1hwR1QyVnJSVEZVVlZKT1RrVXhSVkpVUWs5bGJFVXhWRmhzZDFOR1ZsWmtNMFp0VWpGYWIxZFhjRXBsUlRGSVZWaHdUbFl4YTNoVVZWSnFUVVUxV0ZadGFFOVNSVnB6Vkd0a1drMUdiRFpUVkVaT1pXMWplRmRzVWxkaFJuQllWVlJTVDJWdFRqWlVNVkpTWlZad2NWcEhkRTlsYTFwMFZGVlNhMkpWTVZWVFZFcE9Wa1pzTmxkWE1WSk9WVEYwVlcweFVGWXdXVFJXUjNSWFYwZGFRbEJVTVRoUFJHTXhUbnBCTlUxRVRUUk5SRVV3VG5wUk5VMTVjRWhWVlhkeFprUlZlRTlFVVRKWlZHc3lUMW
RSTWsxVVVUSk9iVnBvV1RKWmVrNTZXWGhPTWs1cFQwUkZNVTlFVm1sTlZGcHBUV3BTYTFsWFRtcE9SR015VG1wVk5GbHFaR2haVjFacldWUmFiVmxxWkdoYVZGWnFUa1JXT0ZSclZsaG1RVDA5fFVIbzVhRkpYZUc1WFYyUlpWVEpHYkdGNk1XWk5ha1ptVFZSR09FOUVZekZPZWtFMVRVUk5ORTFFUlRCT2VsRTFUWGx3U0ZWVmQzRm1SMWw1VFZSUk1WbDZUVEJhUjFGNVQxZFNhVnB0VlRGT1JFVXdXVlJuZVU1cVRUUk5hbU40VDBSSk1VNXFWVEZOYlZwcVdsUnJlRTFFVVhwWmVsVjNXbXBvYkU1dFJYbE9ha0Y2VDFSSk5VMTZWVEJaYWtJNFZHdFdXR1pCUFQwPXxOb25lfDg3NTcwOTAzODAxNDc0OTMqR1FMKnwzMjM3YjM3ZGNhMGU3YjYyYzYzYzAyZGJkNGU1MjdlNzMyMTExMTNlMmUyMzEyOWM2MDAzYmU1ZTlmZjkwYjAwfE5FV3w="]},"context":{}}
"rs" => "typed", ]
"data" => );
json_encode( ];
[
"options" => [ }else{
"article" => null,
"applied_filters" => null, $search = $get["s"];
"appliedProductFilters" => "---", if(strlen($search) === 0){
"auto_correction_disabled" => false,
"corpus" => null, throw new Exception("Search term is empty!");
"customized_rerank_type" => null, }
"filters" => null,
"query" => $search, $filter = [
"query_pin_sigs" => null, "source_url" => "/search/pins/?q=" . urlencode($search),
"redux_normalize_feed" => true, "rs" => "typed",
"rs" => "typed", "data" =>
"scope" => "pins", // pins, boards, videos, json_encode(
"source_id" => null [
], "options" => [
"context" => [] "article" => null,
] "applied_filters" => null,
), "appliedProductFilters" => "---",
"_" => substr(str_replace(".", "", (string)microtime(true)), 0, -1) "auto_correction_disabled" => false,
]; "corpus" => null,
"customized_rerank_type" => null,
"filters" => null,
"query" => $search,
"query_pin_sigs" => null,
"redux_normalize_feed" => true,
"rs" => "typed",
"scope" => "pins", // pins, boards, videos,
"source_id" => null
],
"context" => []
]
),
"_" => substr(str_replace(".", "", (string)microtime(true)), 0, -1)
];
$proxy = $this->backend->get_ip();
}
try{ try{
$json = $json =
json_decode( json_decode(
$this->get( $this->get(
$proxy,
"https://www.pinterest.ca/resource/BaseSearchResource/get/", "https://www.pinterest.ca/resource/BaseSearchResource/get/",
$filter $filter
), ),
@ -115,7 +133,11 @@ class pinterest{
throw new Exception("Failed to decode JSON"); throw new Exception("Failed to decode JSON");
} }
//print_r($json); $out = [
"status" => "ok",
"npt" => null,
"image" => []
];
foreach( foreach(
$json $json
@ -189,7 +211,6 @@ class pinterest{
break; break;
case "board": case "board":
if(isset($item["cover_pin"]["image_url"])){ if(isset($item["cover_pin"]["image_url"])){
$image = [ $image = [

View file

@ -83,6 +83,10 @@ $settings = [
"value" => "startpage", "value" => "startpage",
"text" => "Startpage" "text" => "Startpage"
], ],
[
"value" => "kagi",
"text" => "Kagi"
],
[ [
"value" => "qwant", "value" => "qwant",
"text" => "Qwant" "text" => "Qwant"