mirror of
https://git.lolcat.ca/lolcat/4get.git
synced 2024-12-03 23:42:16 -05:00
Compare commits
5 commits
4e4796bb71
...
08ed77f947
Author | SHA1 | Date | |
---|---|---|---|
|
08ed77f947 | ||
|
883a650f84 | ||
|
fbac3eeb8d | ||
|
36993013e5 | ||
|
beb08f46e2 |
11 changed files with 545 additions and 218 deletions
|
@ -19,7 +19,8 @@ class autocomplete{
|
|||
"marginalia" => "https://search.marginalia.nu/suggest/?partial={searchTerms}",
|
||||
"yt" => "https://suggestqueries-clients6.youtube.com/complete/search?client=youtube&q={searchTerms}",
|
||||
"sc" => "",
|
||||
"startpage" => "https://www.startpage.com/suggestions?q={searchTerms}&format=opensearch&segment=startpage.defaultffx&lui=english"
|
||||
"startpage" => "https://www.startpage.com/suggestions?q={searchTerms}&format=opensearch&segment=startpage.defaultffx&lui=english",
|
||||
"kagi" => "https://kagi.com/api/autosuggest?q={searchTerms}"
|
||||
];
|
||||
|
||||
/*
|
||||
|
|
|
@ -63,6 +63,14 @@ class config{
|
|||
//"via"
|
||||
];
|
||||
|
||||
// Block SSL ciphers used by CLI tools used for botting
|
||||
// Basically a primitive version of Cloudflare's browser integrity check
|
||||
// ** If curl can still access the site (with spoofed headers), please make sure you use the new apache2 config **
|
||||
// https://git.lolcat.ca/lolcat/4get/src/branch/master/docs/apache2.md
|
||||
const DISALLOWED_SSL = [
|
||||
// "TLS_AES_256_GCM_SHA384" // used by WGET and CURL
|
||||
];
|
||||
|
||||
// Maximal number of searches per captcha key/pass issued. Counter gets
|
||||
// reset on every APCU cache clear (should happen once a day).
|
||||
// Only useful when BOT_PROTECTION is NOT set to 0
|
||||
|
@ -111,7 +119,7 @@ class config{
|
|||
|
||||
// Default user agent to use for scraper requests. Sometimes ignored to get specific webpages
|
||||
// Changing this might break things.
|
||||
const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:128.0) Gecko/20100101 Firefox/128.0";
|
||||
const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:129.0) Gecko/20100101 Firefox/129.0";
|
||||
|
||||
// Proxy pool assignments for each scraper
|
||||
// false = Use server's raw IP
|
||||
|
|
195
docs/apache2-example.md
Normal file
195
docs/apache2-example.md
Normal file
|
@ -0,0 +1,195 @@
|
|||
# Sample Apache2 configuration
|
||||
This is the apache2 configuration file used on the 4get.ca official instance, in hopes that it's useful to you!
|
||||
|
||||
Looking for the apache2 guide? <a href="https://git.lolcat.ca/lolcat/4get/src/branch/master/docs/apache2.md">Go here</a>.
|
||||
|
||||
```xml
|
||||
<VirtualHost *:443>
|
||||
ServerName www.4get.ca
|
||||
|
||||
SSLEngine On
|
||||
SSLCertificateFile /etc/letsencrypt/live/4get.ca/fullchain.pem
|
||||
SSLCertificateKeyFile /etc/letsencrypt/live/4get.ca/privkey.pem
|
||||
SSLCertificateChainFile /etc/letsencrypt/live/4get.ca/chain.pem
|
||||
|
||||
RedirectMatch 301 ^(.*)$ https://4get.ca$1
|
||||
</VirtualHost>
|
||||
|
||||
<VirtualHost *:443>
|
||||
ServerName 4get.ca
|
||||
|
||||
ServerAdmin will@lolcat.ca
|
||||
DocumentRoot /var/www/4get
|
||||
|
||||
SSLEngine On
|
||||
SSLOptions +StdEnvVars
|
||||
|
||||
#ErrorLog ${APACHE_LOG_DIR}/error.log
|
||||
|
||||
AddOutputFilterByType DEFLATE application/json
|
||||
AddOutputFilterByType DEFLATE application/javascript
|
||||
AddOutputFilterByType DEFLATE application/x-javascript
|
||||
AddOutputFilterByType DEFLATE text/html
|
||||
AddOutputFilterByType DEFLATE text/plain
|
||||
AddOutputFilterByType DEFLATE text/css
|
||||
|
||||
SSLCertificateFile /etc/letsencrypt/live/4get.ca/fullchain.pem
|
||||
SSLCertificateKeyFile /etc/letsencrypt/live/4get.ca/privkey.pem
|
||||
SSLCertificateChainFile /etc/letsencrypt/live/4get.ca/chain.pem
|
||||
|
||||
<Directory /var/www/4get>
|
||||
Options -MultiViews
|
||||
AllowOverride All
|
||||
Require all granted
|
||||
|
||||
RewriteEngine On
|
||||
RewriteCond %{REQUEST_FILENAME} !-d
|
||||
RewriteCond %{REQUEST_FILENAME} !-f
|
||||
RewriteRule ^([^\.]+)$ $1.php [NC,L]
|
||||
</Directory>
|
||||
|
||||
# deny access to private resources
|
||||
<Directory /var/www/4get/data/>
|
||||
Order Deny,allow
|
||||
Deny from all
|
||||
</Directory>
|
||||
</VirtualHost>
|
||||
|
||||
<VirtualHost *:443>
|
||||
ServerName www.lolcat.ca
|
||||
|
||||
SSLEngine On
|
||||
SSLCertificateFile /etc/letsencrypt/live/4get.ca/fullchain.pem
|
||||
SSLCertificateKeyFile /etc/letsencrypt/live/4get.ca/privkey.pem
|
||||
SSLCertificateChainFile /etc/letsencrypt/live/4get.ca/chain.pem
|
||||
|
||||
RedirectMatch 301 ^(.*)$ https://lolcat.ca$1
|
||||
</VirtualHost>
|
||||
|
||||
<VirtualHost *:443>
|
||||
ServerName lolcat.ca
|
||||
|
||||
ServerAdmin will@lolcat.ca
|
||||
DocumentRoot /var/www/lolcat
|
||||
|
||||
SSLEngine On
|
||||
SSLOptions +StdEnvVars
|
||||
|
||||
#ErrorLog ${APACHE_LOG_DIR}/error.log
|
||||
|
||||
AddOutputFilterByType DEFLATE application/json
|
||||
AddOutputFilterByType DEFLATE application/javascript
|
||||
AddOutputFilterByType DEFLATE application/x-javascript
|
||||
AddOutputFilterByType DEFLATE text/html
|
||||
AddOutputFilterByType DEFLATE text/plain
|
||||
AddOutputFilterByType DEFLATE text/css
|
||||
|
||||
SSLCertificateFile /etc/letsencrypt/live/4get.ca/fullchain.pem
|
||||
SSLCertificateKeyFile /etc/letsencrypt/live/4get.ca/privkey.pem
|
||||
SSLCertificateChainFile /etc/letsencrypt/live/4get.ca/chain.pem
|
||||
|
||||
<Directory /var/www/lolcat>
|
||||
Options -MultiViews
|
||||
AllowOverride All
|
||||
Require all granted
|
||||
|
||||
RewriteEngine On
|
||||
RewriteCond %{REQUEST_FILENAME} !-d
|
||||
RewriteCond %{REQUEST_FILENAME} !-f
|
||||
RewriteRule ^([^\.]+)$ $1.php [NC,L]
|
||||
</Directory>
|
||||
</VirtualHost>
|
||||
|
||||
<VirtualHost *:443>
|
||||
ServerName www.nyym.co
|
||||
|
||||
SSLEngine On
|
||||
SSLCertificateFile /etc/letsencrypt/live/nyym.co/fullchain.pem
|
||||
SSLCertificateKeyFile /etc/letsencrypt/live/nyym.co/privkey.pem
|
||||
SSLCertificateChainFile /etc/letsencrypt/live/nyym.co/chain.pem
|
||||
|
||||
RedirectMatch 301 ^(.*)$ https://nyym.co$1
|
||||
</VirtualHost>
|
||||
|
||||
<VirtualHost *:443>
|
||||
ServerName nyym.co
|
||||
|
||||
ServerAdmin will@lolcat.ca
|
||||
DocumentRoot /var/www/nyym
|
||||
|
||||
SSLEngine On
|
||||
SSLOptions +StdEnvVars
|
||||
|
||||
#ErrorLog ${APACHE_LOG_DIR}/error.log
|
||||
|
||||
AddOutputFilterByType DEFLATE application/json
|
||||
AddOutputFilterByType DEFLATE application/javascript
|
||||
AddOutputFilterByType DEFLATE application/x-javascript
|
||||
AddOutputFilterByType DEFLATE text/html
|
||||
AddOutputFilterByType DEFLATE text/plain
|
||||
AddOutputFilterByType DEFLATE text/css
|
||||
|
||||
SSLCertificateFile /etc/letsencrypt/live/nyym.co/fullchain.pem
|
||||
SSLCertificateKeyFile /etc/letsencrypt/live/nyym.co/privkey.pem
|
||||
SSLCertificateChainFile /etc/letsencrypt/live/nyym.co/chain.pem
|
||||
|
||||
<Directory /var/www/nyym>
|
||||
Options -MultiViews
|
||||
AllowOverride All
|
||||
Require all granted
|
||||
|
||||
RewriteEngine On
|
||||
RewriteCond %{REQUEST_FILENAME} !-d
|
||||
RewriteCond %{REQUEST_FILENAME} !-f
|
||||
RewriteRule ^([^\.]+)$ $1.php [NC,L]
|
||||
</Directory>
|
||||
</VirtualHost>
|
||||
|
||||
<VirtualHost *:443>
|
||||
ServerName git.lolcat.ca
|
||||
|
||||
SSLEngine On
|
||||
SSLOptions +StdEnvVars
|
||||
|
||||
#ErrorLog ${APACHE_LOG_DIR}/error.log
|
||||
|
||||
AddOutputFilterByType DEFLATE application/json
|
||||
AddOutputFilterByType DEFLATE application/javascript
|
||||
AddOutputFilterByType DEFLATE application/x-javascript
|
||||
AddOutputFilterByType DEFLATE text/html
|
||||
AddOutputFilterByType DEFLATE text/plain
|
||||
AddOutputFilterByType DEFLATE text/css
|
||||
|
||||
SSLCertificateFile /etc/letsencrypt/live/4get.ca/fullchain.pem
|
||||
SSLCertificateKeyFile /etc/letsencrypt/live/4get.ca/privkey.pem
|
||||
SSLCertificateChainFile /etc/letsencrypt/live/4get.ca/chain.pem
|
||||
|
||||
ProxyPreserveHost On
|
||||
ProxyRequests off
|
||||
AllowEncodedSlashes NoDecode
|
||||
ProxyPass / http://localhost:3000/ nocanon
|
||||
</VirtualHost>
|
||||
|
||||
<VirtualHost *:443>
|
||||
ServerName live.lolcat.ca
|
||||
|
||||
ServerAdmin will@lolcat.ca
|
||||
DocumentRoot /var/www/live
|
||||
|
||||
SSLEngine On
|
||||
SSLOptions +StdEnvVars
|
||||
|
||||
#ErrorLog ${APACHE_LOG_DIR}/error.log
|
||||
|
||||
AddOutputFilterByType DEFLATE application/json
|
||||
AddOutputFilterByType DEFLATE application/javascript
|
||||
AddOutputFilterByType DEFLATE application/x-javascript
|
||||
AddOutputFilterByType DEFLATE text/html
|
||||
AddOutputFilterByType DEFLATE text/plain
|
||||
AddOutputFilterByType DEFLATE text/css
|
||||
|
||||
SSLCertificateFile /etc/letsencrypt/live/4get.ca/fullchain.pem
|
||||
SSLCertificateKeyFile /etc/letsencrypt/live/4get.ca/privkey.pem
|
||||
SSLCertificateChainFile /etc/letsencrypt/live/4get.ca/chain.pem
|
||||
</VirtualHost>
|
||||
```
|
|
@ -74,7 +74,7 @@ Now, edit the following file: `/etc/apache2/sites-available/000-default.conf`, r
|
|||
|
||||
DocumentRoot /var/www/4get
|
||||
|
||||
Options +MultiViews
|
||||
Options -MultiViews
|
||||
RewriteEngine On
|
||||
RewriteCond %{REQUEST_FILENAME} !-d
|
||||
RewriteCond %{REQUEST_FILENAME} !-f
|
||||
|
@ -92,47 +92,56 @@ To make the above snippet work, please refer to our <a href="https://git.lolcat.
|
|||
## default-ssl.conf
|
||||
Now, edit the file `/etc/apache2/sites-available/default-ssl.conf`, remove everything and, again, add each rule while modifying the relevant fields:
|
||||
|
||||
This ruleset will redirect all clients that specify an unknown `Host` to the domain of our choice. I recommend you uncomment the `ErrorLog` directive while setting things up in case a problem occurs with PHP. Don't worry about the invalid SSL paths, we will generate our certificates later; Just make sure you specify the right domains in there:
|
||||
First, append the following redirect rule to point traffic from `www.4get.ca` to `4get.ca`:
|
||||
```xml
|
||||
<VirtualHost *:443>
|
||||
ServerName www.4get.ca
|
||||
|
||||
SSLEngine On
|
||||
SSLCertificateFile /etc/letsencrypt/live/4get.ca/fullchain.pem
|
||||
SSLCertificateKeyFile /etc/letsencrypt/live/4get.ca/privkey.pem
|
||||
SSLCertificateChainFile /etc/letsencrypt/live/4get.ca/chain.pem
|
||||
|
||||
RedirectMatch 301 ^(.*)$ https://4get.ca$1
|
||||
</VirtualHost>
|
||||
```
|
||||
|
||||
This ruleset tells apache2 where 4get is located (`/var/www/4get`), ensures that `4get.ca/settings` resolves to `4get.ca/settings.php` internally, and denies access to `/data/*`, which may contain files you might want to keep private. `SSLOptions +StdEnvVars` lets PHP see whether the connection uses HTTPS and which cipher was used, which is useful for basic bot protection.
|
||||
|
||||
Make sure to replace `4get.ca` with your own domain under the `SSLCertificate*` directives!
|
||||
```xml
|
||||
<VirtualHost *:443>
|
||||
ServerName 4get.ca
|
||||
|
||||
ServerAdmin will@lolcat.ca
|
||||
|
||||
DocumentRoot /var/www/4get
|
||||
|
||||
SSLEngine On
|
||||
SSLOptions +StdEnvVars
|
||||
|
||||
#ErrorLog ${APACHE_LOG_DIR}/error.log
|
||||
|
||||
SSLEngine on
|
||||
|
||||
<FilesMatch "\.(?:cgi|shtml|phtml|php)$">
|
||||
SSLOptions +StdEnvVars
|
||||
</FilesMatch>
|
||||
<Directory /usr/lib/cgi-bin>
|
||||
SSLOptions +StdEnvVars
|
||||
</Directory>
|
||||
|
||||
|
||||
AddOutputFilterByType DEFLATE application/json
|
||||
AddOutputFilterByType DEFLATE application/javascript
|
||||
AddOutputFilterByType DEFLATE application/x-javascript
|
||||
AddOutputFilterByType DEFLATE text/html
|
||||
AddOutputFilterByType DEFLATE text/plain
|
||||
AddOutputFilterByType DEFLATE text/css
|
||||
|
||||
|
||||
SSLCertificateFile /etc/letsencrypt/live/4get.ca/fullchain.pem
|
||||
SSLCertificateKeyFile /etc/letsencrypt/live/4get.ca/privkey.pem
|
||||
</VirtualHost>
|
||||
```
|
||||
|
||||
This ruleset tells apache2 where 4get is located (`/var/www/4get`), ensures that `4get.ca/settings` resolves to `4get.ca/settings.php` internally and that we deny access to `/data/*`, which may contain files you might want to keep private.
|
||||
```xml
|
||||
<VirtualHost *:443>
|
||||
ServerName 4get.ca
|
||||
|
||||
DocumentRoot /var/www/4get
|
||||
|
||||
Options +MultiViews
|
||||
RewriteEngine On
|
||||
RewriteCond %{REQUEST_FILENAME} !-d
|
||||
RewriteCond %{REQUEST_FILENAME} !-f
|
||||
RewriteRule ^([^\.]+)$ $1.php [NC,L]
|
||||
SSLCertificateChainFile /etc/letsencrypt/live/4get.ca/chain.pem
|
||||
|
||||
<Directory /var/www/4get>
|
||||
Options -MultiViews
|
||||
AllowOverride All
|
||||
Require all granted
|
||||
|
||||
RewriteEngine On
|
||||
RewriteCond %{REQUEST_FILENAME} !-d
|
||||
RewriteCond %{REQUEST_FILENAME} !-f
|
||||
RewriteRule ^([^\.]+)$ $1.php [NC,L]
|
||||
</Directory>
|
||||
|
||||
# deny access to private resources
|
||||
<Directory /var/www/4get/data/>
|
||||
|
@ -142,28 +151,7 @@ This ruleset tells apache2 where 4get is located (`/var/www/4get`), ensures that
|
|||
</VirtualHost>
|
||||
```
|
||||
|
||||
Don't forget to specify your other services here! Here's an example of a ruleset I use for `lolcat.ca`:
|
||||
```xml
|
||||
<VirtualHost *:443>
|
||||
ServerName lolcat.ca
|
||||
|
||||
DocumentRoot /var/www/lolcat
|
||||
|
||||
Options +MultiViews
|
||||
RewriteEngine On
|
||||
RewriteCond %{REQUEST_FILENAME} !-d
|
||||
RewriteCond %{REQUEST_FILENAME} !-f
|
||||
RewriteRule ^([^\.]+)$ $1.php [NC,L]
|
||||
</VirtualHost>
|
||||
```
|
||||
|
||||
... along with its redirect rules.
|
||||
```xml
|
||||
<VirtualHost *:443>
|
||||
ServerName www.lolcat.ca
|
||||
RedirectMatch 301 ^(.*)$ https://lolcat.ca$1
|
||||
</VirtualHost>
|
||||
```
|
||||
By default, the first rule dictates where traffic is redirected when the client specifies an unknown domain name. Don't forget your webserver's other rules! For a complete example, please <a href="https://git.lolcat.ca/lolcat/4get/src/branch/master/docs/apache2-example.md">check out the real-world config file I use on 4get.ca</a>.
|
||||
|
||||
## security.conf
|
||||
If you enabled the `headers` module, you can head over to `/etc/apache2/conf-enabled/security.conf` and edit:
|
||||
|
|
|
@ -8,10 +8,10 @@ Welcome! This guide assumes that you have a working 4get instance. This will hel
|
|||
3. The captcha imagesets are located in `data/captcha/your_image_set/*.png`
|
||||
4. The captcha font is located in `data/fonts/captcha.ttf`
|
||||
|
||||
# Cloudflare bypass
|
||||
# Cloudflare bypass (TLS check)
|
||||
**Note: this only allows you to bypass the browser integrity checks. Captchas & javascript challenges will not be bypassed.**
|
||||
|
||||
Configuring this lets you fetch images sitting behind Cloudflare and allows you to scrape the **Yep** search engine. Following these instructions might make your package manager unhappy.
|
||||
Configuring this lets you fetch images sitting behind Cloudflare and allows you to scrape the **Yep** and **Mwmbl** search engines. Please be aware that APT will fight you on this: it constantly re-installs the OpenSSL version of curl when updating.
|
||||
|
||||
First, follow these instructions. Only install the Firefox modules:
|
||||
|
||||
|
|
|
@ -89,6 +89,7 @@ class frontend{
|
|||
$user_agent = "";
|
||||
$bad_header = false;
|
||||
|
||||
// block bots that present X-Forwarded-For, Via, etc
|
||||
foreach($headers_raw as $headerkey => $headervalue){
|
||||
|
||||
$headerkey = strtolower($headerkey);
|
||||
|
@ -106,12 +107,27 @@ class frontend{
|
|||
}
|
||||
}
|
||||
|
||||
// SSL check
|
||||
$bad_ssl = false;
|
||||
if(
|
||||
isset($_SERVER["https"]) &&
|
||||
$_SERVER["https"] == "on" &&
|
||||
isset($_SERVER["SSL_CIPHER"]) &&
|
||||
in_array($_SERVER["SSL_CIPHER"], config::FILTERED_HEADER_KEYS)
|
||||
){
|
||||
|
||||
$bad_ssl = true;
|
||||
}
|
||||
|
||||
if(
|
||||
$bad_header === true ||
|
||||
$bad_ssl === true ||
|
||||
$user_agent == "" ||
|
||||
// user agent check
|
||||
preg_match(
|
||||
config::HEADER_REGEX,
|
||||
$user_agent
|
||||
) ||
|
||||
$bad_header === true
|
||||
)
|
||||
){
|
||||
|
||||
// bot detected !!
|
||||
|
|
|
@ -799,128 +799,147 @@ class google{
|
|||
$title = "Notice";
|
||||
}
|
||||
|
||||
$description = [];
|
||||
|
||||
$as =
|
||||
$div =
|
||||
$this->fuckhtml
|
||||
->getElementsByTagName(
|
||||
"a"
|
||||
"div"
|
||||
);
|
||||
|
||||
if(count($as) !== 0){
|
||||
// probe for related searches div, if found, ignore it cause its shit
|
||||
$probe =
|
||||
$this->fuckhtml
|
||||
->getElementsByAttributeValue(
|
||||
"role",
|
||||
"list",
|
||||
$div
|
||||
);
|
||||
|
||||
// also probe for children
|
||||
if(count($probe) === 0){
|
||||
|
||||
$first = true;
|
||||
|
||||
foreach($as as $a){
|
||||
|
||||
$text_link =
|
||||
$this->fuckhtml
|
||||
->getTextContent(
|
||||
$a
|
||||
);
|
||||
|
||||
if(stripos($text_link, "repeat the search") !== false){
|
||||
|
||||
$last_page = true;
|
||||
break 2;
|
||||
}
|
||||
|
||||
$parts =
|
||||
explode(
|
||||
$a["outerHTML"],
|
||||
$card["innerHTML"],
|
||||
2
|
||||
);
|
||||
|
||||
$card["innerHTML"] = $parts[1];
|
||||
|
||||
$value =
|
||||
preg_replace(
|
||||
'/ +/',
|
||||
" ",
|
||||
$this->fuckhtml
|
||||
->getTextContent(
|
||||
$parts[0],
|
||||
false,
|
||||
false
|
||||
)
|
||||
);
|
||||
|
||||
if(strlen(trim($value)) !== 0){
|
||||
|
||||
$description[] = [
|
||||
"type" => "text",
|
||||
"value" => $value
|
||||
];
|
||||
|
||||
if($first){
|
||||
|
||||
$description[0]["value"] =
|
||||
ltrim($description[0]["value"]);
|
||||
}
|
||||
}
|
||||
|
||||
$first = false;
|
||||
|
||||
$description[] = [
|
||||
"type" => "link",
|
||||
"url" =>
|
||||
$this->fuckhtml
|
||||
->getTextContent(
|
||||
$a["attributes"]
|
||||
["href"]
|
||||
),
|
||||
"value" => $text_link
|
||||
];
|
||||
}
|
||||
|
||||
$text =
|
||||
$probe =
|
||||
$this->fuckhtml
|
||||
->getTextContent(
|
||||
$card["innerHTML"],
|
||||
false,
|
||||
false
|
||||
->getElementsByClassName(
|
||||
$this->getstyle(
|
||||
[
|
||||
"flex-shrink" => "0",
|
||||
"-moz-box-flex" => "0",
|
||||
"flex-grow" => "0",
|
||||
"overflow" => "hidden"
|
||||
]
|
||||
),
|
||||
$div
|
||||
);
|
||||
|
||||
if(strlen(trim($text)) !== 0){
|
||||
|
||||
$description[] = [
|
||||
"type" => "text",
|
||||
"value" =>
|
||||
rtrim(
|
||||
$text
|
||||
)
|
||||
];
|
||||
}
|
||||
|
||||
}else{
|
||||
|
||||
// @TODO: Check if this ever gets populated without giving me garbage
|
||||
/*
|
||||
$text =
|
||||
$this->fuckhtml
|
||||
->getTextContent(
|
||||
$card
|
||||
);
|
||||
|
||||
if($text != ""){
|
||||
$description[] = [
|
||||
"type" => "text",
|
||||
"value" => $text
|
||||
];
|
||||
}*/
|
||||
}
|
||||
|
||||
if(count($description) !== 0){
|
||||
if(count($probe) === 0){
|
||||
|
||||
$out["answer"][] = [
|
||||
"title" => $title,
|
||||
"description" => $description,
|
||||
"url" => null,
|
||||
"thumb" => null,
|
||||
"table" => [],
|
||||
"sublink" => []
|
||||
];
|
||||
$description = [];
|
||||
|
||||
$as =
|
||||
$this->fuckhtml
|
||||
->getElementsByTagName(
|
||||
"a"
|
||||
);
|
||||
|
||||
if(count($as) !== 0){
|
||||
|
||||
$first = true;
|
||||
|
||||
foreach($as as $a){
|
||||
|
||||
$text_link =
|
||||
$this->fuckhtml
|
||||
->getTextContent(
|
||||
$a
|
||||
);
|
||||
|
||||
if(stripos($text_link, "repeat the search") !== false){
|
||||
|
||||
$last_page = true;
|
||||
break 2;
|
||||
}
|
||||
|
||||
$parts =
|
||||
explode(
|
||||
$a["outerHTML"],
|
||||
$card["innerHTML"],
|
||||
2
|
||||
);
|
||||
|
||||
$card["innerHTML"] = $parts[1];
|
||||
|
||||
$value =
|
||||
preg_replace(
|
||||
'/ +/',
|
||||
" ",
|
||||
$this->fuckhtml
|
||||
->getTextContent(
|
||||
$parts[0],
|
||||
false,
|
||||
false
|
||||
)
|
||||
);
|
||||
|
||||
if(strlen(trim($value)) !== 0){
|
||||
|
||||
$description[] = [
|
||||
"type" => "text",
|
||||
"value" => $value
|
||||
];
|
||||
|
||||
if($first){
|
||||
|
||||
$description[0]["value"] =
|
||||
ltrim($description[0]["value"]);
|
||||
}
|
||||
}
|
||||
|
||||
$first = false;
|
||||
|
||||
$description[] = [
|
||||
"type" => "link",
|
||||
"url" =>
|
||||
$this->fuckhtml
|
||||
->getTextContent(
|
||||
$a["attributes"]
|
||||
["href"]
|
||||
),
|
||||
"value" => $text_link
|
||||
];
|
||||
}
|
||||
|
||||
$text =
|
||||
$this->fuckhtml
|
||||
->getTextContent(
|
||||
$card["innerHTML"],
|
||||
false,
|
||||
false
|
||||
);
|
||||
|
||||
if(strlen(trim($text)) !== 0){
|
||||
|
||||
$description[] = [
|
||||
"type" => "text",
|
||||
"value" =>
|
||||
rtrim(
|
||||
$text
|
||||
)
|
||||
];
|
||||
}
|
||||
}
|
||||
|
||||
if(count($description) !== 0){
|
||||
|
||||
$out["answer"][] = [
|
||||
"title" => $title,
|
||||
"description" => $description,
|
||||
"url" => null,
|
||||
"thumb" => null,
|
||||
"table" => [],
|
||||
"sublink" => []
|
||||
];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2451,6 +2470,7 @@ class google{
|
|||
$this->getstyle(
|
||||
[
|
||||
"outline-offset" => "-1px",
|
||||
"outline-width" => "1px",
|
||||
"display" => "flex",
|
||||
"flex-direction" => "column",
|
||||
"flex-grow" => "1"
|
||||
|
|
|
@ -298,9 +298,8 @@ class greppr{
|
|||
|
||||
$description =
|
||||
$this->fuckhtml
|
||||
->getElementsByFuzzyAttributeValue(
|
||||
"style",
|
||||
"color:#777777;",
|
||||
->getElementsByClassName(
|
||||
"highlightedDesc",
|
||||
"p"
|
||||
);
|
||||
|
||||
|
@ -310,9 +309,11 @@ class greppr{
|
|||
}else{
|
||||
|
||||
$description =
|
||||
$this->fuckhtml
|
||||
->getTextContent(
|
||||
$description[0]
|
||||
$this->limitstrlen(
|
||||
$this->fuckhtml
|
||||
->getTextContent(
|
||||
$description[0]
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -325,7 +326,7 @@ class greppr{
|
|||
$date =
|
||||
strtotime(
|
||||
explode(
|
||||
"Added:",
|
||||
":",
|
||||
$this->fuckhtml
|
||||
->getTextContent(
|
||||
$date[count($date) - 1]["innerHTML"]
|
||||
|
@ -426,4 +427,9 @@ class greppr{
|
|||
|
||||
return $tokens;
|
||||
}
|
||||
|
||||
private function limitstrlen($text){
|
||||
|
||||
return explode("\n", wordwrap($text, 300, "\n"))[0];
|
||||
}
|
||||
}
|
||||
|
|
|
@ -27,18 +27,24 @@ class mwmbl{
|
|||
|
||||
curl_setopt($curlproc, CURLOPT_URL, $url);
|
||||
|
||||
// use http2
|
||||
curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
|
||||
|
||||
curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
|
||||
curl_setopt($curlproc, CURLOPT_HTTPHEADER,
|
||||
["User-Agent: " . config::USER_AGENT,
|
||||
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
|
||||
"Accept-Language: en-US,en;q=0.5",
|
||||
"Accept-Encoding: gzip",
|
||||
"Referer: https://beta.mwmbl.org/",
|
||||
"DNT: 1",
|
||||
"Sec-GPC: 1",
|
||||
"Connection: keep-alive",
|
||||
"Upgrade-Insecure-Requests: 1",
|
||||
"Sec-Fetch-Dest: document",
|
||||
"Sec-Fetch-Mode: navigate",
|
||||
"Sec-Fetch-Site: none",
|
||||
"Sec-Fetch-Site: same-origin",
|
||||
"Priority: u=0, i",
|
||||
"Sec-Fetch-User: ?1"]
|
||||
);
|
||||
|
||||
|
@ -72,14 +78,14 @@ class mwmbl{
|
|||
try{
|
||||
$html = $this->get(
|
||||
$this->backend->get_ip(), // no next page!
|
||||
"https://mwmbl.org/app/home/",
|
||||
"https://beta.mwmbl.org/",
|
||||
[
|
||||
"q" => $search
|
||||
]
|
||||
);
|
||||
}catch(Exception $error){
|
||||
|
||||
throw new Exception("Failed to fetch HTML");
|
||||
throw new Exception("Failed to fetch HTML. If you're getting a timeout, make sure you have curl-impersonate setup.");
|
||||
}
|
||||
|
||||
$out = [
|
||||
|
@ -115,6 +121,68 @@ class mwmbl{
|
|||
$this->fuckhtml
|
||||
->getElementsByTagName("p");
|
||||
|
||||
$sublinks = [];
|
||||
|
||||
$mores =
|
||||
$this->fuckhtml
|
||||
->getElementsByClassName(
|
||||
"result-link-more",
|
||||
"div"
|
||||
);
|
||||
|
||||
foreach($mores as $more){
|
||||
|
||||
$this->fuckhtml->load($more);
|
||||
|
||||
$as =
|
||||
$this->fuckhtml
|
||||
->getElementsByClassName(
|
||||
"more",
|
||||
"a"
|
||||
);
|
||||
|
||||
if(count($as) === 0){
|
||||
|
||||
// ?? invalid
|
||||
continue;
|
||||
}
|
||||
|
||||
$sublinks[] = [
|
||||
"title" =>
|
||||
$this->titledots(
|
||||
$this->fuckhtml
|
||||
->getTextContent(
|
||||
$this->fuckhtml
|
||||
->getElementsByClassName(
|
||||
"more-title",
|
||||
"span"
|
||||
)[0]
|
||||
)
|
||||
),
|
||||
"description" =>
|
||||
$this->titledots(
|
||||
$this->fuckhtml
|
||||
->getTextContent(
|
||||
$this->fuckhtml
|
||||
->getElementsByClassName(
|
||||
"more-extract",
|
||||
"span"
|
||||
)[0]
|
||||
)
|
||||
),
|
||||
"url" =>
|
||||
$this->fuckhtml
|
||||
->getTextContent(
|
||||
$as[0]
|
||||
["attributes"]
|
||||
["href"]
|
||||
)
|
||||
];
|
||||
}
|
||||
|
||||
// reset
|
||||
$this->fuckhtml->load($result);
|
||||
|
||||
$out["web"][] = [
|
||||
"title" =>
|
||||
$this->titledots(
|
||||
|
@ -153,7 +221,7 @@ class mwmbl{
|
|||
"url" => null,
|
||||
"ratio" => null
|
||||
],
|
||||
"sublink" => [],
|
||||
"sublink" => $sublinks,
|
||||
"table" => []
|
||||
];
|
||||
}
|
||||
|
|
|
@ -13,7 +13,7 @@ class pinterest{
|
|||
return [];
|
||||
}
|
||||
|
||||
private function get($url, $get = []){
|
||||
private function get($proxy, $url, $get = []){
|
||||
|
||||
$curlproc = curl_init();
|
||||
|
||||
|
@ -45,7 +45,7 @@ class pinterest{
|
|||
curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
|
||||
curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
|
||||
|
||||
$this->proxy->assign_proxy($curlproc);
|
||||
$this->backend->assign_proxy($curlproc, $proxy);
|
||||
|
||||
$data = curl_exec($curlproc);
|
||||
|
||||
|
@ -60,45 +60,63 @@ class pinterest{
|
|||
|
||||
public function image($get){
|
||||
|
||||
$search = $get["s"];
|
||||
|
||||
$out = [
|
||||
"status" => "ok",
|
||||
"npt" => null,
|
||||
"image" => []
|
||||
];
|
||||
|
||||
$filter = [
|
||||
"source_url" => "/search/pins/?q=" . urlencode($search),
|
||||
"rs" => "typed",
|
||||
"data" =>
|
||||
json_encode(
|
||||
[
|
||||
"options" => [
|
||||
"article" => null,
|
||||
"applied_filters" => null,
|
||||
"appliedProductFilters" => "---",
|
||||
"auto_correction_disabled" => false,
|
||||
"corpus" => null,
|
||||
"customized_rerank_type" => null,
|
||||
"filters" => null,
|
||||
"query" => $search,
|
||||
"query_pin_sigs" => null,
|
||||
"redux_normalize_feed" => true,
|
||||
"rs" => "typed",
|
||||
"scope" => "pins", // pins, boards, videos,
|
||||
"source_id" => null
|
||||
],
|
||||
"context" => []
|
||||
]
|
||||
),
|
||||
"_" => substr(str_replace(".", "", (string)microtime(true)), 0, -1)
|
||||
];
|
||||
if($get["npt"]){
|
||||
|
||||
// @TODO
|
||||
// post data for next page
|
||||
$data = [
|
||||
"source_url" => "/search/pins/?q=" . urlencode($search) . "&rs=typed",
|
||||
"data" =>
|
||||
json_encode(
|
||||
[
|
||||
// {"options":{"applied_filters":null,"appliedProductFilters":"---","article":null,"auto_correction_disabled":false,"corpus":null,"customized_rerank_type":null,"domains":null,"filters":null,"journey_depth":null,"page_size":null,"price_max":null,"price_min":null,"query_pin_sigs":null,"query":"higurashi","redux_normalize_feed":true,"rs":"typed","scope":"pins","selected_one_bar_modules":null,"source_id":null,"source_module_id":null,"top_pin_id":null,"bookmarks":["Y2JVSG81V2sxcmNHRlpWM1J5VFVad1ZsWlVRbXhpVmtreVZsZHpOV0pIU2tkV2FscFhVbXhhVkZreU1WSmtNREZWVjIxR1RrMXNTbEJXYlhSaFVtMVdjMVZ1U2xaaWEzQnpXVlJPVTJWV1pISlhhM1JYVm10V05sVldVbE5XVjBwMVVXMUdWVll6VFhoVWJYaFhWMVp3Ums1V1RsTmlSbGt5Vm10YWFtVkdWbkpOU0dSUFZsZG9XRmxzWkc5VlZscHlWbGhrYkdKR1NubFdWelZQWVVaYWRHVkVRbFppUmtwVVZrUktWMlJIVWtWV2JHaHBVakZLU0Zkc1pEUmtNVnBZVW10b2FsSXdXbkJXYlRWRFpHeGFSMWRzVG1oaGVrWllXV3RvVTFVeFpFaFZiRUpoVm5wRk1GbHFSbXRYVjA1R1YyczFWMVpHV2pSWFZtaDNVakZrY2sxWVRsaGlhM0JXV1ZSR1MyRkdiRlZTYm1SVVVteHdXbGxWVlRGVk1VbDVWRmhrVjAxdVVuWlVhMXBTWlVaT2MxcEhSbE5TTWswMVdtdGFWMU5YU2paVmJYaFRUVmhDUjFZeU5YZFVNVkY0VjJ0b1ZXRnJOVlpVVmxwTFVURndXR042VmxOV2ExcGFXVlZWTlZVeFNYZE5WRTVYVWtWYVZGWkhNVTlXTVU1WllVWk9hR1ZyV2s1WFZ6QXhZakpPVjFWWWFHRlNWbkJRVm14U1IwMUdXWGxOVkVKVlRWWnNORll5TURWV1YwVjVWV3hDV21FeGNETmFSVnByVjFkS1IyTkhhR2xYUjJkM1ZtdGFhMlF4VVhsVGJGcE9Wa1p3YjFwWGVFdFZWbFp4VW14YWJGWnRVbHBaTUdoTFZHMUtTR1ZJYUZkV2VrWjJWMVphU21ReVJYcGpSbFpwVW10d1RGZHJVa0pPVms1SFZHNVNUbFl3V2xoVmJYUldaVVpaZUZremFGUk5hM0JYVkZaYVYyRkZNSGxWYkVKYVlrWlZlRnBGV210WFIwNUpVMnMxVTFaR1dscFdWekI0VFVaV1IxTllaR3BUUlhCb1dWUkdWbVZHVm5SbFJuQnNZbFpKTWxSVlVYaFBSVGxGV1hwR1QyVnJSVEZVVlZKT1RrVXhSVkpVUWs5bGJFVXhWRmhzZDFOR1ZsWmtNMFp0VWpGYWIxZFhjRXBsUlRGSVZWaHdUbFl4YTNoVVZWSnFUVVUxV0ZadGFFOVNSVnB6Vkd0a1drMUdiRFpUVkVaT1pXMWplRmRzVWxkaFJuQllWVlJTVDJWdFRqWlVNVkpTWlZad2NWcEhkRTlsYTFwMFZGVlNhMkpWTVZWVFZFcE9Wa1pzTmxkWE1WSk9WVEYwVlcweFVGWXdXVFJXUjNSWFYwZGFRbEJVTVRoUFJHTXhUbnBCTlUxRVRUUk5SRVV3VG5wUk5VMTVjRWhWVlhkeFprUlZlRTlFVVRKWlZHc3lUMWRSTWsxVVVUSk9iVnBvV1RKWmVrNTZXWGhPTWs1cFQwUkZNVTlFVm1sTlZ
GcHBUV3BTYTFsWFRtcE9SR015VG1wVk5GbHFaR2haVjFacldWUmFiVmxxWkdoYVZGWnFUa1JXT0ZSclZsaG1RVDA5fFVIbzVhRkpYZUc1WFYyUlpWVEpHYkdGNk1XWk5ha1ptVFZSR09FOUVZekZPZWtFMVRVUk5ORTFFUlRCT2VsRTFUWGx3U0ZWVmQzRm1SMWw1VFZSUk1WbDZUVEJhUjFGNVQxZFNhVnB0VlRGT1JFVXdXVlJuZVU1cVRUUk5hbU40VDBSSk1VNXFWVEZOYlZwcVdsUnJlRTFFVVhwWmVsVjNXbXBvYkU1dFJYbE9ha0Y2VDFSSk5VMTZWVEJaYWtJNFZHdFdXR1pCUFQwPXxOb25lfDg3NTcwOTAzODAxNDc0OTMqR1FMKnwzMjM3YjM3ZGNhMGU3YjYyYzYzYzAyZGJkNGU1MjdlNzMyMTExMTNlMmUyMzEyOWM2MDAzYmU1ZTlmZjkwYjAwfE5FV3w="]},"context":{}}
|
||||
]
|
||||
);
|
||||
];
|
||||
|
||||
}else{
|
||||
|
||||
$search = $get["s"];
|
||||
if(strlen($search) === 0){
|
||||
|
||||
throw new Exception("Search term is empty!");
|
||||
}
|
||||
|
||||
$filter = [
|
||||
"source_url" => "/search/pins/?q=" . urlencode($search),
|
||||
"rs" => "typed",
|
||||
"data" =>
|
||||
json_encode(
|
||||
[
|
||||
"options" => [
|
||||
"article" => null,
|
||||
"applied_filters" => null,
|
||||
"appliedProductFilters" => "---",
|
||||
"auto_correction_disabled" => false,
|
||||
"corpus" => null,
|
||||
"customized_rerank_type" => null,
|
||||
"filters" => null,
|
||||
"query" => $search,
|
||||
"query_pin_sigs" => null,
|
||||
"redux_normalize_feed" => true,
|
||||
"rs" => "typed",
|
||||
"scope" => "pins", // pins, boards, videos,
|
||||
"source_id" => null
|
||||
],
|
||||
"context" => []
|
||||
]
|
||||
),
|
||||
"_" => substr(str_replace(".", "", (string)microtime(true)), 0, -1)
|
||||
];
|
||||
|
||||
$proxy = $this->backend->get_ip();
|
||||
}
|
||||
|
||||
try{
|
||||
$json =
|
||||
json_decode(
|
||||
$this->get(
|
||||
$proxy,
|
||||
"https://www.pinterest.ca/resource/BaseSearchResource/get/",
|
||||
$filter
|
||||
),
|
||||
|
@ -115,7 +133,11 @@ class pinterest{
|
|||
throw new Exception("Failed to decode JSON");
|
||||
}
|
||||
|
||||
//print_r($json);
|
||||
$out = [
|
||||
"status" => "ok",
|
||||
"npt" => null,
|
||||
"image" => []
|
||||
];
|
||||
|
||||
foreach(
|
||||
$json
|
||||
|
@ -189,7 +211,6 @@ class pinterest{
|
|||
break;
|
||||
|
||||
case "board":
|
||||
|
||||
if(isset($item["cover_pin"]["image_url"])){
|
||||
|
||||
$image = [
|
||||
|
|
|
@ -83,6 +83,10 @@ $settings = [
|
|||
"value" => "startpage",
|
||||
"text" => "Startpage"
|
||||
],
|
||||
[
|
||||
"value" => "kagi",
|
||||
"text" => "Kagi"
|
||||
],
|
||||
[
|
||||
"value" => "qwant",
|
||||
"text" => "Qwant"
|
||||
|
|
Loading…
Reference in a new issue