1
Fork 0
mirror of https://git.lolcat.ca/lolcat/4get.git synced 2024-12-24 23:56:34 -05:00

implement SSL check for botretards

This commit is contained in:
lolcat 2024-08-09 10:06:08 -04:00
parent fbac3eeb8d
commit 883a650f84
8 changed files with 328 additions and 95 deletions

View file

@ -19,7 +19,8 @@ class autocomplete{
"marginalia" => "https://search.marginalia.nu/suggest/?partial={searchTerms}",
"yt" => "https://suggestqueries-clients6.youtube.com/complete/search?client=youtube&q={searchTerms}",
"sc" => "",
"startpage" => "https://www.startpage.com/suggestions?q={searchTerms}&format=opensearch&segment=startpage.defaultffx&lui=english"
"startpage" => "https://www.startpage.com/suggestions?q={searchTerms}&format=opensearch&segment=startpage.defaultffx&lui=english",
"kagi" => "https://kagi.com/api/autosuggest?q={searchTerms}"
];
/*

View file

@ -63,6 +63,14 @@ class config{
//"via"
];
// Block SSL ciphers used by CLI tools used for botting
// Basically a primitive version of Cloudflare's browser integrity check
// ** If curl can still access the site (with spoofed headers), please make sure you use the new apache2 config **
// https://git.lolcat.ca/lolcat/4get/src/branch/master/docs/apache2.md
const DISALLOWED_SSL = [
// "TLS_AES_256_GCM_SHA384" // used by WGET and CURL
];
// Maximal number of searches per captcha key/pass issued. Counter gets
// reset on every APCU cache clear (should happen once a day).
// Only useful when BOT_PROTECTION is NOT set to 0
@ -111,7 +119,7 @@ class config{
// Default user agent to use for scraper requests. Sometimes ignored to get specific webpages
// Changing this might break things.
const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:128.0) Gecko/20100101 Firefox/128.0";
const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:129.0) Gecko/20100101 Firefox/129.0";
// Proxy pool assignments for each scraper
// false = Use server's raw IP

195
docs/apache2-example.md Normal file
View file

@ -0,0 +1,195 @@
# Sample Apache2 configuration
This is the apache2 configuration file used on the 4get.ca official instance, in hopes that it's useful to you!
Looking for the apache2 guide? <a href="https://git.lolcat.ca/lolcat/4get/src/branch/master/docs/apache2.md">Go here</a>.
```xml
<VirtualHost *:443>
ServerName www.4get.ca
SSLEngine On
SSLCertificateFile /etc/letsencrypt/live/4get.ca/fullchain.pem
SSLCertificateKeyFile /etc/letsencrypt/live/4get.ca/privkey.pem
SSLCertificateChainFile /etc/letsencrypt/live/4get.ca/chain.pem
RedirectMatch 301 ^(.*)$ https://4get.ca$1
</VirtualHost>
<VirtualHost *:443>
ServerName 4get.ca
ServerAdmin will@lolcat.ca
DocumentRoot /var/www/4get
SSLEngine On
SSLOptions +StdEnvVars
#ErrorLog ${APACHE_LOG_DIR}/error.log
AddOutputFilterByType DEFLATE application/json
AddOutputFilterByType DEFLATE application/javascript
AddOutputFilterByType DEFLATE application/x-javascript
AddOutputFilterByType DEFLATE text/html
AddOutputFilterByType DEFLATE text/plain
AddOutputFilterByType DEFLATE text/css
SSLCertificateFile /etc/letsencrypt/live/4get.ca/fullchain.pem
SSLCertificateKeyFile /etc/letsencrypt/live/4get.ca/privkey.pem
SSLCertificateChainFile /etc/letsencrypt/live/4get.ca/chain.pem
<Directory /var/www/4get>
Options -MultiViews
AllowOverride All
Require all granted
RewriteEngine On
RewriteCond %{REQUEST_FILENAME} !-d
RewriteCond %{REQUEST_FILENAME} !-f
RewriteRule ^([^\.]+)$ $1.php [NC,L]
</Directory>
# deny access to private resources
<Directory /var/www/4get/data/>
Order Deny,allow
Deny from all
</Directory>
</VirtualHost>
<VirtualHost *:443>
ServerName www.lolcat.ca
SSLEngine On
SSLCertificateFile /etc/letsencrypt/live/4get.ca/fullchain.pem
SSLCertificateKeyFile /etc/letsencrypt/live/4get.ca/privkey.pem
SSLCertificateChainFile /etc/letsencrypt/live/4get.ca/chain.pem
RedirectMatch 301 ^(.*)$ https://lolcat.ca$1
</VirtualHost>
<VirtualHost *:443>
ServerName lolcat.ca
ServerAdmin will@lolcat.ca
DocumentRoot /var/www/lolcat
SSLEngine On
SSLOptions +StdEnvVars
#ErrorLog ${APACHE_LOG_DIR}/error.log
AddOutputFilterByType DEFLATE application/json
AddOutputFilterByType DEFLATE application/javascript
AddOutputFilterByType DEFLATE application/x-javascript
AddOutputFilterByType DEFLATE text/html
AddOutputFilterByType DEFLATE text/plain
AddOutputFilterByType DEFLATE text/css
SSLCertificateFile /etc/letsencrypt/live/4get.ca/fullchain.pem
SSLCertificateKeyFile /etc/letsencrypt/live/4get.ca/privkey.pem
SSLCertificateChainFile /etc/letsencrypt/live/4get.ca/chain.pem
<Directory /var/www/lolcat>
Options -MultiViews
AllowOverride All
Require all granted
RewriteEngine On
RewriteCond %{REQUEST_FILENAME} !-d
RewriteCond %{REQUEST_FILENAME} !-f
RewriteRule ^([^\.]+)$ $1.php [NC,L]
</Directory>
</VirtualHost>
<VirtualHost *:443>
ServerName www.nyym.co
SSLEngine On
SSLCertificateFile /etc/letsencrypt/live/nyym.co/fullchain.pem
SSLCertificateKeyFile /etc/letsencrypt/live/nyym.co/privkey.pem
SSLCertificateChainFile /etc/letsencrypt/live/nyym.co/chain.pem
RedirectMatch 301 ^(.*)$ https://nyym.co$1
</VirtualHost>
<VirtualHost *:443>
ServerName nyym.co
ServerAdmin will@lolcat.ca
DocumentRoot /var/www/nyym
SSLEngine On
SSLOptions +StdEnvVars
#ErrorLog ${APACHE_LOG_DIR}/error.log
AddOutputFilterByType DEFLATE application/json
AddOutputFilterByType DEFLATE application/javascript
AddOutputFilterByType DEFLATE application/x-javascript
AddOutputFilterByType DEFLATE text/html
AddOutputFilterByType DEFLATE text/plain
AddOutputFilterByType DEFLATE text/css
SSLCertificateFile /etc/letsencrypt/live/nyym.co/fullchain.pem
SSLCertificateKeyFile /etc/letsencrypt/live/nyym.co/privkey.pem
SSLCertificateChainFile /etc/letsencrypt/live/nyym.co/chain.pem
<Directory /var/www/nyym>
Options -MultiViews
AllowOverride All
Require all granted
RewriteEngine On
RewriteCond %{REQUEST_FILENAME} !-d
RewriteCond %{REQUEST_FILENAME} !-f
RewriteRule ^([^\.]+)$ $1.php [NC,L]
</Directory>
</VirtualHost>
<VirtualHost *:443>
ServerName git.lolcat.ca
SSLEngine On
SSLOptions +StdEnvVars
#ErrorLog ${APACHE_LOG_DIR}/error.log
AddOutputFilterByType DEFLATE application/json
AddOutputFilterByType DEFLATE application/javascript
AddOutputFilterByType DEFLATE application/x-javascript
AddOutputFilterByType DEFLATE text/html
AddOutputFilterByType DEFLATE text/plain
AddOutputFilterByType DEFLATE text/css
SSLCertificateFile /etc/letsencrypt/live/4get.ca/fullchain.pem
SSLCertificateKeyFile /etc/letsencrypt/live/4get.ca/privkey.pem
SSLCertificateChainFile /etc/letsencrypt/live/4get.ca/chain.pem
ProxyPreserveHost On
ProxyRequests off
AllowEncodedSlashes NoDecode
ProxyPass / http://localhost:3000/ nocanon
</VirtualHost>
<VirtualHost *:443>
ServerName live.lolcat.ca
ServerAdmin will@lolcat.ca
DocumentRoot /var/www/live
SSLEngine On
SSLOptions +StdEnvVars
#ErrorLog ${APACHE_LOG_DIR}/error.log
AddOutputFilterByType DEFLATE application/json
AddOutputFilterByType DEFLATE application/javascript
AddOutputFilterByType DEFLATE application/x-javascript
AddOutputFilterByType DEFLATE text/html
AddOutputFilterByType DEFLATE text/plain
AddOutputFilterByType DEFLATE text/css
SSLCertificateFile /etc/letsencrypt/live/4get.ca/fullchain.pem
SSLCertificateKeyFile /etc/letsencrypt/live/4get.ca/privkey.pem
SSLCertificateChainFile /etc/letsencrypt/live/4get.ca/chain.pem
</VirtualHost>
```

View file

@ -74,7 +74,7 @@ Now, edit the following file: `/etc/apache2/sites-available/000-default.conf`, r
DocumentRoot /var/www/4get
Options +MultiViews
Options -MultiViews
RewriteEngine On
RewriteCond %{REQUEST_FILENAME} !-d
RewriteCond %{REQUEST_FILENAME} !-f
@ -92,47 +92,56 @@ To make the above snippet work, please refer to our <a href="https://git.lolcat.
## default-ssl.conf
Now, edit the file `/etc/apache2/sites-available/default-ssl.conf`, remove everything and, again, add each rule while modifying the relevant fields:
This ruleset will redirect all clients that specify an unknown `Host` to the domain of our choice. I recommend you uncomment the `ErrorLog` directive while setting things up in case a problem occurs with PHP. Don't worry about the invalid SSL paths, we will generate our certificates later; just make sure you specify the right domains in there:
First, append the following redirect rule to point traffic from `www.4get.ca` to `4get.ca`:
```xml
<VirtualHost *:443>
ServerName www.4get.ca
SSLEngine On
SSLCertificateFile /etc/letsencrypt/live/4get.ca/fullchain.pem
SSLCertificateKeyFile /etc/letsencrypt/live/4get.ca/privkey.pem
SSLCertificateChainFile /etc/letsencrypt/live/4get.ca/chain.pem
RedirectMatch 301 ^(.*)$ https://4get.ca$1
</VirtualHost>
```
This ruleset tells apache2 where 4get is located (`/var/www/4get`), ensures that `4get.ca/settings` resolves to `4get.ca/settings.php` internally and that we deny access to `/data/*`, which may contain files you might want to keep private. `SSLOptions +StdEnvVars` lets PHP see whether the connection uses HTTPS and which cipher was negotiated, which is useful for basic bot protection.
Make sure to replace `4get.ca` with your own domain under the `SSLCertificate*` directives!
```xml
<VirtualHost *:443>
ServerName 4get.ca
ServerAdmin will@lolcat.ca
DocumentRoot /var/www/4get
SSLEngine On
SSLOptions +StdEnvVars
#ErrorLog ${APACHE_LOG_DIR}/error.log
SSLEngine on
<FilesMatch "\.(?:cgi|shtml|phtml|php)$">
SSLOptions +StdEnvVars
</FilesMatch>
<Directory /usr/lib/cgi-bin>
SSLOptions +StdEnvVars
</Directory>
AddOutputFilterByType DEFLATE application/json
AddOutputFilterByType DEFLATE application/javascript
AddOutputFilterByType DEFLATE application/x-javascript
AddOutputFilterByType DEFLATE text/html
AddOutputFilterByType DEFLATE text/plain
AddOutputFilterByType DEFLATE text/css
SSLCertificateFile /etc/letsencrypt/live/4get.ca/fullchain.pem
SSLCertificateKeyFile /etc/letsencrypt/live/4get.ca/privkey.pem
</VirtualHost>
```
This ruleset tells apache2 where 4get is located (`/var/www/4get`), ensures that `4get.ca/settings` resolves to `4get.ca/settings.php` internally and that we deny access to `/data/*`, which may contain files you might want to keep private.
```xml
<VirtualHost *:443>
ServerName 4get.ca
DocumentRoot /var/www/4get
Options +MultiViews
RewriteEngine On
RewriteCond %{REQUEST_FILENAME} !-d
RewriteCond %{REQUEST_FILENAME} !-f
RewriteRule ^([^\.]+)$ $1.php [NC,L]
SSLCertificateChainFile /etc/letsencrypt/live/4get.ca/chain.pem
<Directory /var/www/4get>
Options -MultiViews
AllowOverride All
Require all granted
RewriteEngine On
RewriteCond %{REQUEST_FILENAME} !-d
RewriteCond %{REQUEST_FILENAME} !-f
RewriteRule ^([^\.]+)$ $1.php [NC,L]
</Directory>
# deny access to private resources
<Directory /var/www/4get/data/>
@ -142,28 +151,7 @@ This ruleset tells apache2 where 4get is located (`/var/www/4get`), ensures that
</VirtualHost>
```
Don't forget to specify your other services here! Here's an example of a ruleset I use for `lolcat.ca`:
```xml
<VirtualHost *:443>
ServerName lolcat.ca
DocumentRoot /var/www/lolcat
Options +MultiViews
RewriteEngine On
RewriteCond %{REQUEST_FILENAME} !-d
RewriteCond %{REQUEST_FILENAME} !-f
RewriteRule ^([^\.]+)$ $1.php [NC,L]
</VirtualHost>
```
... Along with its redirect rules.
```xml
<VirtualHost *:443>
ServerName www.lolcat.ca
RedirectMatch 301 ^(.*)$ https://lolcat.ca$1
</VirtualHost>
```
By default, the first rule dictates where traffic is redirected when the client specifies an unknown domain name. Don't forget your webserver's other rules! For a complete example, please <a href="https://git.lolcat.ca/lolcat/4get/src/branch/master/docs/apache2-example.md">check out the real-world config file I use on 4get.ca</a>.
## security.conf
If you enabled the `headers` module, you can head over to `/etc/apache2/conf-enabled/security.conf` and edit:

View file

@ -89,6 +89,7 @@ class frontend{
$user_agent = "";
$bad_header = false;
// block bots that present X-Forwarded-For, Via, etc
foreach($headers_raw as $headerkey => $headervalue){
$headerkey = strtolower($headerkey);
@ -106,12 +107,27 @@ class frontend{
}
}
// SSL check: flag the request when the TLS cipher negotiated by the client
// is listed in config::DISALLOWED_SSL (ciphers used by CLI tools used for
// botting). The webserver must export SSL_CIPHER to PHP for this to work
// (apache2: "SSLOptions +StdEnvVars").
$bad_ssl = false;

// $_SERVER keys are case-sensitive and the HTTPS flag is exported in
// uppercase; the lowercase key is still honoured in case a setup defines it.
if(isset($_SERVER["HTTPS"])){
	
	$https_flag = $_SERVER["HTTPS"];
}elseif(isset($_SERVER["https"])){
	
	$https_flag = $_SERVER["https"];
}else{
	
	$https_flag = "off";
}

if(
	$https_flag == "on" &&
	isset($_SERVER["SSL_CIPHER"]) &&
	// compare against the cipher blacklist, not the header-name blacklist
	in_array($_SERVER["SSL_CIPHER"], config::DISALLOWED_SSL, true)
){
	
	$bad_ssl = true;
}
if(
$bad_header === true ||
$bad_ssl === true ||
$user_agent == "" ||
// user agent check
preg_match(
config::HEADER_REGEX,
$user_agent
) ||
$bad_header === true
)
){
// bot detected !!
@ -1306,7 +1322,7 @@ class frontend{
return htmlspecialchars($image);
}
return "/proxy?i=" . urlencode($image) . "&s=" . $format;
return "https://4get.ca/proxy?i=" . urlencode($image) . "&s=" . $format;
}
public function htmlnextpage($gets, $npt, $page){

View file

@ -52,7 +52,7 @@ class mwmbl{
curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
curl_setopt($curlproc, CURLOPT_TIMEOUT, 30); // @todo reset
curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
$this->backend->assign_proxy($curlproc, $proxy);

View file

@ -13,7 +13,7 @@ class pinterest{
return [];
}
private function get($url, $get = []){
private function get($proxy, $url, $get = []){
$curlproc = curl_init();
@ -45,7 +45,7 @@ class pinterest{
curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
$this->proxy->assign_proxy($curlproc);
$this->backend->assign_proxy($curlproc, $proxy);
$data = curl_exec($curlproc);
@ -60,45 +60,63 @@ class pinterest{
public function image($get){
$search = $get["s"];
$out = [
"status" => "ok",
"npt" => null,
"image" => []
];
$filter = [
"source_url" => "/search/pins/?q=" . urlencode($search),
"rs" => "typed",
"data" =>
json_encode(
[
"options" => [
"article" => null,
"applied_filters" => null,
"appliedProductFilters" => "---",
"auto_correction_disabled" => false,
"corpus" => null,
"customized_rerank_type" => null,
"filters" => null,
"query" => $search,
"query_pin_sigs" => null,
"redux_normalize_feed" => true,
"rs" => "typed",
"scope" => "pins", // pins, boards, videos,
"source_id" => null
],
"context" => []
]
),
"_" => substr(str_replace(".", "", (string)microtime(true)), 0, -1)
];
if($get["npt"]){
// @TODO
// post data for next page
$data = [
"source_url" => "/search/pins/?q=" . urlencode($search) . "&rs=typed",
"data" =>
json_encode(
[
// {"options":{"applied_filters":null,"appliedProductFilters":"---","article":null,"auto_correction_disabled":false,"corpus":null,"customized_rerank_type":null,"domains":null,"filters":null,"journey_depth":null,"page_size":null,"price_max":null,"price_min":null,"query_pin_sigs":null,"query":"higurashi","redux_normalize_feed":true,"rs":"typed","scope":"pins","selected_one_bar_modules":null,"source_id":null,"source_module_id":null,"top_pin_id":null,"bookmarks":["Y2JVSG81V2sxcmNHRlpWM1J5VFVad1ZsWlVRbXhpVmtreVZsZHpOV0pIU2tkV2FscFhVbXhhVkZreU1WSmtNREZWVjIxR1RrMXNTbEJXYlhSaFVtMVdjMVZ1U2xaaWEzQnpXVlJPVTJWV1pISlhhM1JYVm10V05sVldVbE5XVjBwMVVXMUdWVll6VFhoVWJYaFhWMVp3Ums1V1RsTmlSbGt5Vm10YWFtVkdWbkpOU0dSUFZsZG9XRmxzWkc5VlZscHlWbGhrYkdKR1NubFdWelZQWVVaYWRHVkVRbFppUmtwVVZrUktWMlJIVWtWV2JHaHBVakZLU0Zkc1pEUmtNVnBZVW10b2FsSXdXbkJXYlRWRFpHeGFSMWRzVG1oaGVrWllXV3RvVTFVeFpFaFZiRUpoVm5wRk1GbHFSbXRYVjA1R1YyczFWMVpHV2pSWFZtaDNVakZrY2sxWVRsaGlhM0JXV1ZSR1MyRkdiRlZTYm1SVVVteHdXbGxWVlRGVk1VbDVWRmhrVjAxdVVuWlVhMXBTWlVaT2MxcEhSbE5TTWswMVdtdGFWMU5YU2paVmJYaFRUVmhDUjFZeU5YZFVNVkY0VjJ0b1ZXRnJOVlpVVmxwTFVURndXR042VmxOV2ExcGFXVlZWTlZVeFNYZE5WRTVYVWtWYVZGWkhNVTlXTVU1WllVWk9hR1ZyV2s1WFZ6QXhZakpPVjFWWWFHRlNWbkJRVm14U1IwMUdXWGxOVkVKVlRWWnNORll5TURWV1YwVjVWV3hDV21FeGNETmFSVnByVjFkS1IyTkhhR2xYUjJkM1ZtdGFhMlF4VVhsVGJGcE9Wa1p3YjFwWGVFdFZWbFp4VW14YWJGWnRVbHBaTUdoTFZHMUtTR1ZJYUZkV2VrWjJWMVphU21ReVJYcGpSbFpwVW10d1RGZHJVa0pPVms1SFZHNVNUbFl3V2xoVmJYUldaVVpaZUZremFGUk5hM0JYVkZaYVYyRkZNSGxWYkVKYVlrWlZlRnBGV210WFIwNUpVMnMxVTFaR1dscFdWekI0VFVaV1IxTllaR3BUUlhCb1dWUkdWbVZHVm5SbFJuQnNZbFpKTWxSVlVYaFBSVGxGV1hwR1QyVnJSVEZVVlZKT1RrVXhSVkpVUWs5bGJFVXhWRmhzZDFOR1ZsWmtNMFp0VWpGYWIxZFhjRXBsUlRGSVZWaHdUbFl4YTNoVVZWSnFUVVUxV0ZadGFFOVNSVnB6Vkd0a1drMUdiRFpUVkVaT1pXMWplRmRzVWxkaFJuQllWVlJTVDJWdFRqWlVNVkpTWlZad2NWcEhkRTlsYTFwMFZGVlNhMkpWTVZWVFZFcE9Wa1pzTmxkWE1WSk9WVEYwVlcweFVGWXdXVFJXUjNSWFYwZGFRbEJVTVRoUFJHTXhUbnBCTlUxRVRUUk5SRVV3VG5wUk5VMTVjRWhWVlhkeFprUlZlRTlFVVRKWlZHc3lUMWRSTWsxVVVUSk9iVnBvV1RKWmVrNTZXWGhPTWs1cFQwUkZNVTlFVm1sTlZ
GcHBUV3BTYTFsWFRtcE9SR015VG1wVk5GbHFaR2haVjFacldWUmFiVmxxWkdoYVZGWnFUa1JXT0ZSclZsaG1RVDA5fFVIbzVhRkpYZUc1WFYyUlpWVEpHYkdGNk1XWk5ha1ptVFZSR09FOUVZekZPZWtFMVRVUk5ORTFFUlRCT2VsRTFUWGx3U0ZWVmQzRm1SMWw1VFZSUk1WbDZUVEJhUjFGNVQxZFNhVnB0VlRGT1JFVXdXVlJuZVU1cVRUUk5hbU40VDBSSk1VNXFWVEZOYlZwcVdsUnJlRTFFVVhwWmVsVjNXbXBvYkU1dFJYbE9ha0Y2VDFSSk5VMTZWVEJaYWtJNFZHdFdXR1pCUFQwPXxOb25lfDg3NTcwOTAzODAxNDc0OTMqR1FMKnwzMjM3YjM3ZGNhMGU3YjYyYzYzYzAyZGJkNGU1MjdlNzMyMTExMTNlMmUyMzEyOWM2MDAzYmU1ZTlmZjkwYjAwfE5FV3w="]},"context":{}}
]
);
];
}else{
$search = $get["s"];
if(strlen($search) === 0){
throw new Exception("Search term is empty!");
}
$filter = [
"source_url" => "/search/pins/?q=" . urlencode($search),
"rs" => "typed",
"data" =>
json_encode(
[
"options" => [
"article" => null,
"applied_filters" => null,
"appliedProductFilters" => "---",
"auto_correction_disabled" => false,
"corpus" => null,
"customized_rerank_type" => null,
"filters" => null,
"query" => $search,
"query_pin_sigs" => null,
"redux_normalize_feed" => true,
"rs" => "typed",
"scope" => "pins", // pins, boards, videos,
"source_id" => null
],
"context" => []
]
),
"_" => substr(str_replace(".", "", (string)microtime(true)), 0, -1)
];
$proxy = $this->backend->get_ip();
}
try{
$json =
json_decode(
$this->get(
$proxy,
"https://www.pinterest.ca/resource/BaseSearchResource/get/",
$filter
),
@ -115,7 +133,11 @@ class pinterest{
throw new Exception("Failed to decode JSON");
}
//print_r($json);
$out = [
"status" => "ok",
"npt" => null,
"image" => []
];
foreach(
$json
@ -189,7 +211,6 @@ class pinterest{
break;
case "board":
if(isset($item["cover_pin"]["image_url"])){
$image = [

View file

@ -83,6 +83,10 @@ $settings = [
"value" => "startpage",
"text" => "Startpage"
],
[
"value" => "kagi",
"text" => "Kagi"
],
[
"value" => "qwant",
"text" => "Qwant"