From 658b3cbac50fae20aac1a1f15b0b4d00ce22d143 Mon Sep 17 00:00:00 2001 From: Korbs Date: Fri, 7 Jun 2024 20:47:08 -0400 Subject: [PATCH] Remove all trace of the name "4Get" and "lolcat.ca" and remove swear words --- CHANGESMADE.md | 8 + Dockerfile | 4 +- README.md | 31 +-- ami4get.php | 2 +- api.txt | 80 +++---- audio/spotify.php | 8 +- captcha.php | 2 +- data/config.php | 35 +--- docker-compose.yaml | 15 +- docker/apache/http.conf | 6 +- docker/apache/https.conf | 10 +- docker/docker-entrypoint.sh | 2 +- docker/gen_config.php | 6 +- docs/apache2.md | 228 -------------------- docs/caddy.md | 58 ------ docs/configure.md | 68 ------ docs/docker.md | 152 -------------- docs/nginx.md | 103 --------- docs/tor.md | 16 -- favicon.ico | Bin 193 -> 16958 bytes favicon.php | 3 +- favicon.svg | 1 + icons/lolcat.ca.png | Bin 753 -> 0 bytes images.php | 2 +- lib/backend.php | 2 +- lib/bingcache-todo-fix.php | 6 +- lib/curlproxy.php | 4 +- lib/frontend.php | 7 +- lib/{fuckhtml.php => heckhtml.php} | 2 +- lib/type-todo.php | 4 +- license.txt | 14 +- music.php | 2 +- news.php | 2 +- oracles/calc.php | 3 +- robots.txt | 28 +-- scraper/brave.php | 94 ++++----- scraper/crowdview.php | 6 +- scraper/curlie.php | 22 +- scraper/ddg.php | 66 +++--- scraper/google.php | 321 ++++++++++++++--------------- scraper/greppr.php | 36 ++-- scraper/imgur.php | 20 +- scraper/marginalia.php | 30 +-- scraper/mojeek.php | 140 ++++++------- scraper/mwmbl.php | 24 +-- scraper/qwant.php | 16 +- scraper/spotify.php | 10 +- scraper/yandex.php | 64 +++--- scraper/yep.php | 14 +- settings.php | 2 +- static/serverping.js | 51 +---- static/style.css | 138 ++++++------- template/about.html | 25 +-- template/header.html | 62 +++--- template/home.html | 44 ++-- template/instances.html | 2 +- videos.php | 2 +- web.php | 2 +- 58 files changed, 648 insertions(+), 1457 deletions(-) create mode 100644 CHANGESMADE.md delete mode 100644 docs/apache2.md delete mode 100644 docs/caddy.md delete mode 100644 docs/configure.md delete mode 100644 docs/docker.md delete mode 100644 docs/nginx.md delete mode 100644 docs/tor.md create mode 100644 favicon.svg delete mode 100644 icons/lolcat.ca.png rename lib/{fuckhtml.php => heckhtml.php} (99%) diff --git a/CHANGESMADE.md b/CHANGESMADE.md new file mode 100644 index 0000000..4d8bf6f --- /dev/null +++ b/CHANGESMADE.md @@ -0,0 +1,8 @@ +This is a fork of an existing project. + +In terms of how functions work, nothing was changed. + +The following changes were made: + - Remove all swear words, very rude + - New design + - Removed project name and replaced with "NarviSearch" \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 8ee52cd..22765a7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ FROM alpine:latest -WORKDIR /var/www/html/4get +WORKDIR /var/www/html/narvisearch RUN apk update && apk upgrade RUN apk add php apache2-ssl php83-fileinfo php83-openssl php83-iconv php83-common php83-dom php83-sodium php83-curl curl php83-pecl-apcu php83-apache2 imagemagick php83-pecl-imagick php-mbstring imagemagick-webp imagemagick-jpeg @@ -7,8 +7,6 @@ RUN apk add php apache2-ssl php83-fileinfo php83-openssl php83-iconv php83-commo COPY ./docker/apache/ /etc/apache2/ COPY . . -RUN chmod 777 /var/www/html/4get/icons - EXPOSE 80 EXPOSE 443 diff --git a/README.md b/README.md index 6cc82a7..9a4519e 100644 --- a/README.md +++ b/README.md @@ -1,22 +1,11 @@ -[![ko-fi](https://ko-fi.com/img/githubbutton_sm.svg)](https://ko-fi.com/W7W2OZK5H) +# NarviSearch search +**NarviSearch** is a proxy search engine that doesn't suck. -# 4get search -**4get** is a proxy search engine that doesn't suck. - -## About 4get -https://4get.ca/about +## About NarviSearch +https://search.sudovanilla.org/about ## Try it out -https://4get.ca - -## Totally unbiased comparison between alternatives - -| | 4get | searx(ng) | librex | araa | -|----------------------------|-------------------------|-----------|-------------|-----------| -| RAM usage | 200-400mb~ | 2GB~ | 200-400mb~ | 2GB~ | -| Does it suck | no (debunked by snopes) | yes | yes | a little | -| Does it work | ye | sometimes | no | sometimes | -| Did the dev commit suicide | not until my 30s | no | allegedly | no | +https://search.sudovanilla.org/ ## Features 1. Rotating proxies on a per-scraper basis @@ -24,9 +13,7 @@ https://4get.ca 3. Bot protection that *actually* filters out the bots (when configured) 4. Interface doesn't require javascript 5. Favicon fetcher with caching support & image proxy -6. Bunch of other shit - -tl;dr the best way to actually browse for shit. +6. Other neat thing # Supported websites @@ -45,9 +32,3 @@ tl;dr the best way to actually browse for shit. | Marginalia | | | | | | | wiby | | | | | | | Curlie | | | | | | - -# Installation -Refer to the documentation index. I recommend following the apache2 guide. - -## Contact -Shit breaks all the time but I repair it all the time too... Email me here: will (at) lolcat.ca or create an issue. diff --git a/ami4get.php b/ami4get.php index 5bb9273..69e3e01 100644 --- a/ami4get.php +++ b/ami4get.php @@ -11,7 +11,7 @@ $bot_requests = apcu_fetch("captcha_gen"); echo json_encode( [ "status" => "ok", - "service" => "4get", + "service" => "narvisearch", "server" => [ "name" => config::SERVER_NAME, "description" => config::SERVER_LONG_DESCRIPTION, diff --git a/api.txt b/api.txt index a64873e..0855300 100644 --- a/api.txt +++ b/api.txt @@ -1,11 +1,10 @@ - __ __ __ - / // / ____ ____ / /_ - / // /_/ __ `/ _ \/ __/ - /__ __/ /_/ / __/ /_ - /_/ \__, /\___/\__/ - /____/ - - + Welcome to the 4get API documentation + + _ _ _ ____ _ + | \ | | __ _ _ __ __ __ (_) / ___| ___ __ _ _ __ ___ | |__ + | \| | / _` | | '__| \ \ / / | | \___ \ / _ \ / _` | | '__| / __| | '_ \ + | |\ | | (_| | | | \ V / | | ___) | | __/ | (_| | | | | (__ | | | | + |_| \_| \__,_| |_| \_/ |_| |____/ \___| \__,_| |_| \___| |_| |_| + + + Welcome to the NarviSearch API documentation + + Terms of use Do NOT misuse the API. Misuses can include... :: @@ -26,15 +25,12 @@ If you wish to engage in the activities listed under "misuses", feel - free to download the source code of the project and running 4get + free to download the source code of the project and running NarviSearch under your own terms. Please respect the terms of use listed here so that this website may be available to all in the far future. - P.s fuck whoever botted my site for months on end, choke on my dick - lol!!!! - Get your instance running here :: - https://git.lolcat.ca/lolcat/4get + https://ark.sudovanilla.org/Korbs/NarviSearch Thanks! @@ -86,7 +82,7 @@ array position on the serber's memory. The second part is an encryption key used to decode the data at that position. This way, it is impossible to supply invalid pagination data and it is - impossible for a 4get operator to peek at the private data of the + impossible for a NarviSearch operator to peek at the private data of the user after a request has been made. The tokens will expire as soon as they are used or after a 15 @@ -99,7 +95,7 @@ + API Parameters - To construct a valid request, you can use the 4get web interface + To construct a valid request, you can use the NarviSearch web interface to craft a valid request, and replace "/web" with "/api/v1/web". @@ -119,8 +115,8 @@ /_____/_/ /_/\__,_/ .___/\____/_/_/ /_/\__/____/ /_/ -+ /ami4get - Tells you basic information about the 4get instance. CORS requests ++ /amins + Tells you basic information about the NarviSearch instance. CORS requests are allowed on this endpoint. @@ -174,7 +170,7 @@ value: "Higurashi is a great show!" 1: type: "quote" - value: "Source: my ass" + value: "Source: source" Each "description" node contains an array index named "type". @@ -194,30 +190,6 @@ Each individual node prepended with a "+" should be prepended by a newline when constructing the rendered description object. - There are some nodes that differ from the type-value format. - Please parse them accordingly :: - - + link - type: "link" - url: "https://lolcat.ca" - value: "Visit my website!" - - - + image - type: "image" - url: "https://lolcat.ca/static/pixels.png" - - - + audio - type: "audio" - url: "https://lolcat.ca/static/whatever.mp3" - - - The array index named "table" is an associative array. You can - loop over the data using this PHP code, for example :: - - foreach($table as $website_name => $url){ // ... - The rest of the JSON is pretty self explanatory. @@ -231,11 +203,11 @@ title: "My awesome Higurashi image" source: 0: - url: "https://lolcat.ca/static/profile_pix.png" + url: "https://example.com/static/profile_pix.png" width: 400 height: 400 1: - url: "https://lolcat.ca/static/pixels.png" + url: "https://example.com/static/pixels.png" width: 640 height: 640 2: @@ -273,11 +245,11 @@ When the endpoint is something else than "linear", you MUST use the specified endpoint. Otherwise, you are free to handle that - json+m3u8 crap yourself. If the endpoint is equal to "linear", the + json+m3u8 stuff yourself. If the endpoint is equal to "linear", the URL should return a valid HTTP audio stream. To access the endpoint, you must add the following prefix in your request, like so: - https://4get.ca/audio/?s= + https://search.sudovanilla.org/audio/?s= + /favicon @@ -287,7 +259,7 @@ Example :: - /favicon?s=https://lolcat.ca + /favicon?s=https://example.com If we had to revert to using Google's favicon cache, it will throw @@ -329,9 +301,8 @@ Content" parts, due to technical limitations that comes with converting m3u8 playlists to seekable audio files. If you use this endpoint, you must support these 206 codes and also handle the - initial 302 HTTP redirect. I used this method as I didn't want to - store information about your request needlessly. This method also - allows noJS users to access the files. + initial 302 HTTP redirect. This method also allows noJS users + to access the files. The parameter is "s" for the SoundCloud JSON m3u8 abomination. It does not support "normal" SoundCloud URLs at this time. @@ -339,12 +310,11 @@ + /audio/spotify Get a proxied Spotify audio file. Accepts a track ID for the "s" - parameter. Will only allow you to fetch the 30 second preview since - I don't feel like fucking with cookies and accounts every fucking - living moment of my life. You must handle the initial 302 redirect - to the /audio/linear endpoint. + parameter. Will only allow you to fetch the 30 second preview. + You must handle the initial 302 redirect to the /audio/linear + endpoint. + Appendix If you have any questions or need clarifications, please send an - email my way to will at lolcat.ca + email my way to korbs@sudovanilla.org. diff --git a/audio/spotify.php b/audio/spotify.php index dc8fae6..a87652b 100644 --- a/audio/spotify.php +++ b/audio/spotify.php @@ -7,8 +7,8 @@ class spotify{ public function __construct(){ - include "../lib/fuckhtml.php"; - $this->fuckhtml = new fuckhtml(); + include "../lib/heckhtml.php"; + $this->heckhtml = new heckhtml(); if( !isset($_GET["s"]) || @@ -39,10 +39,10 @@ class spotify{ $this->do404("Failed to fetch embed data"); } - $this->fuckhtml->load($embed); + $this->heckhtml->load($embed); $json = - $this->fuckhtml + $this->heckhtml ->getElementById( "__NEXT_DATA__", "script" diff --git a/captcha.php b/captcha.php index 286a277..e2f63c2 100755 --- a/captcha.php +++ b/captcha.php @@ -11,7 +11,7 @@ if( http_response_code(401); header("Content-Type: text/plain"); - echo "Fuck my feathered cloaca"; + echo "Heck my feathered cloaca"; die(); } diff --git a/data/config.php b/data/config.php index 13be0f4..4157b27 100644 --- a/data/config.php +++ b/data/config.php @@ -1,14 +1,14 @@ tag on home page const SERVER_SHORT_DESCRIPTION = "They live in our walls!"; @@ -71,37 +71,14 @@ class config{ // List of domains that point to your servers. Include your tor/i2p // addresses here! Must be a valid URL. Won't affect links placed on // the homepage. - const ALT_ADDRESSES = [ - //"https://4get.alt-tld", - //"http://4getwebfrq5zr4sxugk6htxvawqehxtdgjrbcn2oslllcol2vepa23yd.onion" - ]; + const ALT_ADDRESSES = []; - // Known 4get instances. MUST use the https protocol if your instance uses + // Known NarviSearch instances. MUST use the https protocol if your instance uses // it. Is used to generate a distributed list of instances. // To appear in the list of an instance, contact the host and if everyone added // eachother your serber should appear everywhere. const INSTANCES = [ - "https://4get.ca", - "https://4get.zzls.xyz", - "https://4getus.zzls.xyz", - "https://4get.silly.computer", - "https://4get.konakona.moe", - "https://4get.lvkaszus.pl", - "https://4g.ggtyler.dev", - "https://4get.perennialte.ch", - "https://4get.sijh.net", - "https://4get.hbubli.cc", - "https://4get.plunked.party", - "https://4get.seitan-ayoub.lol", - "https://4get.etenie.pl", - "https://4get.lunar.icu", - "https://4get.dcs0.hu", - "https://4get.kizuki.lol", - "https://4get.psily.garden", - "https://search.milivojevic.in.rs", - "https://4get.snine.nl", - "https://4get.datura.network", - "https://4get.neco.lol" + "https://search.sudovanilla.org" ]; // Default user agent to use for scraper requests. Sometimes ignored to get specific webpages diff --git a/docker-compose.yaml b/docker-compose.yaml index df41b23..832044e 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -1,20 +1,15 @@ -# example docker-compose.yaml -version: "3.7" - services: fourget: - image: luuul/4get:latest + image: docker.io/library/4get restart: unless-stopped environment: - - FOURGET_VERSION=6 - - FOURGET_SERVER_NAME=4get.ca + - FOURGET_VERSION=1 + - FOURGET_SERVER_NAME=search.sudovanilla.org ports: - "80:80" - "443:443" volumes: - - /etc/letsencrypt/live/domain.tld:/etc/4get/certs - # mount custom banners and captcha - - ./banners:/var/www/html/4get/banner - - ./captcha:/var/www/html/4get/data/captcha + - ./banners:/var/www/html/narvisearch/banner + - ./captcha:/var/www/html/narvisearch/data/captcha diff --git a/docker/apache/http.conf b/docker/apache/http.conf index f496ba5..91a1be8 100644 --- a/docker/apache/http.conf +++ b/docker/apache/http.conf @@ -4,13 +4,13 @@ ServerRoot /var/www ServerSignature On ServerName localhost -DocumentRoot "/var/www/html/4get" +DocumentRoot "/var/www/html/narvisearch" LogLevel warn CustomLog /dev/null common ErrorLog /dev/null - + RewriteEngine On RewriteCond %{THE_REQUEST} ^\w+\ /(.*)\.php(\?.*)?\ HTTP/ RewriteRule ^ http://%{HTTP_HOST}/%1 [R=301] @@ -22,7 +22,7 @@ ErrorLog /dev/null # deny access to private resources - + Require all denied Require all denied diff --git a/docker/apache/https.conf b/docker/apache/https.conf index ca5edf5..1fde556 100644 --- a/docker/apache/https.conf +++ b/docker/apache/https.conf @@ -3,7 +3,7 @@ ServerRoot /var/www ServerSignature On ServerName localhost -DocumentRoot "/var/www/html/4get" +DocumentRoot "/var/www/html/narvisearch" LogLevel warn CustomLog /dev/null common @@ -11,11 +11,11 @@ ErrorLog /dev/null SSLEngine on - SSLCertificateFile /etc/4get/certs/fullchain.pem - SSLCertificateKeyFile /etc/4get/certs/privkey.pem + SSLCertificateFile /etc/narvisearch/certs/fullchain.pem + SSLCertificateKeyFile /etc/narvisearch/certs/privkey.pem - + RewriteEngine On RewriteCond %{THE_REQUEST} ^\w+\ /(.*)\.php(\?.*)?\ HTTP/ RewriteRule ^ http://%{HTTP_HOST}/%1 [R=301] @@ -27,7 +27,7 @@ ErrorLog /dev/null # deny access to private resources - + Require all denied Require all denied diff --git a/docker/docker-entrypoint.sh b/docker/docker-entrypoint.sh index 66d4067..f2fef1b 100755 --- a/docker/docker-entrypoint.sh +++ b/docker/docker-entrypoint.sh @@ -20,6 +20,6 @@ fi php ./docker/gen_config.php -echo "4get is running" +echo "NarviSearch is running" exec httpd -DFOREGROUND diff --git a/docker/gen_config.php b/docker/gen_config.php index ceea117..d813f7b 100644 --- a/docker/gen_config.php +++ b/docker/gen_config.php @@ -1,6 +1,6 @@ getConstants()); @@ -40,14 +40,14 @@ function type_to_string($n) { function detect_captcha_dirs() { - $captcha_dir = "/var/www/html/4get/data/captcha/"; + $captcha_dir = "/var/www/html/narvisearch/data/captcha/"; $categories = (array_map(function ($n) { return explode("/", $n)[7]; }, glob($captcha_dir . "*"))); $result = array_map(function($category) { - return [$category, count(glob("/var/www/html/4get/data/captcha/" . $category . "/*" ))]; + return [$category, count(glob("/var/www/html/narvisearch/data/captcha/" . $category . "/*" ))]; }, $categories); return $result; diff --git a/docs/apache2.md b/docs/apache2.md deleted file mode 100644 index e746a7e..0000000 --- a/docs/apache2.md +++ /dev/null @@ -1,228 +0,0 @@ -# Install guide for Apache2 webserver -Welcome to the new and revamped 4get install manual for apache2. Even if you already have services running on an existing installation of apache2, you should still be able to adapt this guide to your needs. - -For starters, login as `root`. - -Then, install the following dependencies: -```sh -apt update -apt upgrade -apt install php-mbstring apache2 certbot php-imagick imagemagick php-curl curl php-apcu git libapache2-mod-php -``` - -Enable the required modules: -```sh -a2enmod ssl -a2enmod rewrite -``` - -And enable these optional ones, which might be useful to you later on. The `proxy` module is useful for setting up reverse proxies to services like gitea, and `headers` is useful to tweak global header values: -```sh -a2enmod proxy -a2enmod headers -``` - -Now, restart apache2: -```sh -service apache2 restart -``` - -Just for good measure, please check if your webserver is running. Access it through HTTP, not HTTPS. You should see the apache2 default landing page. - -## 000-default.conf -Now, edit the following file: `/etc/apache2/sites-available/000-default.conf`, remove everything and carefully add each rule specified here, while making sure to replace my domains with your own: - -1. The `VirtualHost` here instructs apache2 to redirect all **HTTP** traffic that specify an unknown `Host` header be redirected to a specific domain of your choice. Configuring this is not required but highly recommended. -```xml - - # no domain = go to 4get.ca - RedirectMatch 301 ^(.*)$ https://4get.ca$1 - -``` - -2. This instruction tells apache2 to redirect all HTTP traffic on `Host` lolcat.ca to the HTTPS version of the site. You should add a rule like this for all of your services explicitly. -```xml - - ServerName lolcat.ca - RedirectMatch 301 ^(.*)$ https://lolcat.ca$1 - -``` - -3. Subdomains won't be matched by the above rule, so I recommend you also add them to be more explicit: -```xml - - ServerName www.lolcat.ca - RedirectMatch 301 ^(.*)$ https://lolcat.ca$1 - -``` - -... Etc, for every service you own. - -4. And finally, append this configuration if you wish to host a tor or i2p access point. This configuration should not be binded to SSL(443) as Let's Encrypt does not let you create certificates for onion sites: -```xml - - # tor site - ServerName 4getwebfrq5zr4sxugk6htxvawqehxtdgjrbcn2oslllcol2vepa23yd.onion - - # compress - AddOutputFilterByType DEFLATE application/json - AddOutputFilterByType DEFLATE application/javascript - AddOutputFilterByType DEFLATE application/x-javascript - AddOutputFilterByType DEFLATE text/html - AddOutputFilterByType DEFLATE text/plain - AddOutputFilterByType DEFLATE text/css - - DocumentRoot /var/www/4get - - Options +MultiViews - RewriteEngine On - RewriteCond %{REQUEST_FILENAME} !-d - RewriteCond %{REQUEST_FILENAME} !-f - RewriteRule ^([^\.]+)$ $1.php [NC,L] - - # deny access to private resources - - Order Deny,allow - Deny from all - - -``` -To make the above snippet work, please refer to our tor site guide. - -## default-ssl.conf -Now, edit the file `/etc/apache2/sites-available/default-ssl.conf`, remove everything and, again, add each rule while modifying the relevant fields: - -This ruleset will redirect all clients that specify an unknown `Host` to the domain of our choice. I recommend you uncomment the `ErrorLog` directive while setting things up in case a problem occurs with PHP. Don't worry about the invalid SSL paths, we will generate our certificates later; Just make sure you specify the right domains in there: -```xml - - RedirectMatch 301 ^(.*)$ https://4get.ca$1 - ServerAdmin will@lolcat.ca - - #ErrorLog ${APACHE_LOG_DIR}/error.log - - SSLEngine on - - - SSLOptions +StdEnvVars - - - SSLOptions +StdEnvVars - - - AddOutputFilterByType DEFLATE application/json - AddOutputFilterByType DEFLATE application/javascript - AddOutputFilterByType DEFLATE application/x-javascript - AddOutputFilterByType DEFLATE text/html - AddOutputFilterByType DEFLATE text/plain - AddOutputFilterByType DEFLATE text/css - - SSLCertificateFile /etc/letsencrypt/live/4get.ca/fullchain.pem - SSLCertificateKeyFile /etc/letsencrypt/live/4get.ca/privkey.pem - -``` - -This ruleset tells apache2 where 4get is located (`/var/www/4get`), ensures that `4get.ca/settings` resolves to `4get.ca/settings.php` internally and that we deny access to `/data/*`, which may contain files you might want to keep private. -```xml - - ServerName 4get.ca - - DocumentRoot /var/www/4get - - Options +MultiViews - RewriteEngine On - RewriteCond %{REQUEST_FILENAME} !-d - RewriteCond %{REQUEST_FILENAME} !-f - RewriteRule ^([^\.]+)$ $1.php [NC,L] - - # deny access to private resources - - Order Deny,allow - Deny from all - - -``` - -Don't forget to specify your other services here! Here's an example of a ruleset I use for `lolcat.ca`: -```xml - - ServerName lolcat.ca - - DocumentRoot /var/www/lolcat - - Options +MultiViews - RewriteEngine On - RewriteCond %{REQUEST_FILENAME} !-d - RewriteCond %{REQUEST_FILENAME} !-f - RewriteRule ^([^\.]+)$ $1.php [NC,L] - -``` - -... Alongside with it's redirect rules. -```xml - - ServerName www.lolcat.ca - RedirectMatch 301 ^(.*)$ https://lolcat.ca$1 - -``` - -## security.conf -If you enabled the `headers` module, you can head over to `/etc/apache2/conf-enabled/security.conf` and edit: -```sh -ServerTokens Prod # instead off Full -``` -and -```sh -ServerSignature Off #instead of On -``` -This will ensure that the `Server` header apache2 returns is minimal and doesn't leak information like your host system's OS or apache2 version. - -You can also uncomment `Header set X-Content-Type-Options: "nosniff"` and `Header set Content-Security-Policy "frame-ancestors 'self';"` respectively. - -## charset.conf -Head over to `/etc/apache2/conf-enabled/charset.conf` and uncomment `AddDefaultCharset UTF-8`. - -## other-vhost-access-log.conf -Since none of our configuration files contains any `CustomLog` directives, all we need to do to disable logging entirely is comment out the `CustomLog` directive located in `/etc/apache2/conf-enabled/other-vhost-access-log.conf`. Only error logs will remain if you configured them. - -## Symlink everything -Now comes the most important part of the setup. Run -```sh -ln -s /etc/apache2/sites-available/default-ssl.conf /etc/apache2/sites-enabled/default-ssl.conf -``` -Otherwise apache2 will ignore our SSL configuration. Handy, huh? - -# Setup SSL -Great, now we've configured the webserver, but we still don't have our security certificate. Let's generate one! - -First, stop `apache2`. -```sh -service apache2 stop -``` - -Now, run `certbot`, and specify all of your domains by prepending `-d` every time. Make sure the first domain you specify is your main domain, and the same domain you specified in the configuration above! We use ECDSA encryption here as it's better than RSA. -```sh -certbot certonly --standalone --key-type ecdsa -d 4get.ca -d www.4get.ca -d lolcat.ca -d www.lolcat.ca -``` - -Certbot should ask you a few questions, just play along. At the end of the setup, certbot should tell you about the location of the certificates. Double check to make sure they correspond to the paths we specified in `default-ssl.conf`. Your certificates should now update every 2-3 months automatically. - -After this is complete, create a directory in `/var/www/4get`. - -Now, start `apache2`. -```sh -service apache2 start -``` - -Congratulations! You now have a... 404 error on your webserver, if everything went well. Now's the time to make sure all of our redirect rules work! - -# Import the fun junk -Run these commands: -``` -cd /var/www/4get -git clone https://git.lolcat.ca/lolcat/4get -chmod 777 -R icons/ -``` - -... And try accessing your webserver. You should now have a working 4get instance! - -Please make sure to check out how to further configure 4get to your liking! diff --git a/docs/caddy.md b/docs/caddy.md deleted file mode 100644 index 90f438c..0000000 --- a/docs/caddy.md +++ /dev/null @@ -1,58 +0,0 @@ -# Install guide for Caddy webserver - -1. Install dependencies: - -`sudo apt install caddy php8.2-dom php8.2-imagick imagemagick php8.2-curl curl php8.2-apcu git` - -2. Clone this repository where you want to host this from: - -`cd /var/www && sudo git clone https://git.konakona.moe/diowo/4get` - -3. Set permission on the `icons` directory inside `4get` - -`cd /var/www/4get/ && sudo chmod 777 -R icons/` - -4. Add an entry for 4get on your Caddyfile at `/etc/caddy/Caddyfile` - -```sh -4get.konakona.moe { - root * /var/www/4get - file_server - encode gzip - php_fastcgi unix//var/run/php/php8.2-fpm.sock { - index index.php - } - redir /{path}.php{query} 301 - try_files {path} {path}.php -} -``` - -Caddy deals with SSL certificates automatically so you don't have to mess with anything. Also if needed, a sample of my Caddyfile can be found [here](https://git.konakona.moe/diowo/misc/src/branch/master/etc/caddy/Caddyfile). - -5. Restart Caddy - -`sudo systemctl restart caddy` - -# Encryption setup -I'm schizoid (as you should) so I'm gonna setup 4096bit key encryption. To complete this step, you need a domain or subdomain in your possession. Make sure that the DNS shit for your domain has propagated properly before continuing, because certbot is a piece of shit that will error out the ass once you reach 5 attempts under an hour. - -## Encryption setup on Apache - -```sh -certbot --apache --rsa-key-size 4096 -d www.yourdomain.com -d yourdomain.com -``` -When it asks to choose a vhost, choose the option with "HTTPS" listed. Don't setup HTTPS for tor, we don't need it (it doesn't even work anyways with let's encrypt) - -Edit `000-default-le-ssl.conf` - -Add this at the end: -```xml - - RewriteEngine On - RewriteCond %{REQUEST_FILENAME}.php -f - RewriteRule (.*) $1.php [L] - Options Indexes FollowSymLinks - AllowOverride All - Require all granted - -``` diff --git a/docs/configure.md b/docs/configure.md deleted file mode 100644 index 7cc4175..0000000 --- a/docs/configure.md +++ /dev/null @@ -1,68 +0,0 @@ -# 4get configuation options - -Welcome! This guide assumes that you have a working 4get instance. This will help you configure your instance to the best it can be! - -# Files location -1. The main configuration file is located at `data/config.php` -2. The proxies are located in `data/proxies/*.txt` -3. The captcha imagesets are located in `data/captcha/your_image_set/*.png` -4. The captcha font is located in `data/fonts/captcha.ttf` - -# Cloudflare bypass -**Note: this only allows you to bypass the browser integrity checks. Captchas & javascript challenges will not be bypassed.** - -Configuring this lets you fetch images sitting behind Cloudflare and allows you to scrape the **Yep** search engine. Following these instructions might make your package manager unhappy. - -First, follow these instructions. Only install the Firefox modules: - -https://github.com/lwthiker/curl-impersonate/blob/main/INSTALL.md#native-build - -Once you did this, you should be able to run the following inside your terminal: - -```sh -$ curl_ff117 --version -curl 8.1.1 (x86_64-pc-linux-gnu) libcurl/8.1.1 NSS/3.92 zlib/1.2.13 brotli/1.0.9 zstd/1.5.4 libidn2/2.3.3 nghttp2/1.56.0 -Release-Date: 2023-05-23 -Protocols: dict file ftp ftps gopher gophers http https imap imaps mqtt pop3 pop3s rtsp smb smbs smtp smtps telnet tftp ws wss -Features: alt-svc AsynchDNS brotli HSTS HTTP2 HTTPS-proxy IDN IPv6 Largefile libz NTLM NTLM_WB SSL threadsafe UnixSockets zstd -``` -Now, after compiling, you should have a `libcurl-impersonate-ff.so` sitting somewhere. Mine (on my debian install) is located at `/usr/local/lib/libcurl-impersonate-ff.so`. - -Find the `libcurl.so.4` file used by your current installation of curl. For me, this file is located at `/usr/lib/x86_64-linux-gnu/libcurl.so.4` - -Now comes the sketchy part: replace `libcurl.so.4` with `libcurl-impersonate-ff.so`. You can do this in the following way: -```sh -sudo rm /usr/lib/x86_64-linux-gnu/libcurl.so.4 -sudo cp /usr/local/lib/libcurl-impersonate-ff.so /usr/lib/x86_64-linux-gnu/libcurl.so.4 -``` - -Make sure to restart your webserver and/or PHP daemon, otherwise it will keep using the old library. You should now be able to bypass Cloudflare's shitty checks!! - -# Robots.txt -Make sure you configure this right to optimize your search engine presence! Head over to `/robots.txt` and change the 4get.ca domain to your own domain. - -# Server listing -To be listed on https://4get.ca/instances , you must contact *any* of the people in the server list and ask them to add you to their list of instances in their configuration. The instance list is distributed, and I don't have control over it. - -If you see spammy entries in your instances list, simply remove the instance from your list that pushes the offending entries. - -# Proxies -4get supports rotating proxies for scrapers! Configuring one is really easy. - -1. Head over to the **proxies** folder. Give it any name you want, like `myproxy`, but make sure it has the `txt` extension. -2. Add your proxies to the file. Examples: - ```conf - # format -> :
::: - # protocol list: - # raw_ip, http, https, socks4, socks5, socks4a, socks5_hostname - socks5:1.1.1.1:juicy:cloaca00 - http:1.3.3.7:: - raw_ip:::: - ``` -3. Go to the **main configuration file**. Then, find which website you want to setup a proxy for. -4. Modify the value `false` with `"myproxy"`, with quotes included and the semicolon at the end. - -Done! The scraper you chose should now be using the rotating proxies. When asking for the next page of results, it will use the same proxy to avoid detection! - -## Important! -If you ever test out a `socks5` proxy locally on your machine and find out it works but doesn't on your server, try supplying the `socks5_hostname` protocol instead. Hopefully this tip can save you 3 hours of your life! diff --git a/docs/docker.md b/docs/docker.md deleted file mode 100644 index e56b5ca..0000000 --- a/docs/docker.md +++ /dev/null @@ -1,152 +0,0 @@ -#### Install guide for Docker - -When using docker container any environment variables prefixed with `FOURGET_` will be added to the generated config located at `/var/www/html/4get/data/config.php` - -When lists of data is expected in [data/config.php](../data/config.php), such as `INSTANCES`, you can pass in a comma separated string via environment variable. - -Example: -`FOURGET_INSTANCES="https://4get.ca,https://domain.tld"` - -#### Special environment variables - -| Name | value | Example | -| - | - | - | -| FOURGET_PROTO | "http" or "https" | "https" | - - -#### Important directories - -| Mountpoint | Description | -| - | - | -| /etc/4get/certs | SSL certificate directory | -| /var/www/html/4get/banner | Custom Banners directory | -| /var/www/html/4get/data/captcha | Captcha dataset | - - -the certificate directory `/etc/4get/certs` expects files named `fullchain.pem` and `privkey.pem` - -The captcha dataset should have a subdirectory for each category. In each category, images should be named from 1.png to X.png, and be 100x100 in size. - -example directory structure: - -``` -captcha/ - birds/ - 1.png - 2.png - 3.png - anime/ - 1.png - 2.png -``` - -For more information on configuration view [data/config.php](../data/config.php) - -#### Usage - -You can start 4get with - -``` -docker run -d -p 80:80 -e FOURGET_SERVER_NAME="4get.ca" -e FOURGET_PROTO="http" luuul/4get:latest -``` - -...Or with SSL: - -``` -docker run -d -p 443:443 -e FOURGET_SERVER_NAME="4get.ca" -e FOURGET_PROTO="https" -v /etc/letsencrypt/live/domain.tld:/etc/4get/certs luuul/4get:latest -``` - - -#### With Docker Compose - -Replace relevant values and start with `docker compose up -d` - -##### HTTP - -``` -# docker-compose.yaml -version: "3.7" - -services: - fourget: - image: luuul/4get:latest - restart: unless-stopped - environment: - - FOURGET_VERSION=6 - - FOURGET_PROTO=http - - FOURGET_SERVER_NAME=4get.ca - - ports: - - "80:80" -``` - -##### HTTPS - -``` -# docker-compose.yaml -version: "3.7" - -services: - fourget: - image: luuul/4get:latest - restart: unless-stopped - environment: - - FOURGET_VERSION=6 - - FOURGET_PROTO=https - - FOURGET_SERVER_NAME=4get.ca - - ports: - - "80:80" - - "443:443" - - volumes: - - /etc/letsencrypt/live/domain.tld:/etc/4get/certs -``` - -##### Captcha Enabled - -Set `FOURGET_BOT_PROTECTION=1` and mount a directory containing captcha files to `/var/www/html/4get/data/captcha` - - -``` -# docker-compose.yaml -version: "3.7" - -services: - fourget: - image: luuul/4get:latest - restart: unless-stopped - environment: - - FOURGET_VERSION=6 - - FOURGET_PROTO=http - - FOURGET_SERVER_NAME=4get.ca - - FOURGET_BOT_PROTECTION=1 - - ports: - - "80:80" - - volumes: - - ./captcha:/var/www/html/4get/data/captcha -``` - -##### Custom Banners - -``` -# docker-compose.yaml -version: "3.7" - -services: - fourget: - image: luuul/4get:latest - restart: unless-stopped - environment: - - FOURGET_VERSION=6 - - FOURGET_PROTO=http - - FOURGET_SERVER_NAME=4get.ca - - ports: - - "80:80" - - volumes: - - ./banners:/var/www/html/4get/banner -``` diff --git a/docs/nginx.md b/docs/nginx.md deleted file mode 100644 index 8693559..0000000 --- a/docs/nginx.md +++ /dev/null @@ -1,103 +0,0 @@ -# Install on NGINX - ->I do NOT recommend following this guide, only follow this if you *really* need to use nginx. I recommend you use the apache2 steps instead. - -Login as root. - -Create a file in `/etc/nginx/sites-avaliable/` called `4get.conf` or any name you want and put this into the file: - -``` -server { - # DO YOU REALLY NEED TO LOG SEARCHES? - access_log /dev/null; - error_log /dev/null; - # Change this if you have 4get in other folder. - root /var/www/4get; - # Change yourdomain by your domain lol - server_name www.yourdomain.com yourdomain.com; - - location @php { - try_files $uri.php $uri/index.php =404; - # Change the unix socket address if it's different for you. - fastcgi_pass unix:/var/run/php-fpm/php-fpm.sock; - fastcgi_index index.php; - # Change this to `fastcgi_params` if you use a debian based distro. - include fastcgi.conf; - fastcgi_intercept_errors on; - } - - location / { - try_files $uri @php; - } - - location ~* ^(.*)\.php$ { - return 301 $1; - } - - listen 80; -} -``` - -That is a very basic config so you will need to adapt it to your needs in case you have a more complicated nginx configuration. Anyways, you can see a real world example [here](https://git.zzls.xyz/Fijxu/etc-configs/src/branch/selfhost/nginx/sites-available/4get.zzls.xyz.conf) - -After you save the file you will need to do a symlink of the `4get.conf` file to `/etc/nignx/sites-enabled/`, you can do it with this command: - -```sh -ln -s /etc/nginx/sites-available/4get.conf /etc/nginx/sites-available/4get.conf -``` - -Now test the nginx config with `nginx -t`, if it says that everything is good, restart nginx using `systemctl restart nginx` - -# Encryption setup - -Generate a certificate for the domain using: - -```sh -certbot --nginx --key-type ecdsa -d www.yourdomain.com -d yourdomain.com -``` -(Remember to install the nginx certbot plugin!!!) - -After doing that certbot should deploy the certificate automatically into your 4get nginx config file. It should be ready to use at that point. - -# Tor setup on NGINX - -Important Note: Tor onion addresses are significantly longer than traditional domain names. Before proceeding with Nginx configuration, ensure you increase the `server_names_hash_bucket_size` value in your `nginx.conf` file. This setting in your Nginx configuration controls the internal data structure used to manage multiple server names (hostnames) associated with your web server. Each hostname requires a certain amount of memory within this structure. If the size is insufficient, Nginx will encounter errors. - -1. Open your `nginx.conf` file (that is under `/etc/nginx/nginx.conf`). -2. Find the line containing `# server_names_hash_bucket_size 64;`. -3. Uncomment the line and adjust the value. Start with 64, but if you encounter issues, incrementally increase it (e.g., 128, 256) until it accommodates your configuration. - -Open your current 4get NGINX config (that is under `/etc/nginx/sites-available/`) and append this to the end of the file: - -``` -server { - access_log /dev/null; - error_log /dev/null; - - listen 80; - server_name ; - root /var/www/4get; - - location @php { - try_files $uri.php $uri/index.php =404; - # Change the unix socket address if it's different for you. - fastcgi_pass unix:/var/run/php-fpm/php-fpm.sock; - fastcgi_index index.php; - # Change this to `fastcgi_params` if you use a debian based distro. - include fastcgi.conf; - fastcgi_intercept_errors on; - } - - location / { - try_files $uri @php; - } - - location ~* ^(.*)\.php$ { - return 301 $1; - } -} -``` - -Obviously replace `` by the onion address of `/var/lib/tor/4get/hostname` and then check if the nginx config is valid with `nginx -t` if yes, then restart the nginx service and try opening the onion address into the Tor Browser. You can see a real world example [here](https://git.zzls.xyz/Fijxu/etc-configs/src/branch/selfhost/nginx/sites-available/4get.zzls.xyz.conf) - -Once you did the above, refer to this tor guide to setup your onionsite. diff --git a/docs/tor.md b/docs/tor.md deleted file mode 100644 index b29ac3d..0000000 --- a/docs/tor.md +++ /dev/null @@ -1,16 +0,0 @@ -# Tor setup -This guide assumes that there is already a configured webserver sitting on port 80 waiting for localhost connections. The apache2 guide guides you through this. - -1. Login as `root`. -2. Install `tor`. -3. Edit `/etc/tor/torrc` -4. Go to the line that contains `HiddenServiceDir` and `HiddenServicePort`, uncomment those 2 lines and set them like this: - ``` - HiddenServiceDir /var/lib/tor/4get - HiddenServicePort 80 127.0.0.1:80 - ``` -5. Restart the tor service using `service tor restart` -6. Wait for a while... -7. Run `cat /var/lib/tor/4get/hostname`. That is your onion address! - -# Specify your own tor address diff --git a/favicon.ico b/favicon.ico index a54beb09a2d19111adfff688db40f26bea8a4d29..f3ef6bdf2eb0ee08a6ffa52d8108cd90230b2741 100644 GIT binary patch literal 16958 zcmeI42Y6Lg5`eScw?I}`-L<#f)zx(^R8c_?q^e*8v4JQeh(JQ`1P}?m2c&}(=^aFn z5~LTYN|g@MLnriv+4;}SO&&M*zLH1$7Iw$)@$&Aw_netI(`TL$5r5GCiWG^^e; zvRFjKy%7-+PwGoAMYPrz*9{bjh!wNd-fv3Y+IPt7Up^yS z_N{l^b#s2)e{8pWIkJWnsDGd2tof(Z*9RR6$f|9Nl<2EBC*exf&O=+Ja`%#wy?R#3 zQ6o}v)x0H@{_HifNy+Aq%cLdW$i>SSZYmyJNxCd^*G`sq+dVINqW>(pqSEO0l>vD0 ze4W3_SEHijw}f3c4G)%Vnj zYkVQRaPk2EVB1RGpRIMf6m9y544d0S&Rslv!|(t-_N5J7s6VK@96Y{PE+<`*Icp|K znKpTh&gb-z1^C9ZHXpE^{GBKIHfi~7H936pw+!*%s>)ANa*|xSa#7A-OqBDN5+y0= zN`}@7=Apuk9!}MN>{_L6#g+Db*F3uO;AUwwq@tejZN~ol@+;p^_hznbq>{<=WssP7%SaDepru8p)EQ(Kx3`%D@Rt|(D`%1Wi~C8TnX zlG0%CNAk^_Zn9y|DmilUfZ@$G2!`?DRkdH1UyGEYP5zM*=cQjT@!}a7^3LxJh%&LyO6Q4BO0|5-?DJB?2PI$KyG+c%{_y_kFS3{y zcE@^QgHr5}-czu_1M+6;oKmw-8R_?ZJ6W@RN!Yqpt=H?u|6#}8`YM;~JGwJH@hMmE zbDTl#e(xH*1Nlw9PopJ^p^(`gXV0JZ#M|E& zKdhdhHtyaO9f*|1Lq1VocEaByy4NWgIsa=xc1*nNV;7t_XXa=4fqdkxb%&9OH9M9{ zsQw2#(`iCOFoBLmh`In^=x<(*;o!=Y%)-$ke9`-QJ zevp@1=i_|t+Rv6OfXD1L6GMv4x2V5*Pi?f(X~iG89z45?T)uKC7#8SW$+A^-f*Jz$ z9Xg=tTKuj(W6lg3i6;s-dPvHu&%jsI8}Okt`KFTk-H#By6Vh0A*H>g|iwmse4m9yWP`6+@=+0{o!) zjn+AoU)5#$ijk(ybWC;eg6c2(Vax1T2>VXny?jf&44l=`#M=cM+@Eq*?)e*gywf(X z?9$psD1aTs7lO0JgS$+Q6ABO5kNEMI-9Jg!Nlg`x=aiRssVv-LbhJ{pVlrXzU^$?1 zX7bgfP|!2?5*vaiwz^oeM=e}k{17~7F}!L}w#*j^>Rp_T#e-}nRv8f=7m^&<>)|UB zP9K&PTj$HvL_I6^Ku%f3haFCttn0 zjo-hLiSc--INJALx_nV)tsXCLr^u}@53m&#JHIT)^bA6UPnAPS%{(ASw=mmXM zFEcMEv2eU<^gquP{}vI?bJqq!$O z{gbZ6WXyv8a`uA%c>n0BgHo$+IX$y9-+&BoZW#r9sflmYFR*T*Mh|N2HzuSw%egoH zp<1uf#upL?5c3d&lN(Y~u^!+^l=kKI=vws@;;XyHtF&6wX!}pe;|EBi*daGv)UdU$wm8k}UD#)azLrwg! zVb5w=rv4s#Lo7tCks1qf4L`{L@U6rQUa)fOk5WYQdJ{WpJo8e+2h=C8^twH$>$9~c zWYx^GDQ~k$=~hq6`kl*zde**at@`d#t)4QpgB!9BKfK~?>@9Ijm7cGqo+maAUex&d zZP}$Y6*s_s4w%^?wN}S78MTA<`m8y5*)UTZWIfKVqUH|N%z^=Og^#-ywa#5>r=AgP zPX!*)pYyfv?Ei_bM-R&l5>6jd{#tr~4n*~R$HbU!_Xc~7xMTRdIC)NU1al5b2YEEt z{Q)s;_sPx8Glp;Km-i)j*Ib-yAs^%f(BbRjS&N>nmye&aY`C$Z;ELX?+P2WSbjY9S z1;%R4&d7k$|C;*91IPCGT?cztq{-h6{=5@og$h5a4Z<$^`ZtRgkrFqpm8(xWe&(o@ z@AQ(9Rd~l)g_0k8;Yb~by|IPF%ZE;Q*L|sj#*V3#qVti`OK~M<3>Bc|z4rN1?SL%@ z*b!b2C;vMRZZdsj?x&9Fuh!@fFYFIF zBkS0-+wbT>+_aXikK<1YX^st_@UQc9&LJGYj=gDeq-<(mZ9Q1i9>Pd4qm!&}bS3zv@L8yL9(=TEyA4+jNPvj7KVj{F|I^n#_E=bAi} z_zwFWYAwOJ?hl&JdiCz;#m!qc)%l3|OwGaO6R}%3FlbgMbN1*VzQ^BeNRKHoM()6w z8bG*U(UDe9a>@YTyTMY8AFQ5lq~z8*Z#d9pViO}voGCpXuUwT}g}epXFVXz5kn2%y zI1=|dV*r~6$o9zjecT?CjcNnQ-Ov;64+mmLHU$6N5uPxjFT{aDZCi zr`<|;(vMvnr#(eC*nVJRNrlrsPB6(@*SY(@&kk3z!K&Ch?KeSB&zyEvW0*7FqGfz^pW&MeZ1>?{?J5y4w-XT6lL>PD~cHE5##!1HdvUPgxWKJ>NS*gBqka(mwG8n~D5SkSBoUzvk+0<=}tnjLlt zeBlAOK40ff)6Yex$;qf+kwZ`u!GCkVTYl$DbK2jL@~ql&Y7uUboSZB@r?fC<51!;| z>q4wox~{9Wi4*-@B{~H8gBKU@rwtAOjFRbD?L?vmP$9XXXCj) zVYkV<;Vn2)ZzmqVUa)EJn$(&Kc_+1TCy?u7AA^_YYiA_9cmYk7yT9UT$EZ0p(pf^= z-!s$Vl@~g34)!y%u2r?CV##q=p(YI>Ns)P{3d&Lm^hN+>>%jSx* zd=1Yr*A=d>55%|L(_5$N8TAFXxg&Bn;&1#9x?sz(+j(2(X3a*yGisX52{S|N2Gr5% zX)xzt^xgPbrCt51S4`lxPDZa0UzCZ}IdOW!%JT}^Yv7s_sE_}oJr8D_na@H-E!$zw zx>lDuip}8t$#0|$3%>8{H*E%Gi=QLxB*p*D1BzMF~2igt@B6dtG|8%Yb@C~+suL)T}S`)#XKyvnWLu;xow?k z*O+l9wzcHiveD$l8SQg;A$m#mX!W+mCgx#w**4jX8X>-GJC z<9kdE0^AIKif6vMckAy;DwtfB9xX9>Ab?-kbNZc~wARFL6tRV=bnCjgG!CE_I9X>V z0?ot(+V6UPkKU7Gk>?osR9=8L_JNs9WBIdu0_R7ciX4$TiFIyPjDY_`H}L7u2VZz+_8k9Z_bVRF-WX6~|aq=eObmqGFOpTM8LHlv_WQxw&P($>} UZ8L(0?=$xQ2jKtj|C=264<^iTq5uE@ literal 193 zcmeAS@N?(olHy`uVBq!ia0vp^3LwnE3?yBabR7dy`~f~8uB%rp=HcP-O-L7F1qv}N zyQ_E)NacFEIEGZ*I@@o^=VZupwBXY3`I22q`n^GFU-sTQl^yTTn8yA6kc!edh5b|V zoBCt4Cw~$KO9^2UVGu!x-FmC7R~Dx_-Q#I o+rVQd`zcF_3+E5E{9C_=@$e0%h0hOGyaqYI)78&qol`;+0MQpnOaK4? diff --git a/favicon.php b/favicon.php index 2a31839..8e5140a 100644 --- a/favicon.php +++ b/favicon.php @@ -200,7 +200,6 @@ class favicon{ /* Download the favicon */ - //$href = "https://git.lolcat.ca/assets/img/logo.svg"; try{ $payload = @@ -351,7 +350,7 @@ class favicon{ private function defaulticon(){ - // give 404 and fuck off + // give 404 http_response_code(404); $handle = fopen("lib/favicon404.png", "r"); diff --git a/favicon.svg b/favicon.svg new file mode 100644 index 0000000..4f69340 --- /dev/null +++ b/favicon.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/icons/lolcat.ca.png b/icons/lolcat.ca.png deleted file mode 100644 index bf47a4414cdd4068d8482aff82b7f966c2294c0f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 753 zcmViGHtV zkke`z!PQ_eM3x}=ZAP(kLwUc`VwNzMmJ{=9nm^3S^Ei_p4x6I{G}6%X&; z#F?W9FmuX8T^S*H+caePy{uqR(?mEWl~tN zvuPAG&+fwN5iRtyi+bb!`eEo$B`+3Nu?1oE==Hm5xsqTqMUmBH*wZ4T0aY`4GxVd&H zs$`8pIA~QdmtHG{fU*@*T0e+LRADHCi+Rzrpj*ou*Yu0wURX9rE0-%37{&3qJy0%Z za_pPl6 zXY%NbI@FDbfETQHo02JXdiOfa`qA0lK>Y9bP5@z5FP!lY&d${cV@P3}Mwsd`MXj9L z`kog7i+pC;m^?;6udD{o^gg0ZShgCd@scraper)); if($pool === false){ - // we don't want a proxy, fuck off! + // No proxy, please return 'raw_ip::::'; } diff --git a/lib/bingcache-todo-fix.php b/lib/bingcache-todo-fix.php index a4acb5b..4f0ef5b 100644 --- a/lib/bingcache-todo-fix.php +++ b/lib/bingcache-todo-fix.php @@ -1,7 +1,7 @@ new bingcache(); @@ -90,7 +90,7 @@ class bingcache{ $frontend->load( "error.html", [ - "title" => "Shit", + "title" => "Welp!", "text" => $text ] ); diff --git a/lib/curlproxy.php b/lib/curlproxy.php index 313ab01..6ebe4a7 100644 --- a/lib/curlproxy.php +++ b/lib/curlproxy.php @@ -207,7 +207,7 @@ class proxy{ function($downloadsize, $downloaded, $uploadsize, $uploaded ){ - // if $downloaded exceeds 100MB, fuck off + // if $downloaded exceeds 100MB return ($downloaded > 100000000) ? 1 : 0; }); @@ -631,7 +631,7 @@ class proxy{ ){ // format could not be found, but imagemagick can - // sometimes detect it? shit's fucked + // sometimes detect it? $format = false; } diff --git a/lib/frontend.php b/lib/frontend.php index 1c3eb09..8f87344 100644 --- a/lib/frontend.php +++ b/lib/frontend.php @@ -119,7 +119,7 @@ class frontend{ $this->drawerror( "Tshh, blocked!", - 'Your browser, IP or IP range has been blocked from this 4get instance. If this is an error, please contact the administrator.' + 'Your browser, IP or IP range has been blocked from this NarviSearch instance. If this is an error, please contact the administrator.' ); die(); } @@ -155,14 +155,14 @@ class frontend{ } $this->drawerror( - "Shit", + "Oh no!", 'This scraper returned an error:' . '
' . htmlspecialchars($error) . '
' . 'Things you can try:' . '
' . 'If the error persists, please contact the administrator.', $timetaken @@ -470,7 +470,6 @@ class frontend{ $archives = []; $path = explode("/", $host["path"]); $count = count($path); - // /pol/thread/417568063/post-shitty-memes-if-you-want-to if($count !== 0){ diff --git a/lib/fuckhtml.php b/lib/heckhtml.php similarity index 99% rename from lib/fuckhtml.php rename to lib/heckhtml.php index f3a6efe..efe8d48 100644 --- a/lib/fuckhtml.php +++ b/lib/heckhtml.php @@ -1,5 +1,5 @@ - - Everyone is permitted to copy and distribute verbatim or modified - copies of this license document, and changing it is allowed as long - as the name is changed. - - DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - 0. You just DO WHAT THE FUCK YOU WANT TO. \ No newline at end of file +so yeah uh. this mine now. \ No newline at end of file diff --git a/music.php b/music.php index c49fc08..7f49506 100644 --- a/music.php +++ b/music.php @@ -1,7 +1,7 @@ fuckhtml = new fuckhtml(); + include "lib/heckhtml.php"; + $this->heckhtml = new heckhtml(); include "lib/backend.php"; $this->backend = new backend("brave"); @@ -327,13 +327,13 @@ class brave{ ]; // load html - $this->fuckhtml->load($html); + $this->heckhtml->load($html); /* Get next page "token" */ $nextpage = - $this->fuckhtml + $this->heckhtml ->getElementById( "pagination", "div" @@ -341,10 +341,10 @@ class brave{ if($nextpage){ - $this->fuckhtml->load($nextpage); + $this->heckhtml->load($nextpage); $nextpage = - $this->fuckhtml + $this->heckhtml ->getElementsByClassName("btn", "a"); if(count($nextpage) !== 0){ @@ -354,7 +354,7 @@ class brave{ if( strtolower( - $this->fuckhtml + $this->heckhtml ->getTextContent( $nextpage ) @@ -363,7 +363,7 @@ class brave{ preg_match( '/offset=([0-9]+)/', - $this->fuckhtml->getTextContent($nextpage["attributes"]["href"]), + $this->heckhtml->getTextContent($nextpage["attributes"]["href"]), $nextpage ); @@ -381,10 +381,10 @@ class brave{ } } - $this->fuckhtml->load($html); + $this->heckhtml->load($html); $script_disc = - $this->fuckhtml + $this->heckhtml ->getElementsByTagName( "script" ); @@ -410,7 +410,7 @@ class brave{ } $data = - $this->fuckhtml + $this->heckhtml ->parseJsObject( $grep[1] ); @@ -458,7 +458,7 @@ class brave{ "title" => $this->titledots($cluster["title"]), "description" => $this->titledots( - $this->fuckhtml + $this->heckhtml ->getTextContent( $cluster["description"] ) @@ -776,7 +776,7 @@ class brave{ ) ) : $this->titledots( - $this->fuckhtml + $this->heckhtml ->getTextContent( $result["description"] ) @@ -902,9 +902,9 @@ class brave{ } $table[ - $this->fuckhtml->getTextContent($row[0]) + $this->heckhtml->getTextContent($row[0]) ] = - $this->fuckhtml->getTextContent($row[1]); + $this->heckhtml->getTextContent($row[1]); } } @@ -913,7 +913,7 @@ class brave{ foreach($info["profiles"] as $row){ - $name = $this->fuckhtml->getTextContent($row["name"]); + $name = $this->heckhtml->getTextContent($row["name"]); if(strtolower($name) == "steampowered"){ @@ -921,14 +921,14 @@ class brave{ } $sublink[ - $this->fuckhtml->getTextContent($name) + $this->heckhtml->getTextContent($name) ] = - $this->fuckhtml->getTextContent($row["url"]); + $this->heckhtml->getTextContent($row["url"]); } } $out["answer"][] = [ - "title" => $this->fuckhtml->getTextContent($info["title"]), + "title" => $this->heckhtml->getTextContent($info["title"]), "description" => $description, "url" => $info["url"], "thumb" => isset($info["images"][0]["original"]) ? $info["images"][0]["original"] : null, @@ -957,7 +957,7 @@ class brave{ isset($video["thumbnail"]["src"]) ? [ "ratio" => "16:9", - "url" => $this->unshiturl($video["thumbnail"]["src"]) + "url" => $this->fixURLthingie($video["thumbnail"]["src"]) ] : [ "ratio" => null, @@ -983,7 +983,7 @@ class brave{ isset($video["thumbnail"]["src"]) ? [ "ratio" => "16:9", - "url" => $this->unshiturl($video["thumbnail"]["src"]) + "url" => $this->fixURLthingie($video["thumbnail"]["src"]) ] : [ "ratio" => null, @@ -1023,7 +1023,7 @@ class brave{ "description" => $this->limitstrlen( $this->titledots( - $this->fuckhtml + $this->heckhtml ->getTextContent( $disc["description"] ) @@ -1128,7 +1128,7 @@ class brave{ ]; // load html - $this->fuckhtml->load($html); + $this->heckhtml->load($html); // get npt $out["npt"] = @@ -1152,7 +1152,7 @@ class brave{ throw new Exception("Failed to grep javascript object"); } - $json = $this->fuckhtml->parseJsObject($json[1], true); + $json = $this->heckhtml->parseJsObject($json[1], true); if($json === null){ @@ -1176,7 +1176,7 @@ class brave{ }else{ $thumb = [ - "url" => $this->unshiturl($news["thumbnail"]["src"]), + "url" => $this->fixURLthingie($news["thumbnail"]["src"]), "ratio" => "16:9" ]; } @@ -1251,7 +1251,7 @@ class brave{ } $json = - $this->fuckhtml + $this->heckhtml ->parseJsObject( $json[1] ); @@ -1356,7 +1356,7 @@ class brave{ } } - $this->fuckhtml->load($html); + $this->heckhtml->load($html); $out = [ "status" => "ok", @@ -1396,7 +1396,7 @@ class brave{ } $json = - $this->fuckhtml + $this->heckhtml ->parseJsObject( $json[1] ); @@ -1461,20 +1461,20 @@ class brave{ $i = 0; $answer = []; - $this->fuckhtml->load($html); + $this->heckhtml->load($html); foreach( - $this->fuckhtml->getElementsByTagName("*") + $this->heckhtml->getElementsByTagName("*") as $snippet ){ switch($snippet["tagName"]){ case "p": - $this->fuckhtml->load($snippet["innerHTML"]); + $this->heckhtml->load($snippet["innerHTML"]); $codetags = - $this->fuckhtml + $this->heckhtml ->getElementsByTagName("*"); $tmphtml = $snippet["innerHTML"]; @@ -1493,7 +1493,7 @@ class brave{ 2 ); - $value = $this->fuckhtml->getTextContent($tmphtml[0], false, false); + $value = $this->heckhtml->getTextContent($tmphtml[0], false, false); $this->appendtext($value, $answer, $i); $type = null; @@ -1506,7 +1506,7 @@ class brave{ } if($type !== null){ - $value = $this->fuckhtml->getTextContent($tag, false, true); + $value = $this->heckhtml->getTextContent($tag, false, true); if(trim($value) != ""){ @@ -1542,7 +1542,7 @@ class brave{ if(strlen($tmphtml) !== 0){ - $value = $this->fuckhtml->getTextContent($tmphtml, false, false); + $value = $this->heckhtml->getTextContent($tmphtml, false, false); $this->appendtext($value, $answer, $i); } break; @@ -1551,7 +1551,7 @@ class brave{ $answer[] = [ "type" => "image", "url" => - $this->fuckhtml + $this->heckhtml ->getTextContent( $tag["attributes"]["src"] ) @@ -1574,7 +1574,7 @@ class brave{ "type" => "code", "value" => rtrim( - $this->fuckhtml + $this->heckhtml ->getTextContent( $snippet, true, @@ -1589,9 +1589,9 @@ class brave{ case "ol": $o = 0; - $this->fuckhtml->load($snippet); + $this->heckhtml->load($snippet); $li = - $this->fuckhtml + $this->heckhtml ->getElementsByTagName("li"); foreach($li as $elem){ @@ -1599,7 +1599,7 @@ class brave{ $this->appendtext( $o . ". " . - $this->fuckhtml + $this->heckhtml ->getTextContent( $elem ), @@ -1683,7 +1683,7 @@ class brave{ $html = explode( ":", - $this->fuckhtml->getTextContent($html), + $this->heckhtml->getTextContent($html), 2 ); @@ -1699,7 +1699,7 @@ class brave{ private function getimagelinkfromstyle($thumb){ $thumb = - $this->fuckhtml + $this->heckhtml ->getElementsByClassName( $thumb, "div" @@ -1721,7 +1721,7 @@ class brave{ $thumb ); - $url = $this->fuckhtml->getTextContent($this->unshiturl(trim($thumb[1], '"\' '))); + $url = $this->heckhtml->getTextContent($this->fixURLthingie(trim($thumb[1], '"\' '))); if(parse_url($url, PHP_URL_HOST) == "cdn.search.brave.com"){ @@ -1770,7 +1770,7 @@ class brave{ private function generatenextpagetoken($q, $nsfw, $country, $spellcheck, $page, $proxy){ $nextpage = - $this->fuckhtml + $this->heckhtml ->getElementsByClassName("btn", "a"); if(count($nextpage) !== 0){ @@ -1780,7 +1780,7 @@ class brave{ if( strtolower( - $this->fuckhtml + $this->heckhtml ->getTextContent( $nextpage ) @@ -1789,7 +1789,7 @@ class brave{ preg_match( '/offset=([0-9]+)/', - $this->fuckhtml->getTextContent($nextpage["attributes"]["href"]), + $this->heckhtml->getTextContent($nextpage["attributes"]["href"]), $nextpage ); @@ -1813,7 +1813,7 @@ class brave{ return null; } - private function unshiturl($url){ + private function fixURLthingie($url){ // https://imgs.search.brave.com/XFnbR8Sl7ge82MBDEH7ju0UHImRovMVmQ2qnDvgNTuA/rs:fit:844:225:1/g:ce/aHR0cHM6Ly90c2U0/Lm1tLmJpbmcubmV0/L3RoP2lkPU9JUC54/UWotQXU5N2ozVndT/RDJnNG9BNVhnSGFF/SyZwaWQ9QXBp.jpeg diff --git a/scraper/crowdview.php b/scraper/crowdview.php index 8fb267b..51ea9c5 100644 --- a/scraper/crowdview.php +++ b/scraper/crowdview.php @@ -7,8 +7,8 @@ class crowdview{ include "lib/backend.php"; $this->backend = new backend("crowdview"); - include "lib/fuckhtml.php"; - $this->fuckhtml = new fuckhtml(); + include "lib/heckhtml.php"; + $this->heckhtml = new heckhtml(); } public function getfilters($page){ @@ -133,7 +133,7 @@ class crowdview{ return trim( - $this->fuckhtml + $this->heckhtml ->getTextContent( html_entity_decode( $html diff --git a/scraper/curlie.php b/scraper/curlie.php index 61a8eb2..6364d6c 100644 --- a/scraper/curlie.php +++ b/scraper/curlie.php @@ -7,8 +7,8 @@ class curlie{ include "lib/backend.php"; $this->backend = new backend("curlie"); - include "lib/fuckhtml.php"; - $this->fuckhtml = new fuckhtml(); + include "lib/heckhtml.php"; + $this->heckhtml = new heckhtml(); } public function getfilters($page){ @@ -208,10 +208,10 @@ class curlie{ } } - $this->fuckhtml->load($html); + $this->heckhtml->load($html); $nextpage = - $this->fuckhtml + $this->heckhtml ->getElementsByClassName( "next-page", "a" @@ -247,7 +247,7 @@ class curlie{ ]; $items = - $this->fuckhtml + $this->heckhtml ->getElementsByClassName( "site-item", "div" @@ -255,10 +255,10 @@ class curlie{ foreach($items as $item){ - $this->fuckhtml->load($item); + $this->heckhtml->load($item); $a = - $this->fuckhtml + $this->heckhtml ->getElementsByAttributeValue( "target", "_blank", @@ -266,13 +266,13 @@ class curlie{ )[0]; $description = - $this->fuckhtml + $this->heckhtml ->getElementsByClassName("site-descr"); if(count($description) !== 0){ $description = - $this->fuckhtml + $this->heckhtml ->getTextContent( $description[0] ); @@ -283,13 +283,13 @@ class curlie{ $out["web"][] = [ "title" => - $this->fuckhtml + $this->heckhtml ->getTextContent( $a ), "description" => $description, "url" => - $this->fuckhtml + $this->heckhtml ->getTextContent( $a["attributes"]["href"] ), diff --git a/scraper/ddg.php b/scraper/ddg.php index 4a0d11f..26545db 100644 --- a/scraper/ddg.php +++ b/scraper/ddg.php @@ -7,8 +7,8 @@ class ddg{ include "lib/backend.php"; $this->backend = new backend("ddg"); - include "lib/fuckhtml.php"; - $this->fuckhtml = new fuckhtml(); + include "lib/heckhtml.php"; + $this->heckhtml = new heckhtml(); } /* @@ -1288,18 +1288,18 @@ class ddg{ } /* - Get shitcoin conversions + Get coin conversions */ if($extendedsearch){ if( preg_match( '/"https?:\/\/(?:www\.coinbase\.com\/converter\/([a-z0-9]+)\/([a-z0-9]+)|changelly\.com\/exchange\/([a-z0-9]+)\/([a-z0-9]+)|coinmarketcap\.com\/currencies\/[a-z0-9]+\/([a-z0-9]+)\/([a-z0-9]+))\/?"/', $js, - $shitcoins + $coins ) ){ - $shitcoins = array_values(array_filter($shitcoins)); + $coins = array_values(array_filter($coins)); preg_match( '/(?:[\s,.]*[0-9]+)+/', @@ -1319,28 +1319,28 @@ class ddg{ $description = []; - $shitcoinjs = $this->get( + $coinjs = $this->get( $proxy, - "https://duckduckgo.com/js/spice/cryptocurrency/{$shitcoins[1]}/{$shitcoins[2]}/1", + "https://duckduckgo.com/js/spice/cryptocurrency/{$coins[1]}/{$coins[2]}/1", [], ddg::req_xhr ); preg_match( '/ddg_spice_cryptocurrency\(\s*({[\S\s]*})\s*\);/', - $shitcoinjs, - $shitcoinjson + $coinjs, + $coinjson ); - $shitcoinjson = json_decode($shitcoinjson[1], true); + $coinjson = json_decode($coinjson[1], true); if( - !isset($shitcoinjson["error"]) && - $shitcoinjson["status"]["error_code"] == 0 + !isset($coinjson["error"]) && + $coinjson["status"]["error_code"] == 0 ){ - $shitcoinjson = $shitcoinjson["data"]; - $array_values = array_values($shitcoinjson["quote"])[0]; + $coinjson = $coinjson["data"]; + $array_values = array_values($coinjson["quote"])[0]; if($amount != 1){ @@ -1353,8 +1353,8 @@ class ddg{ $description[] = [ "type" => "text", "value" => - "{$amount} {$shitcoinjson["name"]} ({$shitcoinjson["symbol"]}) = " . $this->number_format($array_values["price"] * $amount) . " " . strtoupper($shitcoins[2]) . "\n" . - "{$amount} " . strtoupper($shitcoins[2]) . " = " . $this->number_format((1 / $array_values["price"]) * $amount) . " {$shitcoinjson["symbol"]}" + "{$amount} {$coinjson["name"]} ({$coinjson["symbol"]}) = " . $this->number_format($array_values["price"] * $amount) . " " . strtoupper($coins[2]) . "\n" . + "{$amount} " . strtoupper($coins[2]) . " = " . $this->number_format((1 / $array_values["price"]) * $amount) . " {$coinjson["symbol"]}" ]; } @@ -1367,19 +1367,19 @@ class ddg{ $description[] = [ "type" => "text", "value" => - "1 {$shitcoinjson["name"]} ({$shitcoinjson["symbol"]}) = " . $this->number_format($array_values["price"]) . " " . strtoupper($shitcoins[2]) . "\n" . - "1 " . strtoupper($shitcoins[2]) . " = " . $this->number_format(1 / $array_values["price"]) . " {$shitcoinjson["symbol"]}" + "1 {$coinjson["name"]} ({$coinjson["symbol"]}) = " . $this->number_format($array_values["price"]) . " " . strtoupper($coins[2]) . "\n" . + "1 " . strtoupper($coins[2]) . " = " . $this->number_format(1 / $array_values["price"]) . " {$coinjson["symbol"]}" ]; $description[] = [ "type" => "quote", - "value" => "Last fetched: " . date("jS \of F Y @ g:ia", strtotime($shitcoinjson["last_updated"])) + "value" => "Last fetched: " . date("jS \of F Y @ g:ia", strtotime($coinjson["last_updated"])) ]; $out["answer"][] = [ - "title" => $shitcoinjson["name"] . " (" . strtoupper($shitcoins[1]) . ") & " . strtoupper($shitcoins[2]) . " market", + "title" => $coinjson["name"] . " (" . strtoupper($coins[1]) . ") & " . strtoupper($coins[2]) . " market", "description" => $description, - "url" => "https://coinmarketcap.com/converter/" . strtoupper($shitcoins[1]) . "/" . strtoupper($shitcoins[2]) . "/?amt={$amount}", + "url" => "https://coinmarketcap.com/converter/" . strtoupper($coins[1]) . "/" . strtoupper($coins[2]) . "/?amt={$amount}", "thumb" => null, "table" => [], "sublink" => [] @@ -2470,9 +2470,9 @@ class ddg{ $i = 0; $answer = []; - $this->fuckhtml->load($html); + $this->heckhtml->load($html); - $tags = $this->fuckhtml->getElementsByTagName("*"); + $tags = $this->heckhtml->getElementsByTagName("*"); if(count($tags) === 0){ @@ -2489,10 +2489,10 @@ class ddg{ switch($snippet["tagName"]){ case "p": - $this->fuckhtml->load($snippet["innerHTML"]); + $this->heckhtml->load($snippet["innerHTML"]); $codetags = - $this->fuckhtml + $this->heckhtml ->getElementsByTagName("*"); $tmphtml = $snippet["innerHTML"]; @@ -2511,7 +2511,7 @@ class ddg{ 2 ); - $value = $this->fuckhtml->getTextContent($tmphtml[0], false, false); + $value = $this->heckhtml->getTextContent($tmphtml[0], false, false); $this->appendtext($value, $answer, $i); $type = null; @@ -2524,7 +2524,7 @@ class ddg{ } if($type !== null){ - $value = $this->fuckhtml->getTextContent($tag, false, false); + $value = $this->heckhtml->getTextContent($tag, false, false); if(trim($value) != ""){ @@ -2552,7 +2552,7 @@ class ddg{ if(strlen($tmphtml) !== 0){ - $value = $this->fuckhtml->getTextContent($tmphtml, true, false); + $value = $this->heckhtml->getTextContent($tmphtml, true, false); $this->appendtext($value, $answer, $i); } break; @@ -2561,7 +2561,7 @@ class ddg{ $answer[] = [ "type" => "image", "url" => - $this->fuckhtml + $this->heckhtml ->getTextContent( $tag["attributes"]["src"] ) @@ -2583,7 +2583,7 @@ class ddg{ "type" => "code", "value" => rtrim( - $this->fuckhtml + $this->heckhtml ->getTextContent( $snippet, true, @@ -2598,9 +2598,9 @@ class ddg{ case "ol": $o = 0; - $this->fuckhtml->load($snippet); + $this->heckhtml->load($snippet); $li = - $this->fuckhtml + $this->heckhtml ->getElementsByTagName("li"); foreach($li as $elem){ @@ -2608,7 +2608,7 @@ class ddg{ $this->appendtext( $o . ". " . - $this->fuckhtml + $this->heckhtml ->getTextContent( $elem ), diff --git a/scraper/google.php b/scraper/google.php index 50bcc22..e293c30 100644 --- a/scraper/google.php +++ b/scraper/google.php @@ -11,8 +11,8 @@ class google{ public function __construct(){ - include "lib/fuckhtml.php"; - $this->fuckhtml = new fuckhtml(); + include "lib/heckhtml.php"; + $this->heckhtml = new heckhtml(); include "lib/backend.php"; $this->backend = new backend("google"); @@ -1002,16 +1002,16 @@ class google{ // decode UTF-16 string $answer = - $this->fuckhtml + $this->heckhtml ->parseJsString( $accdefs_regex[2][$i] ); - $this->fuckhtml->load($answer); + $this->heckhtml->load($answer); // get description $description = - $this->fuckhtml + $this->heckhtml ->getElementsByClassName( $this->findstyles( [ @@ -1032,7 +1032,7 @@ class google{ // get date (rare) $date = - $this->fuckhtml + $this->heckhtml ->getElementsByTagName("sub"); if(count($date) !== 0){ @@ -1046,7 +1046,7 @@ class google{ $date = strtotime( - $this->fuckhtml + $this->heckhtml ->getTextContent( $date[0] ) @@ -1060,34 +1060,34 @@ class google{ $table = []; $tbody = - $this->fuckhtml + $this->heckhtml ->getElementsByTagName("tbody"); if(count($tbody) !== 0){ - $this->fuckhtml->load($tbody[0]); + $this->heckhtml->load($tbody[0]); $trs = - $this->fuckhtml + $this->heckhtml ->getElementsByTagName("tr"); foreach($trs as $tr){ - $this->fuckhtml->load($tr); + $this->heckhtml->load($tr); $tds = - $this->fuckhtml + $this->heckhtml ->getElementsByTagName("td"); if(count($tds) === 2){ $table[ - $this->fuckhtml + $this->heckhtml ->getTextContent( $tds[0] ) ] = - $this->fuckhtml + $this->heckhtml ->getTextContent( $tds[1] ); @@ -1095,18 +1095,18 @@ class google{ } // load back what we had - $this->fuckhtml->load($answer); + $this->heckhtml->load($answer); } // get title & link $a = - $this->fuckhtml + $this->heckhtml ->getElementsByTagName("a")[0]; - $this->fuckhtml->load($a); + $this->heckhtml->load($a); $title = - $this->fuckhtml + $this->heckhtml ->getElementsByTagName("span"); if(count($title) === 0){ @@ -1117,18 +1117,18 @@ class google{ $accdefs[] = [ "title" => $this->titledots( - $this->fuckhtml + $this->heckhtml ->getTextContent( $title[0] ) ), "description" => - $this->fuckhtml + $this->heckhtml ->getTextContent( $description ), "url" => - $this->unshiturl( + $this->fixURLthingie( $a["attributes"]["href"] ), "date" => $date, @@ -1142,10 +1142,10 @@ class google{ ]; } - $this->fuckhtml->load($html); + $this->heckhtml->load($html); $containers = - $this->fuckhtml + $this->heckhtml ->getElementsByClassName( $this->findstyles( [ @@ -1161,11 +1161,11 @@ class google{ foreach($containers as $container){ - $this->fuckhtml->load($container); + $this->heckhtml->load($container); // detect spelling $spelling = - $this->fuckhtml + $this->heckhtml ->getElementsByClassName( $this->findstyles( [ @@ -1182,13 +1182,13 @@ class google{ if(count($spelling) !== 0){ $a = - $this->fuckhtml + $this->heckhtml ->getElementsByTagName("a"); if(count($a) !== 0){ $scripts = - $this->fuckhtml + $this->heckhtml ->getElementsByTagName("script"); foreach($scripts as $script){ @@ -1202,7 +1202,7 @@ class google{ } $container["innerHTML"] = - $this->fuckhtml + $this->heckhtml ->getTextContent( str_replace( $a[0]["outerHTML"], @@ -1222,7 +1222,7 @@ class google{ "type" => "not_many", "using" => $search, "correction" => - $this->fuckhtml + $this->heckhtml ->getTextContent( $a[0] ) @@ -1239,7 +1239,7 @@ class google{ $out["spelling"] = [ "type" => "including", "using" => - $this->fuckhtml + $this->heckhtml ->getTextContent( $a[0] ), @@ -1252,7 +1252,7 @@ class google{ } $title = - $this->fuckhtml + $this->heckhtml ->getElementsByClassName( $this->findstyles( [ @@ -1273,7 +1273,7 @@ class google{ $web = [ "title" => $this->titledots( - $this->fuckhtml + $this->heckhtml ->getTextContent( $title[0] ) @@ -1292,8 +1292,8 @@ class google{ // get link $web["url"] = - $this->unshiturl( - $this->fuckhtml + $this->fixURLthingie( + $this->heckhtml ->getElementsByTagName("a") [0] ["attributes"] @@ -1352,12 +1352,12 @@ class google{ // // no carousel entries, parse as normal link // - $this->fuckhtml->load($container); + $this->heckhtml->load($container); // parse URL $web["url"] = - $this->unshiturl( - $this->fuckhtml + $this->fixURLthingie( + $this->heckhtml ->getElementsByTagName("a") [0] ["attributes"] @@ -1367,7 +1367,7 @@ class google{ $container = $container["innerHTML"]; $line_detect = - $this->fuckhtml + $this->heckhtml ->getElementsByClassName( $this->findstyles( [ @@ -1387,7 +1387,7 @@ class google{ $featured = true; $description_container = - $this->fuckhtml + $this->heckhtml ->getElementsByClassName( $this->findstyles( [ @@ -1401,13 +1401,13 @@ class google{ // get date node for it $date = - $this->fuckhtml + $this->heckhtml ->getElementsByTagName("sub"); if(count($date) !== 0){ $web["date"] = strtotime( - $this->fuckhtml + $this->heckhtml ->getTextContent( $date[0] ) @@ -1419,7 +1419,7 @@ class google{ $featured = false; $description_container = - $this->fuckhtml + $this->heckhtml ->getElementsByClassName( $this->findstyles( [ @@ -1437,7 +1437,7 @@ class google{ if($pagetype == "news"){ $author = - $this->fuckhtml + $this->heckhtml ->getElementsByClassName( $this->findstyles( [ @@ -1456,7 +1456,7 @@ class google{ if(count($author) !== 0){ $web["author"] = - $this->fuckhtml + $this->heckhtml ->getTextContent( $author[0] ); @@ -1469,13 +1469,13 @@ class google{ $description = $description_container["innerHTML"]; - $this->fuckhtml->load($description); + $this->heckhtml->load($description); // // get thumbnail before we call loadhtml again // $img = - $this->fuckhtml + $this->heckhtml ->getElementsByTagName("img"); if(count($img) !== 0){ @@ -1508,7 +1508,7 @@ class google{ // get sublinks // $links = - $this->fuckhtml + $this->heckhtml ->getElementsByTagName("a"); foreach($links as $link){ @@ -1535,14 +1535,14 @@ class google{ $sublink["title"] = $this->titledots( - $this->fuckhtml + $this->heckhtml ->getTextContent( $link ) ); $sublink["url"] = - $this->unshiturl( + $this->fixURLthingie( $link ["attributes"] ["href"] @@ -1557,12 +1557,12 @@ class google{ // // Parse spans in description // - $this->fuckhtml->load($description); + $this->heckhtml->load($description); if($featured === false){ $levels = - $this->fuckhtml + $this->heckhtml ->getElementsByClassName( $this->findstyles( [ @@ -1573,7 +1573,7 @@ class google{ "div" ); - // oh my god yes, fucking great, sometimes there are NO levels + // oh my god yes, this is hecking great, sometimes there are NO levels // hahahahahhahahahahahahahahahhahaa if(count($levels) === 0){ @@ -1582,10 +1582,10 @@ class google{ foreach($levels as $level){ - $this->fuckhtml->load($level); + $this->heckhtml->load($level); $spans = - $this->fuckhtml + $this->heckhtml ->getElementsByTagName( "span" ); @@ -1596,7 +1596,7 @@ class google{ $innertext = trim( - $this->fuckhtml + $this->heckhtml ->getTextContent( $span ), @@ -1706,7 +1706,7 @@ class google{ // If we reach this point: // 1. Ratings have been parsed - // 2. We're parsing a WEB link, not some shitty piece of shit + // 2. We're parsing a WEB link // check for date // if span has no text before it, assume it's a date @@ -1718,7 +1718,7 @@ class google{ ); if( - $this->fuckhtml + $this->heckhtml ->getTextContent( $desc_split[0] ) == "" @@ -1744,10 +1744,10 @@ class google{ // Ready to parse table if(count($desc_split) === 2){ - $this->fuckhtml->load($desc_split[1]); + $this->heckhtml->load($desc_split[1]); $web["table"][ - $this->fuckhtml + $this->heckhtml ->getTextContent( trim($desc_split[0], ": ") ) @@ -1767,7 +1767,7 @@ class google{ $web["description"] = trim( - $this->fuckhtml + $this->heckhtml ->getTextContent( $description ), @@ -1785,10 +1785,10 @@ class google{ } // - // Detect wikipedia shit + // Detect Wikipedia // $wiki_title = - $this->fuckhtml + $this->heckhtml ->getElementsByTagName("h3"); if(count($wiki_title) !== 0){ @@ -1799,7 +1799,7 @@ class google{ $sublink = []; $as = - $this->fuckhtml + $this->heckhtml ->getElementsByTagName("a"); foreach($as as $a){ @@ -1816,7 +1816,7 @@ class google{ // get carousels and remove them from container for image grepper $carousels = $this->parsecarousels($container["innerHTML"]); - $this->fuckhtml->load($container); + $this->heckhtml->load($container); // add images to image tab, if applicable for($i=0; $ifuckhtml + $this->heckhtml ->getElementsByClassName( $this->findstyles( [ @@ -1878,7 +1878,7 @@ class google{ $description_after[] = [ "type" => "title", "value" => - $this->fuckhtml + $this->heckhtml ->getTextContent( $titles[$i] ) @@ -1908,7 +1908,7 @@ class google{ } $categories = - $this->fuckhtml + $this->heckhtml ->getElementsByClassName( $this->findstyles( [ @@ -1919,7 +1919,7 @@ class google{ ); $image = - $this->fuckhtml + $this->heckhtml ->getElementsByTagName("img"); if(count($image) !== 0){ @@ -1934,20 +1934,20 @@ class google{ for($i=0; $ifuckhtml->load($categories[$i]); + $this->heckhtml->load($categories[$i]); if($i === 0){ // first node. this should be the header with the small // information snippet $url = - $this->fuckhtml + $this->heckhtml ->getElementsByTagName("a"); if(count($url) !== 0){ $url = - $this->unshiturl( + $this->fixURLthingie( $url[0]["attributes"]["href"] ); @@ -1969,7 +1969,7 @@ class google{ ); $subtext = - $this->fuckhtml + $this->heckhtml ->getTextContent( $categories[$i]["innerHTML"] ); @@ -1979,7 +1979,7 @@ class google{ $description[] = [ "type" => "quote", "value" => - $this->fuckhtml + $this->heckhtml ->getTextContent( $categories[$i]["innerHTML"] ) @@ -1988,7 +1988,7 @@ class google{ // detect audio file $audio = - $this->fuckhtml + $this->heckhtml ->getElementsByTagName("audio"); if(count($audio) !== 0){ @@ -1996,7 +1996,7 @@ class google{ $description[] = [ "type" => "audio", "url" => - $this->fuckhtml + $this->heckhtml ->getTextContent( $audio[0]["attributes"]["src"] ) @@ -2006,7 +2006,7 @@ class google{ // check for separator elements IN THERE $separators = - $this->fuckhtml + $this->heckhtml ->getElementsByClassName( $this->findstyles( [ @@ -2021,7 +2021,7 @@ class google{ // detect container type foreach($separators as $separator){ - $this->fuckhtml->load($separator); + $this->heckhtml->load($separator); // ignore wrong levels if($separator["level"] !== 2){ @@ -2033,7 +2033,7 @@ class google{ // Detect word definition // $wordwraps = - $this->fuckhtml + $this->heckhtml ->getElementsByClassName( $this->findstyles( [ @@ -2048,18 +2048,18 @@ class google{ foreach($wordwraps as $word){ - $this->fuckhtml->load($word); + $this->heckhtml->load($word); // detect title $span = - $this->fuckhtml + $this->heckhtml ->getElementsByTagName( "span" ); if( count($span) === 1 && - $this->fuckhtml + $this->heckhtml ->getTextContent( str_replace( $span[0]["outerHTML"], @@ -2072,7 +2072,7 @@ class google{ $description[] = [ "type" => "title", "value" => - $this->fuckhtml + $this->heckhtml ->getTextContent( $span[0] ) @@ -2082,27 +2082,27 @@ class google{ // detect list element $lists = - $this->fuckhtml + $this->heckhtml ->getElementsByTagName("ol"); if(count($lists) !== 0){ foreach($lists as $list){ - $this->fuckhtml->load($list); + $this->heckhtml->load($list); $items = - $this->fuckhtml + $this->heckhtml ->getElementsByTagName("li"); $w = 0; foreach($items as $item){ $w++; - $this->fuckhtml->load($item); + $this->heckhtml->load($item); // get subnodes $subnodes = - $this->fuckhtml + $this->heckhtml ->getElementsByClassName( $this->findstyles( [ @@ -2116,10 +2116,10 @@ class google{ foreach($subnodes as $subnode){ - $this->fuckhtml->load($subnode); + $this->heckhtml->load($subnode); $spans = - $this->fuckhtml + $this->heckhtml ->getElementsByTagName("span"); if(count($spans) !== 0){ @@ -2128,7 +2128,7 @@ class google{ $description[] = [ "type" => "quote", "value" => - $this->fuckhtml + $this->heckhtml ->getTextContent( $subnode ) @@ -2140,7 +2140,7 @@ class google{ "type" => "text", "value" => $w . ". " . - $this->fuckhtml + $this->heckhtml ->getTextContent( $subnode ) @@ -2154,7 +2154,7 @@ class google{ // parse without list // get subnodes $subnodes = - $this->fuckhtml + $this->heckhtml ->getElementsByClassName( $this->findstyles( [ @@ -2168,10 +2168,10 @@ class google{ foreach($subnodes as $subnode){ - $this->fuckhtml->load($subnode); + $this->heckhtml->load($subnode); $spans = - $this->fuckhtml + $this->heckhtml ->getElementsByTagName("span"); if(count($spans) !== 0){ @@ -2180,7 +2180,7 @@ class google{ $description[] = [ "type" => "quote", "value" => - $this->fuckhtml + $this->heckhtml ->getTextContent( $subnode ) @@ -2191,7 +2191,7 @@ class google{ $description[] = [ "type" => "text", "value" => - $this->fuckhtml + $this->heckhtml ->getTextContent( $subnode ) @@ -2206,7 +2206,7 @@ class google{ // Parse table // $spans = - $this->fuckhtml + $this->heckhtml ->getElementsByTagName("span"); foreach($spans as $span){ @@ -2217,7 +2217,7 @@ class google{ $row = explode( ":", - $this->fuckhtml + $this->heckhtml ->getTextContent( $separator ), @@ -2238,7 +2238,7 @@ class google{ // Parse normal description // $links_rem = - $this->fuckhtml + $this->heckhtml ->getElementsByTagName("a"); foreach($links_rem as $rem){ @@ -2255,7 +2255,7 @@ class google{ "type" => "text", "value" => rtrim( - $this->fuckhtml + $this->heckhtml ->getTextContent( $separator ), @@ -2268,7 +2268,7 @@ class google{ // detect huge buttons $buttons = - $this->fuckhtml + $this->heckhtml ->getElementsByClassName( $this->findstyles( [ @@ -2289,12 +2289,12 @@ class google{ if(isset($button["attributes"]["href"])){ $sublink[ - $this->fuckhtml + $this->heckhtml ->getTextContent( $button ) ] = - $this->unshiturl( + $this->fixURLthingie( $button["attributes"]["href"] ); } @@ -2310,7 +2310,7 @@ class google{ $out["answer"][] = [ "title" => - $this->fuckhtml + $this->heckhtml ->getTextContent( $wiki_title[0] ), @@ -2328,7 +2328,7 @@ class google{ // Detect related searches containers // $container_title = - $this->fuckhtml + $this->heckhtml ->getElementsByClassName( $this->findstyles( [ @@ -2347,7 +2347,7 @@ class google{ // get carousel entries $carousels = $this->parsecarousels($container["innerHTML"]); - $this->fuckhtml->load($container); + $this->heckhtml->load($container); foreach($carousels as $carousel){ @@ -2362,7 +2362,7 @@ class google{ $container_title = strtolower( - $this->fuckhtml + $this->heckhtml ->getTextContent( $container_title[0] ) @@ -2376,13 +2376,13 @@ class google{ // Parse related searches // $as = - $this->fuckhtml + $this->heckhtml ->getElementsByTagName("a"); foreach($as as $a){ $out["related"][] = - $this->fuckhtml + $this->heckhtml ->getTextContent($a); } break; @@ -2390,7 +2390,7 @@ class google{ case "people also ask": // get related queries $divs = - $this->fuckhtml + $this->heckhtml ->getElementsByTagName("div"); foreach($divs as $div){ @@ -2406,7 +2406,7 @@ class google{ if(isset($div["attributes"]["role"])){ $out["related"][] = - $this->fuckhtml + $this->heckhtml ->getTextContent($div); continue; @@ -2422,7 +2422,7 @@ class google{ // Parse news // $title = - $this->fuckhtml + $this->heckhtml ->getElementsByClassName( $this->findstyles( [ @@ -2438,7 +2438,7 @@ class google{ if(count($title) !== 0){ $carousels = $this->parsecarousels(); - $this->fuckhtml->load($container); + $this->heckhtml->load($container); if(count($carousels) === 0){ @@ -2448,7 +2448,7 @@ class google{ $title = strtolower( - $this->fuckhtml + $this->heckhtml ->getTextContent( $title[0] ) @@ -2525,7 +2525,7 @@ class google{ // ignore elements with