From 7c771c82c8e03b337f9f03ae2d4afc25d3f0faca Mon Sep 17 00:00:00 2001 From: lolcat Date: Thu, 27 Jul 2023 23:06:49 -0400 Subject: [PATCH] bug fixes (without google support) --- about.php | 2 +- api.txt | 4 +- lib/frontend.php | 10 +- lib/nextpage.php | 2 +- proxy.php | 1 + scraper/google.php | 366 ++++++++++++++++++++++--------------------- settings.php | 38 ++--- template/header.html | 2 +- template/home.html | 4 +- template/images.html | 2 +- template/search.html | 2 +- 11 files changed, 219 insertions(+), 214 deletions(-) diff --git a/about.php b/about.php index fdc4812..b3fd6d9 100644 --- a/about.php +++ b/about.php @@ -60,7 +60,7 @@ $left = Provide users with a privacy oriented, extremely lightweight, ad free, free as in freedom (and free beer!) way to search for documents around the internet, with minimal, optional javascript code. My long term goal would be to build my own index (that doesn\'t suck) and provide users with an unbiased search engine, with no political inclinations.

Do you keep logs?

- I store data temporarly to get the next page of results. This might include search queries, tokens and other parameters. These parameters are encrypted using
aes-256-gcm
on the serber, for which I give you a key (also known internally as
npt
token). When you make a request to get the next page, you supply the token, the data is decrypted and the request is fulfilled. This encrypted data is deleted after 7 minutes, or after it\'s used, whichever comes first.

+ I store data temporarily to get the next page of results. This might include search queries, tokens and other parameters. These parameters are encrypted using
aes-256-gcm
on the serber, for which I give you a key (also known internally as
npt
token). When you make a request to get the next page, you supply the token, the data is decrypted and the request is fulfilled. This encrypted data is deleted after 15 minutes, or after it\'s used, whichever comes first.

I don\'t log IP addresses, user agents, or anything else. The
npt
tokens are the only thing that are stored (in RAM, mind you), temporarly, encrypted. diff --git a/api.txt b/api.txt index d63269f..3b45e91 100644 --- a/api.txt +++ b/api.txt @@ -73,8 +73,8 @@ impossible for a 4get operator to peek at the private data of the user after a request has been made. - The tokens will expire as soon as they are used or after a 7 minutes - inactivity period, whichever comes first. + The tokens will expire as soon as they are used or after a 15 + minutes inactivity period, whichever comes first. + Beware of null values! diff --git a/lib/frontend.php b/lib/frontend.php index e03eb1d..16e5693 100644 --- a/lib/frontend.php +++ b/lib/frontend.php @@ -878,7 +878,7 @@ class frontend{ "option" => [ "ddg" => "DuckDuckGo", "brave" => "Brave", - "google" => "Google", + //"google" => "Google", "mojeek" => "Mojeek", "marginalia" => "Marginalia", "wiby" => "wiby" @@ -892,7 +892,7 @@ class frontend{ "option" => [ "ddg" => "DuckDuckGo", "yandex" => "Yandex", - "google" => "Google" + //"google" => "Google" ] ]; break; @@ -903,7 +903,7 @@ class frontend{ "option" => [ "yt" => "YouTube", "ddg" => "DuckDuckGo", - "google" => "Google" + //"google" => "Google" ] ]; break; @@ -914,7 +914,7 @@ class frontend{ "option" => [ "ddg" => "DuckDuckGo", "brave" => "Brave", - "google" => "Google", + //"google" => "Google", "mojeek" => "Mojeek" ] ]; @@ -1285,7 +1285,7 @@ class frontend{ return htmlspecialchars($image); } - return "/proxy.php?i=" . urlencode($image) . "&s=" . $format; + return "/proxy?i=" . urlencode($image) . "&s=" . $format; } public function htmlnextpage($gets, $npt, $page){ diff --git a/lib/nextpage.php b/lib/nextpage.php index a883e49..3fab855 100644 --- a/lib/nextpage.php +++ b/lib/nextpage.php @@ -28,7 +28,7 @@ class nextpage{ $this->scraper . 
(string)($key), gzdeflate($salt.$iv.$out.$tag), - 420 // cache information for 7 minutes blaze it + 900 // cache information for 15 minutes blaze it ); return diff --git a/proxy.php b/proxy.php index edefd77..d8b3c1b 100644 --- a/proxy.php +++ b/proxy.php @@ -109,6 +109,7 @@ try{ $image->stripImage(); $image->setFormat("jpeg"); + $image->setImageCompressionQuality(90); $image->setImageCompression(Imagick::COMPRESSION_JPEG2000); $proxy->getfilenameheader($payload["headers"], $_GET["i"]); diff --git a/scraper/google.php b/scraper/google.php index df10754..28ede6d 100644 --- a/scraper/google.php +++ b/scraper/google.php @@ -1565,18 +1565,17 @@ class google{ } /* - Fallback to parsing it as an embed + Detect if its a wikipedia thing */ + $h3 = + $this->fuckhtml + ->getElementsByTagName("h3"); - $table = [ - "title" => null, - "description" => [], - "url" => null, - "thumb" => null, - "table" => [], - "sublink" => [] - ]; + + /* + Fallback to parsing the word definitions + */ $parts = $this->fuckhtml ->getElementsByClassName( @@ -1596,12 +1595,17 @@ class google{ $head = $parts[0]; - $h3 = - $this->fuckhtml - ->getElementsByTagName("h3"); - if(count($h3) !== 0){ + $table = [ + "title" => null, + "description" => [], + "url" => null, + "thumb" => null, + "table" => [], + "sublink" => [] + ]; + $h3 = $h3[0]; $table["title"] = @@ -1626,201 +1630,201 @@ class google{ $head ) ]; - } - - $audio = - $this->fuckhtml - ->getElementsByTagName("audio"); - - if(count($audio) !== 0){ - $table["description"][] = [ - "type" => "audio", - "url" => - str_replace( - "http://", - "https://", - $this->fuckhtml - ->getTextContent( - $audio[0]["attributes"]["src"] - ) - ) - ]; - } - - if(count($parts) >= 2){ - - $this->fuckhtml->load($parts[1]); - - $parts = + $audio = $this->fuckhtml - ->getElementsByClassName( - $this->findstyles( - [ - "padding-bottom" => "12px" - ], - self::is_class - ), - "div" - ); + ->getElementsByTagName("audio"); - foreach($parts as $part){ + if(count($audio) !== 
0){ - $this->fuckhtml->load($part); - - $lists = - $this->fuckhtml - ->getElementsByTagName("ol"); - - if(count($lists) !== 0){ - - foreach($lists as $list){ - - $this->fuckhtml->load($list); - - $list_items = + $table["description"][] = [ + "type" => "audio", + "url" => + str_replace( + "http://", + "https://", $this->fuckhtml - ->getElementsByTagName("li"); - - $index = 0; - - if(count($list_items) !== 0){ - - foreach($list_items as $list_item){ - - $index++; - - $this->fuckhtml->load($list_item); - - $list_subitems = - $this->fuckhtml - ->getElementsByTagName("div"); - - foreach($list_subitems as $subitem){ - - if($subitem["level"] !== 1){ continue; } - - $this->fuckhtml->load($subitem); - - $spans = - $this->fuckhtml - ->getElementsByTagName("span"); - - if(count($spans) !== 0){ - - $type = "quote"; - }else{ - - $type = "text"; - } - - $value = - $this->fuckhtml - ->getTextContent( - $subitem - ); - - if($type == "text"){ - - $value = $index . ". " . $value; - } - - $table["description"][] = [ - "type" => $type, - "value" => $value - ]; - } - } - } - } - - continue; - } - - // get title - $spans = - $this->fuckhtml - ->getElementsByTagName("span"); - - if(count($spans) !== 0){ - - foreach($spans as $span){ - - $part["innerHTML"] = - str_replace( - $span["outerHTML"], - "", - $part["innerHTML"] - ); - } - - if( - $this->fuckhtml - ->getTextContent( - $part + ->getTextContent( + $audio[0]["attributes"]["src"] + ) ) - == "" - ){ - - $table["description"][] = [ - "type" => "title", - "value" => - $this->fuckhtml - ->getTextContent( - $spans[0] - ) - ]; - - continue; - } - } + ]; + } + + if(count($parts) >= 2){ - // fallback to getting non-numbered list - $nlist = + $this->fuckhtml->load($parts[1]); + + $parts = $this->fuckhtml ->getElementsByClassName( $this->findstyles( [ - "white-space" => "pre-line", - "word-wrap" => "break-word" + "padding-bottom" => "12px" ], self::is_class ), "div" ); - if(count($nlist) !== 0){ + foreach($parts as $part){ - foreach($nlist as 
$nlist_item){ + $this->fuckhtml->load($part); + + $lists = + $this->fuckhtml + ->getElementsByTagName("ol"); + + if(count($lists) !== 0){ - $text = + foreach($lists as $list){ + + $this->fuckhtml->load($list); + + $list_items = + $this->fuckhtml + ->getElementsByTagName("li"); + + $index = 0; + + if(count($list_items) !== 0){ + + foreach($list_items as $list_item){ + + $index++; + + $this->fuckhtml->load($list_item); + + $list_subitems = + $this->fuckhtml + ->getElementsByTagName("div"); + + foreach($list_subitems as $subitem){ + + if($subitem["level"] !== 1){ continue; } + + $this->fuckhtml->load($subitem); + + $spans = + $this->fuckhtml + ->getElementsByTagName("span"); + + if(count($spans) !== 0){ + + $type = "quote"; + }else{ + + $type = "text"; + } + + $value = + $this->fuckhtml + ->getTextContent( + $subitem + ); + + if($type == "text"){ + + $value = $index . ". " . $value; + } + + $table["description"][] = [ + "type" => $type, + "value" => $value + ]; + } + } + } + } + + continue; + } + + // get title + $spans = + $this->fuckhtml + ->getElementsByTagName("span"); + + if(count($spans) !== 0){ + + foreach($spans as $span){ + + $part["innerHTML"] = + str_replace( + $span["outerHTML"], + "", + $part["innerHTML"] + ); + } + + if( $this->fuckhtml - ->getTextContent($nlist_item); - - if($text == ""){ + ->getTextContent( + $part + ) + == "" + ){ + + $table["description"][] = [ + "type" => "title", + "value" => + $this->fuckhtml + ->getTextContent( + $spans[0] + ) + ]; continue; } + } + + // fallback to getting non-numbered list + $nlist = + $this->fuckhtml + ->getElementsByClassName( + $this->findstyles( + [ + "white-space" => "pre-line", + "word-wrap" => "break-word" + ], + self::is_class + ), + "div" + ); + + if(count($nlist) !== 0){ - $this->fuckhtml->load($nlist_item); - - $spans = - $this->fuckhtml - ->getElementsByTagName("span"); - - if(count($spans) !== 0){ + foreach($nlist as $nlist_item){ - // is a quote node - $type = "quote"; - }else{ + $text = + 
$this->fuckhtml + ->getTextContent($nlist_item); - $type = "text"; + if($text == ""){ + + continue; + } + + $this->fuckhtml->load($nlist_item); + + $spans = + $this->fuckhtml + ->getElementsByTagName("span"); + + if(count($spans) !== 0){ + + // is a quote node + $type = "quote"; + }else{ + + $type = "text"; + } + + $table["description"][] = [ + "type" => $type, + "value" => $text + ]; } - - $table["description"][] = [ - "type" => $type, - "value" => $text - ]; } } } diff --git a/settings.php b/settings.php index 29f051d..96c31c8 100644 --- a/settings.php +++ b/settings.php @@ -70,10 +70,10 @@ $settings = [ "value" => "brave", "text" => "Brave" ], - [ - "value" => "google", - "text" => "Google" - ], + //[ + // "value" => "google", + // "text" => "Google" + //], [ "value" => "mojeek", "text" => "Mojeek" @@ -99,11 +99,11 @@ $settings = [ [ "value" => "yandex", "text" => "Yandex" - ], - [ - "value" => "google", - "text" => "Google" - ] + ]//, + //[ + // "value" => "google", + // "text" => "Google" + //] ] ], [ @@ -117,11 +117,11 @@ $settings = [ [ "value" => "ddg", "text" => "DuckDuckGo" - ], - [ - "value" => "google", - "text" => "Google" - ] + ]//, + //[ + // "value" => "google", + // "text" => "Google" + //] ] ], [ @@ -136,10 +136,10 @@ $settings = [ "value" => "brave", "text" => "Brave" ], - [ - "value" => "google", - "text" => "Google" - ], + //[ + // "value" => "google", + // "text" => "Google" + //], [ "value" => "mojeek", "text" => "Mojeek" @@ -219,7 +219,7 @@ echo '' . '' . 'Settings' . - '' . + '' . '' . '' . '' . 
diff --git a/template/header.html b/template/header.html index bd6fc8a..2633521 100644 --- a/template/header.html +++ b/template/header.html @@ -3,7 +3,7 @@ {%title%} - + diff --git a/template/home.html b/template/home.html index 7f00dae..8ca6377 100644 --- a/template/home.html +++ b/template/home.html @@ -4,7 +4,7 @@ 4get - + @@ -31,6 +31,6 @@ Report a problem: lolcat.ca/contact - + diff --git a/template/images.html b/template/images.html index a09c121..61e319c 100644 --- a/template/images.html +++ b/template/images.html @@ -2,6 +2,6 @@ {%images%} {%nextpage%} - + diff --git a/template/search.html b/template/search.html index bbfbb54..c187102 100644 --- a/template/search.html +++ b/template/search.html @@ -11,6 +11,6 @@ {%left%} - +