0
Fork 0
mirror of https://github.com/penpot/penpot.git synced 2025-02-12 18:18:24 -05:00

🎉 Add garbage collection task for file thumbnails

And additionally, rename the current task to file-gc
to match the real purpose of the task.
This commit is contained in:
Andrey Antukh 2022-03-23 10:59:20 +01:00 committed by Alonso Torres
parent b87e3c22b3
commit 2832736826
9 changed files with 352 additions and 219 deletions

View file

@ -233,14 +233,14 @@
([ds table params opts]
(exec-one! ds
(sql/insert table params opts)
(assoc opts :return-keys true))))
(merge {:return-keys true} opts))))
(defn insert-multi!
([ds table cols rows] (insert-multi! ds table cols rows nil))
([ds table cols rows opts]
(exec! ds
(sql/insert-multi table cols rows opts)
(assoc opts :return-keys true))))
(merge {:return-keys true} opts))))
(defn update!
([ds table params where] (update! ds table params where nil))

View file

@ -189,7 +189,7 @@
:pool (ig/ref :app.db/pool)
:entries
[{:cron #app/cron "0 0 0 * * ?" ;; daily
:task :file-media-gc}
:task :file-gc}
{:cron #app/cron "0 0 * * * ?" ;; hourly
:task :file-xlog-gc}
@ -231,7 +231,7 @@
:tasks
{:sendmail (ig/ref :app.emails/sendmail-handler)
:objects-gc (ig/ref :app.tasks.objects-gc/handler)
:file-media-gc (ig/ref :app.tasks.file-media-gc/handler)
:file-gc (ig/ref :app.tasks.file-gc/handler)
:file-xlog-gc (ig/ref :app.tasks.file-xlog-gc/handler)
:storage-deleted-gc (ig/ref :app.storage/gc-deleted-task)
:storage-touched-gc (ig/ref :app.storage/gc-touched-task)
@ -262,7 +262,7 @@
:storage (ig/ref :app.storage/storage)
:max-age cf/deletion-delay}
:app.tasks.file-media-gc/handler
:app.tasks.file-gc/handler
{:pool (ig/ref :app.db/pool)
:max-age cf/deletion-delay}

View file

@ -58,8 +58,9 @@
(db/insert! conn :file-profile-rel))))
(defn create-file
[conn {:keys [id name project-id is-shared data deleted-at]
[conn {:keys [id name project-id is-shared data deleted-at revn]
:or {is-shared false
revn 0
deleted-at nil}
:as params}]
(let [id (or id (:id data) (uuid/next))
@ -68,6 +69,7 @@
{:id id
:project-id project-id
:name name
:revn revn
:is-shared is-shared
:data (blob/encode data)
:deleted-at deleted-at})]
@ -500,13 +502,13 @@
;; --- Mutation: Upsert file thumbnail
(def sql:upsert-file-thumbnail
"insert into file_thumbnail(file_id, revn, data, props)
values (?, ?, ?, ?)
"insert into file_thumbnail (file_id, revn, data, props)
values (?, ?, ?, ?::jsonb)
on conflict(file_id, revn) do
update set data = ?, updated_at=now();")
update set data = ?, props=?, updated_at=now();")
(s/def ::revn ::us/integer)
(s/def ::props (s/map-of ::us/keyword any?))
(s/def ::revn ::us/integer)
(s/def ::props map?)
(s/def ::upsert-file-thumbnail
(s/keys :req-un [::profile-id ::file-id ::revn ::data ::props]))
@ -516,5 +518,5 @@
(files/check-edition-permissions! conn profile-id file-id)
(let [props (db/tjson (or props {}))]
(db/exec-one! conn [sql:upsert-file-thumbnail
file-id revn data props data])
file-id revn data props data props])
nil)))

View file

@ -440,7 +440,7 @@
(let [params (cond-> {:file-id file-id}
frame-id (assoc :frame-id frame-id))
rows (db/query pool :file-frame-thumbnail params)]
(d/group-by :frame-id :data rows)))
(d/index-by :frame-id :data rows)))
;; --- QUERY: get file thumbnail
@ -465,10 +465,11 @@
(ex/raise :type :not-found
:code :file-thumbnail-not-found))
(with-meta {:data (:data row)
:props (some-> (:props row) db/decode-transit-pgobject)
:revn (:revn row)
:file-id (:file-id row)}
(with-meta
{:data (:data row)
:props (some-> (:props row) db/decode-transit-pgobject)
:revn (:revn row)
:file-id (:file-id row)}
{:transform-response (rpch/http-cache {:max-age (* 1000 60 60)})})))
;; --- Helpers

View file

@ -0,0 +1,164 @@
;; This Source Code Form is subject to the terms of the Mozilla Public
;; License, v. 2.0. If a copy of the MPL was not distributed with this
;; file, You can obtain one at http://mozilla.org/MPL/2.0/.
;;
;; Copyright (c) UXBOX Labs SL
(ns app.tasks.file-gc
"A maintenance task that is responsible of: purge unused file media,
clean unused frame thumbnails and remove old file thumbnails. The
file is eligible to be garbage collected after some period of
inactivity (the default threshold is 72h)."
(:require
[app.common.data :as d]
[app.common.logging :as l]
[app.common.pages.helpers :as cph]
[app.common.pages.migrations :as pmg]
[app.db :as db]
[app.util.blob :as blob]
[app.util.time :as dt]
[clojure.spec.alpha :as s]
[integrant.core :as ig]))
(declare ^:private retrieve-candidates)
(declare ^:private process-file)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; HANDLER
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(s/def ::max-age ::dt/duration)
(defmethod ig/pre-init-spec ::handler [_]
(s/keys :req-un [::db/pool ::max-age]))
(defmethod ig/init-key ::handler
[_ {:keys [pool] :as cfg}]
(fn [_]
(db/with-atomic [conn pool]
(let [cfg (assoc cfg :conn conn)]
(loop [total 0
files (retrieve-candidates cfg)]
(if-let [file (first files)]
(do
(process-file cfg file)
(recur (inc total)
(rest files)))
(do
(l/debug :msg "finished processing files" :processed total)
{:processed total})))))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; IMPL
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(def ^:private
sql:retrieve-candidates-chunk
"select f.id,
f.data,
f.revn,
f.modified_at
from file as f
where f.has_media_trimmed is false
and f.modified_at < now() - ?::interval
and f.modified_at < ?
order by f.modified_at desc
limit 1
for update skip locked")
(defn- retrieve-candidates
[{:keys [conn max-age] :as cfg}]
(let [interval (db/interval max-age)
get-chunk
(fn [cursor]
(let [rows (db/exec! conn [sql:retrieve-candidates-chunk interval cursor])]
[(some->> rows peek :modified-at) (seq rows)]))]
(sequence cat (d/iteration get-chunk
:vf second
:kf first
:initk (dt/now)))))
(defn- collect-used-media
[data]
(let [xform (comp
(map :objects)
(mapcat vals)
(keep (fn [{:keys [type] :as obj}]
(case type
:path (get-in obj [:fill-image :id])
:image (get-in obj [:metadata :id])
nil))))
pages (concat
(vals (:pages-index data))
(vals (:components data)))]
(-> #{}
(into xform pages)
(into (keys (:media data))))))
(defn- clean-file-media!
"Performs the garbage collection of file media objects."
[conn file-id data]
(let [used (collect-used-media data)
unused (->> (db/query conn :file-media-object {:file-id file-id})
(remove #(contains? used (:id %))))]
(doseq [mobj unused]
(l/debug :hint "delete file media object"
:id (:id mobj)
:media-id (:media-id mobj)
:thumbnail-id (:thumbnail-id mobj))
;; NOTE: deleting the file-media-object in the database
;; automatically marks as touched the referenced storage
;; objects. The touch mechanism is needed because many files can
;; point to the same storage objects and we can't just delete
;; them.
(db/delete! conn :file-media-object {:id (:id mobj)}))))
(defn- collect-frames
[data]
(let [xform (comp
(map :objects)
(mapcat vals)
(filter cph/frame-shape?)
(keep :id))
pages (concat
(vals (:pages-index data))
(vals (:components data)))]
(into #{} xform pages)))
(defn- clean-file-frame-thumbnails!
[conn file-id data]
(let [sql (str "delete from file_frame_thumbnail "
" where file_id=? and not (frame_id=ANY(?))")
ids (->> (collect-frames data)
(db/create-array conn "uuid"))
res (db/exec-one! conn [sql file-id ids])]
(l/debug :hint "delete frame thumbnails" :total (:next.jdbc/update-count res))))
(defn- clean-file-thumbnails!
[conn file-id revn]
(let [sql (str "delete from file_thumbnail "
" where file_id=? and revn < ?")
res (db/exec-one! conn [sql file-id revn])]
(l/debug :hint "delete file thumbnails" :total (:next.jdbc/update-count res))))
(defn- process-file
[{:keys [conn] :as cfg} {:keys [id data revn modified-at] :as file}]
(l/debug :hint "processing file" :id id :modified-at modified-at)
(let [data (-> (blob/decode data)
(assoc :id id)
(pmg/migrate-data))]
(clean-file-media! conn id data)
(clean-file-frame-thumbnails! conn id data)
(clean-file-thumbnails! conn id revn)
;; Mark file as trimmed
(db/update! conn :file
{:has-media-trimmed true}
{:id id})
nil))

View file

@ -1,139 +0,0 @@
;; This Source Code Form is subject to the terms of the Mozilla Public
;; License, v. 2.0. If a copy of the MPL was not distributed with this
;; file, You can obtain one at http://mozilla.org/MPL/2.0/.
;;
;; Copyright (c) UXBOX Labs SL
(ns app.tasks.file-media-gc
"A maintenance task that is responsible to purge the unused media
objects from files. A file is eligible to be garbage collected
after some period of inactivity (the default threshold is 72h)."
(:require
[app.common.logging :as l]
[app.common.pages.helpers :as cph]
[app.common.pages.migrations :as pmg]
[app.db :as db]
[app.util.blob :as blob]
[app.util.time :as dt]
[clojure.spec.alpha :as s]
[integrant.core :as ig]))
(declare process-file)
(declare retrieve-candidates)
(s/def ::max-age ::dt/duration)
(defmethod ig/pre-init-spec ::handler [_]
(s/keys :req-un [::db/pool ::max-age]))
(defmethod ig/init-key ::handler
[_ {:keys [pool] :as cfg}]
(fn [_]
(db/with-atomic [conn pool]
(let [cfg (assoc cfg :conn conn)]
(loop [n 0]
(let [files (retrieve-candidates cfg)]
(if (seq files)
(do
(run! (partial process-file cfg) files)
(recur (+ n (count files))))
(do
(l/debug :msg "finished processing files" :processed n)
{:processed n}))))))))
(def ^:private
sql:retrieve-candidates-chunk
"select f.id,
f.data,
extract(epoch from (now() - f.modified_at))::bigint as age
from file as f
where f.has_media_trimmed is false
and f.modified_at < now() - ?::interval
order by f.modified_at asc
limit 10
for update skip locked")
(defn- retrieve-candidates
[{:keys [conn max-age] :as cfg}]
(let [interval (db/interval max-age)]
(->> (db/exec! conn [sql:retrieve-candidates-chunk interval])
(mapv (fn [{:keys [age] :as row}]
(assoc row :age (dt/duration {:seconds age})))))))
(def ^:private
collect-media-xf
(comp
(map :objects)
(mapcat vals)
(keep (fn [{:keys [type] :as obj}]
(case type
:path (get-in obj [:fill-image :id])
:image (get-in obj [:metadata :id])
nil)))))
(defn- collect-used-media
[data]
(let [pages (concat
(vals (:pages-index data))
(vals (:components data)))]
(-> #{}
(into collect-media-xf pages)
(into (keys (:media data))))))
(def ^:private
collect-frames-xf
(comp
(map :objects)
(mapcat vals)
(filter cph/frame-shape?)
(keep :id)))
(defn- collect-frames
[data]
(let [pages (concat
(vals (:pages-index data))
(vals (:components data)))]
(into #{} collect-frames-xf pages)))
(defn- process-file
[{:keys [conn] :as cfg} {:keys [id data age] :as file}]
(let [data (-> (blob/decode data)
(assoc :id id)
(pmg/migrate-data))]
(let [used (collect-used-media data)
unused (->> (db/query conn :file-media-object {:file-id id})
(remove #(contains? used (:id %))))]
(l/debug :hint "processing file"
:id id
:age age
:to-delete (count unused))
;; Mark file as trimmed
(db/update! conn :file
{:has-media-trimmed true}
{:id id})
(doseq [mobj unused]
(l/debug :hint "deleting media object"
:id (:id mobj)
:media-id (:media-id mobj)
:thumbnail-id (:thumbnail-id mobj))
;; NOTE: deleting the file-media-object in the database
;; automatically marks as touched the referenced storage
;; objects. The touch mechanism is needed because many files can
;; point to the same storage objects and we can't just delete
;; them.
(db/delete! conn :file-media-object {:id (:id mobj)})))
(let [sql (str "delete from file_frame_thumbnail "
" where file_id = ? and not (frame_id = ANY(?))")
ids (->> (collect-frames data)
(db/create-array conn "uuid"))]
;; delete the unused frame thumbnails
(db/exec! conn [sql (:id file) ids]))
nil))

View file

@ -8,6 +8,7 @@
(:require
[app.common.uuid :as uuid]
[app.db :as db]
[app.db.sql :as sql]
[app.http :as http]
[app.storage :as sto]
[app.test-helpers :as th]
@ -117,7 +118,7 @@
(t/is (= 0 (count result))))))
))
(t/deftest file-media-gc-task
(t/deftest file-gc-task
(letfn [(create-file-media-object [{:keys [profile-id file-id]}]
(let [mfile {:filename "sample.jpg"
:path (th/tempfile "app/test_files/sample.jpg")
@ -130,6 +131,9 @@
:name "testfile"
:content mfile}
out (th/mutation! params)]
;; (th/print-result! out)
(t/is (nil? (:error out)))
(:result out)))
@ -189,7 +193,7 @@
(t/is (= 0 (:delete res))))
;; run the task immediately
(let [task (:app.tasks.file-media-gc/handler th/*system*)
(let [task (:app.tasks.file-gc/handler th/*system*)
res (task {})]
(t/is (= 0 (:processed res))))
@ -198,7 +202,7 @@
(th/sleep 300)
;; run the task again
(let [task (:app.tasks.file-media-gc/handler th/*system*)
(let [task (:app.tasks.file-gc/handler th/*system*)
res (task {})]
(t/is (= 1 (:processed res))))
@ -342,7 +346,7 @@
(t/is (th/ex-info? error))
(t/is (th/ex-of-type? error :not-found))))
(t/deftest deletion-test
(t/deftest deletion
(let [task (:app.tasks.objects-gc/handler th/*system*)
profile1 (th/create-profile* 1)
file (th/create-file* 1 {:project-id (:default-project-id profile1)
@ -410,71 +414,158 @@
))
(t/deftest query-frame-thumbnails
(let [prof (th/create-profile* 1 {:is-active true})
file (th/create-file* 1 {:profile-id (:id prof)
:project-id (:default-project-id prof)
:is-shared false})
data {::th/type :file-frame-thumbnail
:profile-id (:id prof)
:file-id (:id file)
:frame-id (uuid/next)}]
;;insert an entry on the database with a test value for the thumbnail of this frame
(db/exec-one! th/*pool*
["insert into file_frame_thumbnail(file_id, frame_id, data) values (?, ?, ?)"
(:file-id data) (:frame-id data) "testvalue"])
(let [out (th/query! data)]
(t/is (nil? (:error out)))
(let [result (:result out)]
(t/is (= 1 (count result)))
(t/is (= "testvalue" (:data result)))))))
(t/deftest insert-frame-thumbnails
(let [prof (th/create-profile* 1 {:is-active true})
file (th/create-file* 1 {:profile-id (:id prof)
:project-id (:default-project-id prof)
:is-shared false})
data {::th/type :upsert-frame-thumbnail
:profile-id (:id prof)
:file-id (:id file)
:frame-id (uuid/next)
:data "test insert new value"}
out (th/mutation! data)]
(t/is (nil? (:error out)))
(t/is (nil? (:result out)))
;;retrieve the value from the database and check its content
(let [result (db/exec-one!
th/*pool*
["select data from file_frame_thumbnail where file_id = ? and frame_id = ?"
(:file-id data) (:frame-id data)])]
(t/is (= "test insert new value" (:data result))))))
(t/deftest frame-thumbnails
(let [prof (th/create-profile* 1 {:is-active true})
file (th/create-file* 1 {:profile-id (:id prof)
:project-id (:default-project-id prof)
:is-shared false})
data {::th/type :upsert-frame-thumbnail
data {::th/type :file-frame-thumbnails
:profile-id (:id prof)
:file-id (:id file)
:frame-id (uuid/next)}]
;; insert an entry on the database with a test value for the thumbnail of this frame
(th/db-insert! :file-frame-thumbnail
{:file-id (:file-id data)
:frame-id (:frame-id data)
:data "testvalue"})
(let [{:keys [result error] :as out} (th/query! data)]
;; (th/print-result! out)
(t/is (nil? error))
(t/is (= 1 (count result)))
(t/is (= "testvalue" (get result (:frame-id data)))))))
(t/deftest insert-frame-thumbnails
(let [prof (th/create-profile* 1 {:is-active true})
file (th/create-file* 1 {:profile-id (:id prof)
:project-id (:default-project-id prof)
:is-shared false})
data {::th/type :upsert-file-frame-thumbnail
:profile-id (:id prof)
:file-id (:id file)
:frame-id (uuid/next)
:data "test insert new value"}]
(let [out (th/mutation! data)]
(t/is (nil? (:error out)))
(t/is (nil? (:result out)))
(let [[result] (th/db-query :file-frame-thumbnail
{:file-id (:file-id data)
:frame-id (:frame-id data)})]
(t/is (= "test insert new value" (:data result)))))))
(t/deftest upsert-frame-thumbnails
(let [prof (th/create-profile* 1 {:is-active true})
file (th/create-file* 1 {:profile-id (:id prof)
:project-id (:default-project-id prof)
:is-shared false})
data {::th/type :upsert-file-frame-thumbnail
:profile-id (:id prof)
:file-id (:id file)
:frame-id (uuid/next)
:data "updated value"}]
;;insert an entry on the database with and old value for the thumbnail of this frame
(db/exec-one! th/*pool*
["insert into file_frame_thumbnail(file_id, frame_id, data) values (?, ?, ?)"
(:file-id data) (:frame-id data) "old value"])
;; insert an entry on the database with and old value for the thumbnail of this frame
(th/db-insert! :file-frame-thumbnail
{:file-id (:file-id data)
:frame-id (:frame-id data)
:data "old value"})
(let [out (th/mutation! data)]
;; (th/print-result! out)
(t/is (nil? (:error out)))
(t/is (nil? (:result out)))
;;retrieve the value from the database and check its content
(let [result (db/exec-one!
th/*pool*
["select data from file_frame_thumbnail where file_id = ? and frame_id = ?"
(:file-id data) (:frame-id data)])]
;; retrieve the value from the database and check its content
(let [[result] (th/db-query :file-frame-thumbnail
{:file-id (:file-id data)
:frame-id (:frame-id data)})]
(t/is (= "updated value" (:data result)))))))
(t/deftest file-thumbnail-ops
(let [prof (th/create-profile* 1 {:is-active true})
file (th/create-file* 1 {:profile-id (:id prof)
:project-id (:default-project-id prof)
:revn 2
:is-shared false})
data {::th/type :file-thumbnail
:profile-id (:id prof)
:file-id (:id file)}]
(t/testing "query a thumbnail with single revn"
;; insert an entry on the database with a test value for the thumbnail of this frame
(th/db-insert! :file-thumbnail
{:file-id (:file-id data)
:revn 1
:data "testvalue1"})
(let [{:keys [result error] :as out} (th/query! data)]
;; (th/print-result! out)
(t/is (nil? error))
(t/is (= 4 (count result)))
(t/is (= "testvalue1" (:data result)))
(t/is (= 1 (:revn result)))))
(t/testing "query thumbnail with two revisions"
;; insert an entry on the database with a test value for the thumbnail of this frame
(th/db-insert! :file-thumbnail
{:file-id (:file-id data)
:revn 2
:data "testvalue2"})
(let [{:keys [result error] :as out} (th/query! data)]
;; (th/print-result! out)
(t/is (nil? error))
(t/is (= 4 (count result)))
(t/is (= "testvalue2" (:data result)))
(t/is (= 2 (:revn result))))
;; Then query the specific revn
(let [{:keys [result error] :as out} (th/query! (assoc data :revn 1))]
;; (th/print-result! out)
(t/is (nil? error))
(t/is (= 4 (count result)))
(t/is (= "testvalue1" (:data result)))
(t/is (= 1 (:revn result)))))
(t/testing "upsert file-thumbnail"
(let [data {::th/type :upsert-file-thumbnail
:profile-id (:id prof)
:file-id (:id file)
:data "foobar"
:props {:baz 1}
:revn 2}
{:keys [result error] :as out} (th/mutation! data)]
;; (th/print-result! out)
(t/is (nil? error))
(t/is (nil? result))))
(t/testing "query last result"
(let [{:keys [result error] :as out} (th/query! data)]
;; (th/print-result! out)
(t/is (nil? error))
(t/is (= 4 (count result)))
(t/is (= "foobar" (:data result)))
(t/is (= {:baz 1} (:props result)))
(t/is (= 2 (:revn result)))))
(t/testing "gc task"
;; make the file eligible for GC waiting 300ms (configured
;; timeout for testing)
(th/sleep 300)
;; run the task again
(let [task (:app.tasks.file-gc/handler th/*system*)
res (task {})]
(t/is (= 1 (:processed res))))
;; Then query the specific revn
(let [{:keys [result error] :as out} (th/query! (assoc data :revn 1))]
(t/is (= :not-found (th/ex-type error)))
(t/is (= :file-thumbnail-not-found (th/ex-code error)))))
))

View file

@ -73,7 +73,7 @@
:app.worker/cron
:app.worker/worker)
(d/deep-merge
{:app.tasks.file-media-gc/handler {:max-age (dt/duration 300)}}))
{:app.tasks.file-gc/handler {:max-age (dt/duration 300)}}))
_ (ig/load-namespaces config)
system (-> (ig/prep config)
(ig/init))]
@ -285,7 +285,8 @@
(let [data (ex-data error)]
(cond
(= :spec-validation (:code data))
(expound/printer (:data data))
(println
(us/pretty-explain data))
(= :service-error (:type data))
(print-error! (.getCause ^Throwable error))
@ -302,7 +303,7 @@
(println "====> END ERROR"))
(do
(println "====> START RESPONSE")
(prn result)
(fipp.edn/pprint result)
(println "====> END RESPONSE"))))
(defn exception?
@ -374,3 +375,15 @@
(.readLine cnsl)
nil))
(defn db-exec!
[sql]
(db/exec! *pool* sql))
(defn db-insert!
[& params]
(apply db/insert! *pool* params))
(defn db-query
[& params]
(apply db/query *pool* params))

View file

@ -128,9 +128,10 @@
(defn index-by
"Return a indexed map of the collection keyed by the result of
executing the getter over each element of the collection."
[getter coll]
(persistent!
(reduce #(assoc! %1 (getter %2) %2) (transient {}) coll)))
([kf coll] (index-by kf identity coll))
([kf vf coll]
(persistent!
(reduce #(assoc! %1 (kf %2) (vf %2)) (transient {}) coll))))
(defn index-of-pred
[coll pred]