0
Fork 0
mirror of https://github.com/penpot/penpot.git synced 2025-03-13 00:01:51 -05:00

Merge pull request #4143 from penpot/niwinz-staging-file-gc

♻️ Refactor the file-gc task
This commit is contained in:
Alejandro 2024-02-14 06:48:07 +01:00 committed by GitHub
commit 6fd30d50f4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 291 additions and 248 deletions

View file

@ -141,8 +141,6 @@
" WHERE flr.file_id = ANY(?) AND l.deleted_at IS NULL")]
(db/exec! conn [sql ids])))))
;; NOTE: Will be used in future, commented for satisfy linter
(def ^:private sql:get-libraries
"WITH RECURSIVE libs AS (
SELECT fl.id
@ -409,7 +407,6 @@
(update :colors relink-colors)
(d/without-nils))))))
(defn- upsert-file!
[conn file]
(let [sql (str "INSERT INTO file (id, project_id, name, revn, is_shared, data, created_at, modified_at) "

View file

@ -40,6 +40,7 @@
[promesa.util :as pu]
[yetti.adapter :as yt])
(:import
com.github.luben.zstd.ZstdIOException
com.github.luben.zstd.ZstdInputStream
com.github.luben.zstd.ZstdOutputStream
java.io.DataInputStream
@ -756,6 +757,12 @@
(pu/with-open [input (io/input-stream input)]
(read-import! (assoc cfg ::input input))))
(catch ZstdIOException cause
(ex/raise :type :validation
:code :invalid-penpot-file
:hint "invalid penpot file received: probably truncated"
:cause cause))
(catch Throwable cause
(vreset! cs cause)
(throw cause))

View file

@ -61,9 +61,6 @@
(let [result (handler)]
(events/tap :end result))
(catch Throwable cause
(binding [l/*context* (errors/request->context request)]
(l/err :hint "unexpected error process streaming response"
:cause cause))
(events/tap :error (errors/handle' cause request)))
(finally
(sp/close! events/*channel*)

View file

@ -11,7 +11,8 @@
inactivity (the default threshold is 72h)."
(:require
[app.binfile.common :as bfc]
[app.common.files.migrations :as pmg]
[app.common.files.migrations :as fmg]
[app.common.files.validate :as cfv]
[app.common.logging :as l]
[app.common.thumbnails :as thc]
[app.common.types.components-list :as ctkl]
@ -29,9 +30,256 @@
[clojure.spec.alpha :as s]
[integrant.core :as ig]))
(declare ^:private get-candidates)
(declare ^:private clean-file!)
(defn- decode-file
[cfg {:keys [id] :as file}]
(binding [pmap/*load-fn* (partial feat.fdata/load-pointer cfg id)]
(-> file
(update :features db/decode-pgarray #{})
(update :data blob/decode)
(update :data feat.fdata/process-pointers deref)
(update :data feat.fdata/process-objects (partial into {}))
(update :data assoc :id id)
(fmg/migrate-file))))
(defn- update-file!
[{:keys [::db/conn] :as cfg} {:keys [id] :as file}]
(let [file (if (contains? (:features file) "fdata/objects-map")
(feat.fdata/enable-objects-map file)
file)
file (if (contains? (:features file) "fdata/pointer-map")
(binding [pmap/*tracked* (pmap/create-tracked)]
(let [file (feat.fdata/enable-pointer-map file)]
(feat.fdata/persist-pointers! cfg id)
file))
file)
file (-> file
(update :features db/encode-pgarray conn "text")
(update :data blob/encode))]
(db/update! conn :file
{:has-media-trimmed true
:data (:data file)}
{:id id})))
(defn- process-file!
[cfg file]
(try
(let [file (decode-file cfg file)
file (clean-file! cfg file)]
(cfv/validate-file-schema! file)
(update-file! cfg file))
(catch Throwable cause
(l/err :hint "error on cleaning file (skiping)"
:file-id (str (:id file))
:cause cause))))
(def ^:private
sql:get-candidates
"SELECT f.id,
f.data,
f.revn,
f.features,
f.modified_at
FROM file AS f
WHERE f.has_media_trimmed IS false
AND f.modified_at < now() - ?::interval
ORDER BY f.modified_at DESC
FOR UPDATE
SKIP LOCKED")
(defn- get-candidates
[{:keys [::db/conn ::min-age ::file-id]}]
(if (uuid? file-id)
(do
(l/warn :hint "explicit file id passed on params" :file-id (str file-id))
(db/query conn :file {:id file-id}))
(let [min-age (db/interval min-age)]
(db/cursor conn [sql:get-candidates min-age] {:chunk-size 1}))))
(def ^:private sql:mark-file-media-object-deleted
"UPDATE file_media_object
SET deleted_at = now()
WHERE file_id = ? AND id != ALL(?::uuid[])
RETURNING id")
(defn- clean-file-media!
"Performs the garbage collection of file media objects."
[{:keys [::db/conn]} {:keys [id data] :as file}]
(let [used (bfc/collect-used-media data)
ids (db/create-array conn "uuid" used)
unused (->> (db/exec! conn [sql:mark-file-media-object-deleted id ids])
(into #{} (map :id)))]
(doseq [id unused]
(l/trc :hint "mark deleted"
:rel "file-media-object"
:id (str id)
:file-id (str id)))
[(count unused) file]))
(def ^:private sql:mark-file-object-thumbnails-deleted
"UPDATE file_tagged_object_thumbnail
SET deleted_at = now()
WHERE file_id = ? AND object_id != ALL(?::text[])
RETURNING object_id")
(defn- clean-file-object-thumbnails!
[{:keys [::db/conn]} {:keys [data] :as file}]
(let [file-id (:id file)
using (->> (vals (:pages-index data))
(into #{} (comp
(mapcat (fn [{:keys [id objects]}]
(->> (ctt/get-frames objects)
(map #(assoc % :page-id id)))))
(mapcat (fn [{:keys [id page-id]}]
(list
(thc/fmt-object-id file-id page-id id "frame")
(thc/fmt-object-id file-id page-id id "component")))))))
ids (db/create-array conn "text" using)
unused (->> (db/exec! conn [sql:mark-file-object-thumbnails-deleted file-id ids])
(into #{} (map :object-id)))]
(doseq [object-id unused]
(l/trc :hint "mark deleted"
:rel "file-tagged-object-thumbnail"
:object-id object-id
:file-id (str file-id)))
[(count unused) file]))
(def ^:private sql:mark-file-thumbnails-deleted
"UPDATE file_thumbnail
SET deleted_at = now()
WHERE file_id = ? AND revn < ?
RETURNING revn")
(defn- clean-file-thumbnails!
[{:keys [::db/conn]} {:keys [id revn] :as file}]
(let [unused (->> (db/exec! conn [sql:mark-file-thumbnails-deleted id revn])
(into #{} (map :revn)))]
(doseq [revn unused]
(l/trc :hint "mark deleted"
:rel "file-thumbnail"
:revn revn
:file-id (str id)))
[(count unused) file]))
(def ^:private sql:get-files-for-library
"SELECT f.id, f.data, f.modified_at, f.features
FROM file AS f
LEFT JOIN file_library_rel AS fl ON (fl.file_id = f.id)
WHERE fl.library_file_id = ?
AND f.deleted_at IS null
ORDER BY f.modified_at ASC")
(defn- clean-deleted-components!
"Performs the garbage collection of unreferenced deleted components."
[{:keys [::db/conn] :as cfg} {:keys [data] :as file}]
(let [file-id (:id file)
get-used-components
(fn [data components]
;; Find which of the components are used in the file.
(into #{}
(filter #(ctf/used-in? data file-id % :component))
components))
get-unused-components
(fn [components files]
;; Find and return a set of unused components (on all files).
(reduce (fn [components {:keys [data]}]
(if (seq components)
(->> (get-used-components data components)
(set/difference components))
(reduced components)))
components
files))
process-fdata
(fn [data unused]
(reduce (fn [data id]
(l/trc :hint "delete component"
:component-id (str id)
:file-id (str file-id))
(ctkl/delete-component data id))
data
unused))
deleted (into #{} (ctkl/deleted-components-seq data))
unused (->> (db/cursor conn [sql:get-files-for-library file-id] {:chunk-size 1})
(map (partial decode-file cfg))
(cons file)
(get-unused-components deleted)
(mapv :id)
(set))
file (update file :data process-fdata unused)]
[(count unused) file]))
(def ^:private sql:get-changes
"SELECT id, data FROM file_change
WHERE file_id = ? AND data IS NOT NULL
ORDER BY created_at ASC")
(def ^:private sql:mark-deleted-data-fragments
"UPDATE file_data_fragment
SET deleted_at = now()
WHERE file_id = ?
AND id != ALL(?::uuid[])
RETURNING id")
(defn- clean-data-fragments!
[{:keys [::db/conn]} {:keys [id data] :as file}]
(let [used (->> (db/cursor conn [sql:get-changes id])
(into (feat.fdata/get-used-pointer-ids data)
(comp (map :data)
(map blob/decode)
(mapcat feat.fdata/get-used-pointer-ids))))
unused (let [ids (db/create-array conn "uuid" used)]
(->> (db/exec! conn [sql:mark-deleted-data-fragments id ids])
(into #{} (map :id))))]
(doseq [id unused]
(l/trc :hint "mark deleted"
:rel "file-data-fragment"
:id (str id)
:file-id (str id)))
[(count unused) file]))
(defn- clean-file!
[cfg {:keys [id] :as file}]
(let [[n1 file] (clean-file-media! cfg file)
[n2 file] (clean-file-thumbnails! cfg file)
[n3 file] (clean-file-object-thumbnails! cfg file)
[n4 file] (clean-deleted-components! cfg file)
[n5 file] (clean-data-fragments! cfg file)]
(l/dbg :hint "file clened"
:file-id (str id)
:modified-at (dt/format-instant (:modified-at file))
:media-objects n1
:thumbnails n2
:object-thumbnails n3
:components n4
:data-fragments n5)
file))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; HANDLER
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -55,7 +303,7 @@
(assoc ::min-age min-age))
total (reduce (fn [total file]
(clean-file! cfg file)
(process-file! cfg file)
(inc total))
0
(get-candidates cfg))]
@ -69,223 +317,3 @@
(db/rollback! conn))
{:processed total})))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; IMPL
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(def ^:private
sql:get-candidates
"SELECT f.id,
f.data,
f.revn,
f.features,
f.modified_at
FROM file AS f
WHERE f.has_media_trimmed IS false
AND f.modified_at < now() - ?::interval
ORDER BY f.modified_at DESC
FOR UPDATE
SKIP LOCKED")
(defn- get-candidates
[{:keys [::db/conn ::min-age ::file-id]}]
(if (uuid? file-id)
(do
(l/warn :hint "explicit file id passed on params" :file-id (str file-id))
(->> (db/query conn :file {:id file-id})
(map #(update % :features db/decode-pgarray #{}))))
(let [min-age (db/interval min-age)]
(->> (db/cursor conn [sql:get-candidates min-age] {:chunk-size 1})
(map #(update % :features db/decode-pgarray #{}))))))
(def ^:private sql:mark-file-media-object-deleted
"UPDATE file_media_object
SET deleted_at = now()
WHERE file_id = ? AND id != ALL(?::uuid[])
RETURNING id")
(defn- clean-file-media!
"Performs the garbage collection of file media objects."
[conn file-id data]
(let [used (bfc/collect-used-media data)
ids (db/create-array conn "uuid" used)
unused (->> (db/exec! conn [sql:mark-file-media-object-deleted file-id ids])
(into #{} (map :id)))]
(doseq [id unused]
(l/trc :hint "mark deleted"
:rel "file-media-object"
:id (str id)
:file-id (str file-id)))
(count unused)))
(def ^:private sql:mark-file-object-thumbnails-deleted
"UPDATE file_tagged_object_thumbnail
SET deleted_at = now()
WHERE file_id = ? AND object_id != ALL(?::text[])
RETURNING object_id")
(defn- clean-file-object-thumbnails!
[{:keys [::db/conn]} file-id data]
(let [using (->> (vals (:pages-index data))
(into #{} (comp
(mapcat (fn [{:keys [id objects]}]
(->> (ctt/get-frames objects)
(map #(assoc % :page-id id)))))
(mapcat (fn [{:keys [id page-id]}]
(list
(thc/fmt-object-id file-id page-id id "frame")
(thc/fmt-object-id file-id page-id id "component")))))))
ids (db/create-array conn "text" using)
unused (->> (db/exec! conn [sql:mark-file-object-thumbnails-deleted file-id ids])
(into #{} (map :object-id)))]
(doseq [object-id unused]
(l/trc :hint "mark deleted"
:rel "file-tagged-object-thumbnail"
:object-id object-id
:file-id (str file-id)))
(count unused)))
(def ^:private sql:mark-file-thumbnails-deleted
"UPDATE file_thumbnail
SET deleted_at = now()
WHERE file_id = ? AND revn < ?
RETURNING revn")
(defn- clean-file-thumbnails!
[{:keys [::db/conn]} file-id revn]
(let [unused (->> (db/exec! conn [sql:mark-file-thumbnails-deleted file-id revn])
(into #{} (map :revn)))]
(doseq [revn unused]
(l/trc :hint "mark deleted"
:rel "file-thumbnail"
:revn revn
:file-id (str file-id)))
(count unused)))
(def ^:private sql:get-files-for-library
"SELECT f.id, f.data, f.modified_at
FROM file AS f
LEFT JOIN file_library_rel AS fl ON (fl.file_id = f.id)
WHERE fl.library_file_id = ?
AND f.deleted_at IS null
ORDER BY f.modified_at ASC")
(defn- clean-deleted-components!
"Performs the garbage collection of unreferenced deleted components."
[{:keys [::db/conn] :as cfg} file-id data]
(letfn [(get-used-components [fdata components]
;; Find which of the components are used in the file.
(into #{}
(filter #(ctf/used-in? fdata file-id % :component))
components))
(get-unused-components [components files-data]
;; Find and return a set of unused components (on all files).
(reduce (fn [components fdata]
(if (seq components)
(->> (get-used-components fdata components)
(set/difference components))
(reduced components)))
components
files-data))]
(let [deleted (into #{} (ctkl/deleted-components-seq data))
unused (->> (db/cursor conn [sql:get-files-for-library file-id] {:chunk-size 1})
(map (fn [{:keys [id data] :as file}]
(binding [pmap/*load-fn* (partial feat.fdata/load-pointer cfg id)]
(-> (blob/decode data)
(feat.fdata/process-pointers deref)))))
(cons data)
(get-unused-components deleted)
(mapv :id))]
(doseq [id unused]
(l/trc :hint "delete component" :component-id (str id) :file-id (str file-id)))
(when-let [data (some->> (seq unused)
(reduce ctkl/delete-component data)
(blob/encode))]
(db/update! conn :file
{:data data}
{:id file-id}))
(count unused))))
(def ^:private sql:get-changes
"SELECT id, data FROM file_change
WHERE file_id = ? AND data IS NOT NULL
ORDER BY created_at ASC")
(def ^:private sql:mark-deleted-data-fragments
"UPDATE file_data_fragment
SET deleted_at = now()
WHERE file_id = ?
AND id != ALL(?::uuid[])
RETURNING id")
(defn- clean-data-fragments!
[conn file-id data]
(let [used (->> (db/cursor conn [sql:get-changes file-id])
(into (feat.fdata/get-used-pointer-ids data)
(comp (map :data)
(map blob/decode)
(mapcat feat.fdata/get-used-pointer-ids))))
unused (let [ids (db/create-array conn "uuid" used)]
(->> (db/exec! conn [sql:mark-deleted-data-fragments file-id ids])
(into #{} (map :id))))]
(doseq [id unused]
(l/trc :hint "mark deleted"
:rel "file-data-fragment"
:id (str id)
:file-id (str file-id)))
(count unused)))
(defn- clean-file!
[{:keys [::db/conn] :as cfg} {:keys [id data revn modified-at] :as file}]
(binding [pmap/*load-fn* (partial feat.fdata/load-pointer cfg id)
pmap/*tracked* (pmap/create-tracked)]
(let [data (-> (blob/decode data)
(assoc :id id)
(pmg/migrate-data))
nfm (clean-file-media! conn id data)
nfot (clean-file-object-thumbnails! cfg id data)
nft (clean-file-thumbnails! cfg id revn)
nc (clean-deleted-components! cfg id data)
ndf (clean-data-fragments! conn id data)]
(l/dbg :hint "file clened"
:file-id (str id)
:modified-at (dt/format-instant modified-at)
:media-objects nfm
:thumbnails nft
:object-thumbnails nfot
:components nc
:data-fragments ndf)
;; Mark file as trimmed
(db/update! conn :file
{:has-media-trimmed true}
{:id id})
(feat.fdata/persist-pointers! cfg id))))

View file

@ -154,7 +154,7 @@
;; Check the number of fragments before adding the page
(let [rows (th/db-query :file-data-fragment {:file-id (:id file)})]
(t/is (= 1 (count rows))))
(t/is (= 2 (count rows))))
;; Add page
(update-file!
@ -172,15 +172,15 @@
;; Check the number of fragments
(let [rows (th/db-query :file-data-fragment {:file-id (:id file)})]
(t/is (= 2 (count rows))))
(t/is (= 5 (count rows))))
;; The objects-gc should remove unused fragments
(let [res (th/run-task! :objects-gc {:min-age 0})]
(t/is (= 0 (:processed res))))
(t/is (= 1 (:processed res))))
;; Check the number of fragments
(let [rows (th/db-query :file-data-fragment {:file-id (:id file)})]
(t/is (= 2 (count rows))))
(t/is (= 4 (count rows))))
;; Add shape to page that should add a new fragment
(update-file!
@ -203,7 +203,7 @@
;; Check the number of fragments
(let [rows (th/db-query :file-data-fragment {:file-id (:id file)})]
(t/is (= 3 (count rows))))
(t/is (= 5 (count rows))))
;; The file-gc should mark for remove unused fragments
(let [res (th/run-task! :file-gc {:min-age 0})]
@ -211,12 +211,13 @@
;; The objects-gc should remove unused fragments
(let [res (th/run-task! :objects-gc {:min-age 0})]
(t/is (= 0 (:processed res))))
(t/is (= 1 (:processed res))))
;; Check the number of fragments; should be 3 because changes
;; are also holding pointers to fragments;
(let [rows (th/db-query :file-data-fragment {:file-id (:id file)})]
(t/is (= 3 (count rows))))
(let [rows (th/db-query :file-data-fragment {:file-id (:id file)
:deleted-at nil})]
(t/is (= 6 (count rows))))
;; Lets proceed to delete all changes
(th/db-delete! :file-change {:file-id (:id file)})
@ -224,7 +225,6 @@
{:has-media-trimmed false}
{:id (:id file)})
;; The file-gc should remove fragments related to changes
;; snapshots previously deleted.
(let [res (th/run-task! :file-gc {:min-age 0})]
@ -233,11 +233,11 @@
;; Check the number of fragments;
(let [rows (th/db-query :file-data-fragment {:file-id (:id file)})]
;; (pp/pprint rows)
(t/is (= 3 (count rows)))
(t/is (= 8 (count rows)))
(t/is (= 2 (count (remove (comp some? :deleted-at) rows)))))
(let [res (th/run-task! :objects-gc {:min-age 0})]
(t/is (= 1 (:processed res))))
(t/is (= 6 (:processed res))))
(let [rows (th/db-query :file-data-fragment {:file-id (:id file)})]
(t/is (= 2 (count rows)))))))
@ -367,7 +367,7 @@
(t/is (= 1 (:processed res))))
(let [res (th/run-task! :objects-gc {:min-age 0})]
(t/is (= 1 (:processed res))))
(t/is (= 2 (:processed res))))
;; Now that file-gc have deleted the file-media-object usage,
;; lets execute the touched-gc task, we should see that two of
@ -432,6 +432,11 @@
page-id (first (get-in file [:data :pages]))]
(let [rows (th/db-query :file-data-fragment {:file-id (:id file)
:deleted-at nil})]
(t/is (= (count rows) 1)))
;; Update file inserting a new image object
(update-file!
:file-id (:id file)
@ -491,6 +496,10 @@
(let [res (th/run-task! :objects-gc {:min-age 0})]
(t/is (= 1 (:processed res))))
(let [rows (th/db-query :file-data-fragment {:file-id (:id file)
:deleted-at nil})]
(t/is (= (count rows) 2)))
;; retrieve file and check trimmed attribute
(let [row (th/db-get :file {:id (:id file)})]
(t/is (true? (:has-media-trimmed row))))
@ -521,11 +530,16 @@
;; Now, we have deleted the usage of pointers to the
;; file-media-objects, if we paste file-gc, they should be marked
;; as deleted.
(let [res (th/run-task! :file-gc {:min-age 0})]
(t/is (= 1 (:processed res))))
(let [res (th/run-task! :objects-gc {:min-age 0})]
(t/is (= 5 (:processed res))))
(t/is (= 6 (:processed res))))
(let [rows (th/db-query :file-data-fragment {:file-id (:id file)
:deleted-at nil})]
(t/is (= (count rows) 3)))
;; Now that file-gc have deleted the file-media-object usage,
;; lets execute the touched-gc task, we should see that two of
@ -681,7 +695,6 @@
(let [rows (th/db-query :file-tagged-object-thumbnail {:file-id file-id})]
(t/is (= 2 (count rows)))
(t/is (= 1 (count (remove (comp some? :deleted-at) rows))))
(t/is (= (thc/fmt-object-id file-id page-id frame-id-1 "frame")
(-> rows first :object-id))))
@ -689,7 +702,7 @@
;; thumbnail lets execute the objects-gc task which remove
;; the rows and mark as touched the storage object rows
(let [res (th/run-task! :objects-gc {:min-age 0})]
(t/is (= 2 (:processed res))))
(t/is (= 3 (:processed res))))
;; Now that objects-gc have deleted the object thumbnail lets
;; execute the touched-gc task
@ -719,7 +732,7 @@
(let [res (th/run-task! :objects-gc {:min-age 0})]
;; (pp/pprint res)
(t/is (= 1 (:processed res))))
(t/is (= 2 (:processed res))))
;; We still have th storage objects in the table
(let [rows (th/db-query :storage-object {:deleted-at nil})]
@ -736,6 +749,7 @@
;; (pp/pprint rows)
(t/is (= 0 (count rows)))))))
(t/deftest permissions-checks-creating-file
(let [profile1 (th/create-profile* 1)
profile2 (th/create-profile* 2)
@ -1147,7 +1161,7 @@
(t/is (= 1 (count (remove (comp some? :deleted-at) rows)))))
(let [res (th/run-task! :objects-gc {:min-age 0})]
(t/is (= 2 (:processed res))))
(t/is (= 3 (:processed res))))
(let [rows (th/db-query :file-tagged-object-thumbnail {:file-id (:id file)})]
(t/is (= 1 (count rows)))))))
@ -1203,7 +1217,7 @@
(t/is (= 1 (count (remove (comp some? :deleted-at) rows)))))
(let [res (th/run-task! :objects-gc {:min-age 0})]
(t/is (= 1 (:processed res))))
(t/is (= 2 (:processed res))))
(let [rows (th/db-query :file-thumbnail {:file-id (:id file)})]
(t/is (= 1 (count rows)))))))

View file

@ -222,7 +222,7 @@
(t/is (= 1 (:processed result))))
(let [result (th/run-task! :objects-gc {:min-age 0})]
(t/is (= 1 (:processed result))))
(t/is (= 2 (:processed result))))
;; check if row1 related thumbnail row still exists
(let [[row :as rows] (th/db-query :file-thumbnail