0
Fork 0
mirror of https://github.com/penpot/penpot.git synced 2025-01-23 23:18:48 -05:00

Improve file-gc task

make it more aware of fragments referenced on changes snapshots
This commit is contained in:
Andrey Antukh 2023-04-25 17:14:38 +02:00
parent 82dad3217b
commit 5e89aa2726
7 changed files with 260 additions and 130 deletions

View file

@ -625,7 +625,7 @@
(let [file (read-obj! input) (let [file (read-obj! input)
media' (read-obj! input) media' (read-obj! input)
file-id (:id file) file-id (:id file)
features files/default-features] features (files/get-default-features)]
(when (not= file-id expected-file-id) (when (not= file-id expected-file-id)
(ex/raise :type :validation (ex/raise :type :validation

View file

@ -44,7 +44,8 @@
"storage/pointer-map" "storage/pointer-map"
"components/v2"}) "components/v2"})
(def default-features (defn get-default-features
[]
(cond-> #{} (cond-> #{}
(contains? cf/flags :fdata-storage-pointer-map) (contains? cf/flags :fdata-storage-pointer-map)
(conj "storage/pointer-map") (conj "storage/pointer-map")
@ -234,6 +235,15 @@
(update-fn val) (update-fn val)
val))))))) val)))))))
(defn get-all-pointer-ids
"Given a file, return all pointer ids used in the data."
[fdata]
(->> (concat (vals fdata)
(vals (:pages-index fdata)))
(into #{} (comp (filter pmap/pointer-map?)
(map pmap/get-id)))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; QUERY COMMANDS ;; QUERY COMMANDS
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

View file

@ -42,8 +42,9 @@
(db/exec-one! conn ["SET CONSTRAINTS ALL DEFERRED;"]) (db/exec-one! conn ["SET CONSTRAINTS ALL DEFERRED;"])
(let [id (or id (uuid/next)) (let [id (or id (uuid/next))
features (-> (into files/default-features features) features (->> features
(files/check-features-compatibility!)) (into (files/get-default-features))
(files/check-features-compatibility!))
data (binding [pmap/*tracked* (atom {}) data (binding [pmap/*tracked* (atom {})
ffeat/*current* features ffeat/*current* features

View file

@ -148,13 +148,14 @@
(let [file (get-file conn id) (let [file (get-file conn id)
features (->> (concat (:features file) features (->> (concat (:features file)
(:features params)) (:features params))
(into files/default-features) (into (files/get-default-features))
(files/check-features-compatibility!))] (files/check-features-compatibility!))]
(files/check-edition-permissions! conn profile-id (:id file)) (files/check-edition-permissions! conn profile-id (:id file))
(binding [ffeat/*current* features (binding [ffeat/*current* features
ffeat/*previous* (:features file)] ffeat/*previous* (:features file)]
(let [update-fn (cond-> update-file* (let [update-fn (cond-> update-file*
(contains? features "storage/pointer-map") (contains? features "storage/pointer-map")
(wrap-with-pointer-map-context) (wrap-with-pointer-map-context)

View file

@ -26,7 +26,7 @@
[clojure.spec.alpha :as s] [clojure.spec.alpha :as s]
[integrant.core :as ig])) [integrant.core :as ig]))
(declare ^:private retrieve-candidates) (declare ^:private get-candidates)
(declare ^:private process-file) (declare ^:private process-file)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -43,31 +43,34 @@
(defmethod ig/init-key ::handler (defmethod ig/init-key ::handler
[_ {:keys [::db/pool] :as cfg}] [_ {:keys [::db/pool] :as cfg}]
(fn [{:keys [file-id] :as params}] (fn [{:keys [file-id] :as params}]
(db/with-atomic [conn pool] (db/with-atomic [conn pool]
(let [min-age (or (:min-age params) (::min-age cfg)) (let [min-age (dt/duration (or (:min-age params) (::min-age cfg)))
cfg (assoc cfg ::min-age min-age ::conn conn ::file-id file-id)] cfg (-> cfg
(loop [total 0 (assoc ::db/conn conn)
files (retrieve-candidates cfg)] (assoc ::file-id file-id)
(if-let [file (first files)] (assoc ::min-age min-age))
(do
(process-file conn file)
(recur (inc total)
(rest files)))
(do
(l/info :hint "task finished" :min-age (dt/format-duration min-age) :processed total)
;; Allow optional rollback passed by params total (reduce (fn [total file]
(when (:rollback? params) (process-file cfg file)
(db/rollback! conn)) (inc total))
0
(get-candidates cfg))]
{:processed total}))))))) (l/info :hint "task finished" :min-age (dt/format-duration min-age) :processed total)
;; Allow optional rollback passed by params
(when (:rollback? params)
(db/rollback! conn))
{:processed total}))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; IMPL ;; IMPL
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(def ^:private (def ^:private
sql:retrieve-candidates-chunk sql:get-candidates-chunk
"select f.id, "select f.id,
f.data, f.data,
f.revn, f.revn,
@ -81,8 +84,8 @@
limit 1 limit 1
for update skip locked") for update skip locked")
(defn- retrieve-candidates (defn- get-candidates
[{:keys [::conn ::min-age ::file-id]}] [{:keys [::db/conn ::min-age ::file-id]}]
(if (uuid? file-id) (if (uuid? file-id)
(do (do
(l/warn :hint "explicit file id passed on params" :file-id file-id) (l/warn :hint "explicit file id passed on params" :file-id file-id)
@ -90,7 +93,7 @@
(map #(update % :features db/decode-pgarray #{})))) (map #(update % :features db/decode-pgarray #{}))))
(let [interval (db/interval min-age) (let [interval (db/interval min-age)
get-chunk (fn [cursor] get-chunk (fn [cursor]
(let [rows (db/exec! conn [sql:retrieve-candidates-chunk interval cursor])] (let [rows (db/exec! conn [sql:get-candidates-chunk interval cursor])]
[(some->> rows peek :modified-at) [(some->> rows peek :modified-at)
(map #(update % :features db/decode-pgarray #{}) rows)]))] (map #(update % :features db/decode-pgarray #{}) rows)]))]
@ -100,8 +103,7 @@
:initk (dt/now))))) :initk (dt/now)))))
(defn collect-used-media (defn collect-used-media
"Analyzes the file data and collects all references to external "Given a fdata (file data), returns all media references."
assets. Returns a set of ids."
[data] [data]
(let [xform (comp (let [xform (comp
(map :objects) (map :objects)
@ -138,7 +140,7 @@
;; them. ;; them.
(db/delete! conn :file-media-object {:id (:id mobj)})))) (db/delete! conn :file-media-object {:id (:id mobj)}))))
(defn- clean-file-frame-thumbnails! (defn- clean-file-object-thumbnails!
[conn file-id data] [conn file-id data]
(let [stored (->> (db/query conn :file-object-thumbnail (let [stored (->> (db/query conn :file-object-thumbnail
{:file-id file-id} {:file-id file-id}
@ -171,7 +173,7 @@
(l/debug :hint "delete file thumbnails" :file-id file-id :total (:next.jdbc/update-count res))))) (l/debug :hint "delete file thumbnails" :file-id file-id :total (:next.jdbc/update-count res)))))
(def ^:private (def ^:private
sql:retrieve-client-files sql:get-files-for-library
"select f.data, f.modified_at "select f.data, f.modified_at
from file as f from file as f
left join file_library_rel as fl on (fl.file_id = f.id) left join file_library_rel as fl on (fl.file_id = f.id)
@ -181,75 +183,76 @@
order by f.modified_at desc order by f.modified_at desc
limit 1") limit 1")
(defn- retrieve-client-files
"search al files that use the given library.
Returns a sequence of file-data (only reads database rows one by one)."
[conn library-id]
(let [get-chunk (fn [cursor]
(let [rows (db/exec! conn [sql:retrieve-client-files library-id cursor])]
[(some-> rows peek :modified-at)
(map (comp blob/decode :data) rows)]))]
(d/iteration get-chunk
:vf second
:kf first
:initk (dt/now))))
(defn- clean-deleted-components! (defn- clean-deleted-components!
"Performs the garbage collection of unreferenced deleted components." "Performs the garbage collection of unreferenced deleted components."
[conn library-id library-data] [conn file-id data]
(let [find-used-components-file (letfn [(get-files-chunk [cursor]
(fn [components file-data] (let [rows (db/exec! conn [sql:get-files-for-library file-id cursor])]
; Find which of the components are used in the file. [(some-> rows peek :modified-at)
(into #{} (map (comp blob/decode :data) rows)]))
(filter #(ctf/used-in? file-data library-id % :component))
components))
find-unused-components (get-used-components [fdata components]
(fn [components files-data] ;; Find which of the components are used in the file.
; Find what components are NOT used in any of the files. (into #{}
(loop [files-data files-data (filter #(ctf/used-in? fdata file-id % :component))
components components] components))
(let [file-data (first files-data)]
(if (or (nil? file-data) (empty? components))
components
(let [used-components-file (find-used-components-file components file-data)]
(recur (rest files-data)
(into #{} (remove used-components-file) components)))))))
deleted-components (set (ctkl/deleted-components-seq library-data)) (get-unused-components [components files-data]
unused-components (find-unused-components deleted-components ;; Find and return a set of unused components (on all files).
(cons library-data (reduce (fn [components fdata]
(retrieve-client-files conn library-id))) (if (seq components)
total (count unused-components)] (->> (get-used-components fdata components)
(set/difference components))
(reduced components)))
(when-not (zero? total) components
(l/debug :hint "clean deleted components" :total total) files-data))]
(let [new-data (reduce #(ctkl/delete-component %1 (:id %2))
library-data
unused-components)]
(db/update! conn :file
{:data (blob/encode new-data)}
{:id library-id})))))
(def ^:private sql:get-unused-fragments (let [deleted (into #{} (ctkl/deleted-components-seq data))
"SELECT id FROM file_data_fragment unused (->> (d/iteration get-files-chunk :vf second :kf first :initk (dt/now))
WHERE file_id = ? AND id != ALL(?::uuid[])") (cons data)
(get-unused-components deleted)
(mapv :id))]
(when (seq unused)
(l/debug :hint "clean deleted components" :total (count unused))
(let [data (reduce ctkl/delete-component data unused)]
(db/update! conn :file
{:data (blob/encode data)}
{:id file-id}))))))
(defn- clean-data-fragments! (defn- clean-data-fragments!
[conn file-id data] [conn file-id data]
(let [used (->> (concat (vals data) (letfn [(get-pointers-chunk [cursor]
(vals (:pages-index data))) (let [sql (str "select id, data, created_at "
(into #{} (comp (filter pmap/pointer-map?) " from file_change "
(map pmap/get-id))) " where file_id = ? "
(db/create-array conn "uuid")) " and data is not null "
rows (db/exec! conn [sql:get-unused-fragments file-id used])] " and created_at < ? "
(doseq [fragment-id (map :id rows)] " order by created_at desc "
(l/trace :hint "remove unused file data fragment" :id (str fragment-id)) " limit 1;")
(db/delete! conn :file-data-fragment {:id fragment-id :file-id file-id})))) rows (db/exec! conn [sql file-id cursor])]
[(some-> rows peek :created-at)
(mapcat (comp files/get-all-pointer-ids blob/decode :data) rows)]))]
(let [used (into (files/get-all-pointer-ids data)
(d/iteration get-pointers-chunk
:vf second
:kf first
:initk (dt/now)))
sql (str "select id from file_data_fragment "
" where file_id = ? AND id != ALL(?::uuid[])")
used (db/create-array conn "uuid" used)
rows (db/exec! conn [sql file-id used])]
(doseq [fragment-id (map :id rows)]
(l/trace :hint "remove unused file data fragment" :id (str fragment-id))
(db/delete! conn :file-data-fragment {:id fragment-id :file-id file-id})))))
(defn- process-file (defn- process-file
[conn {:keys [id data revn modified-at features] :as file}] [{:keys [::db/conn]} {:keys [id data revn modified-at features] :as file}]
(l/debug :hint "processing file" :id id :modified-at modified-at) (l/debug :hint "processing file" :id id :modified-at modified-at)
(binding [pmap/*load-fn* (partial files/load-pointer conn id)] (binding [pmap/*load-fn* (partial files/load-pointer conn id)]
@ -258,7 +261,7 @@
(pmg/migrate-data))] (pmg/migrate-data))]
(clean-file-media! conn id data) (clean-file-media! conn id data)
(clean-file-frame-thumbnails! conn id data) (clean-file-object-thumbnails! conn id data)
(clean-file-thumbnails! conn id revn) (clean-file-thumbnails! conn id revn)
(clean-deleted-components! conn id data) (clean-deleted-components! conn id data)
@ -268,5 +271,4 @@
;; Mark file as trimmed ;; Mark file as trimmed
(db/update! conn :file (db/update! conn :file
{:has-media-trimmed true} {:has-media-trimmed true}
{:id id}) {:id id}))))
nil)))

View file

@ -52,7 +52,8 @@
(def defaults (def defaults
{:database-uri "postgresql://postgres/penpot_test" {:database-uri "postgresql://postgres/penpot_test"
:redis-uri "redis://redis/1"}) :redis-uri "redis://redis/1"
:file-change-snapshot-every 1})
(def config (def config
(->> (cf/read-env "penpot-test") (->> (cf/read-env "penpot-test")
@ -63,7 +64,9 @@
[:enable-secure-session-cookies [:enable-secure-session-cookies
:enable-email-verification :enable-email-verification
:enable-smtp :enable-smtp
:enable-quotes]) :enable-quotes
:enable-fdata-storage-pointer-map
:enable-fdata-storage-objets-map])
(def test-init-sql (def test-init-sql
["alter table project_profile_rel set unlogged;\n" ["alter table project_profile_rel set unlogged;\n"
@ -134,7 +137,7 @@
:app.auth.oidc/generic-provider :app.auth.oidc/generic-provider
:app.setup/builtin-templates :app.setup/builtin-templates
:app.auth.oidc/routes :app.auth.oidc/routes
:app.worker/executors-monitor :app.worker/monitor
:app.http.oauth/handler :app.http.oauth/handler
:app.notifications/handler :app.notifications/handler
:app.loggers.mattermost/reporter :app.loggers.mattermost/reporter
@ -467,6 +470,14 @@
[sql] [sql]
(db/exec! *pool* sql)) (db/exec! *pool* sql))
(defn db-delete!
[& params]
(apply db/delete! *pool* params))
(defn db-update!
[& params]
(apply db/update! *pool* params))
(defn db-insert! (defn db-insert!
[& params] [& params]
(apply db/insert! *pool* params)) (apply db/insert! *pool* params))

View file

@ -122,8 +122,111 @@
(t/is (= 0 (count result)))))) (t/is (= 0 (count result))))))
)) ))
(t/deftest file-gc-task (t/deftest file-gc-with-fragments
(letfn [(create-file-media-object [{:keys [profile-id file-id]}] (letfn [(update-file! [& {:keys [profile-id file-id changes revn] :or {revn 0}}]
(let [params {::th/type :update-file
::rpc/profile-id profile-id
:id file-id
:session-id (uuid/random)
:revn revn
:components-v2 true
:changes changes}
out (th/command! params)]
(t/is (nil? (:error out)))
(:result out)))]
(let [profile (th/create-profile* 1)
file (th/create-file* 1 {:profile-id (:id profile)
:project-id (:default-project-id profile)
:is-shared false})
page-id (uuid/random)
shape-id (uuid/random)]
;; Preventive file-gc
(let [res (th/run-task! "file-gc" {:min-age 0})]
(t/is (= 1 (:processed res))))
;; Check the number of fragments before adding the page
(let [rows (th/db-query :file-data-fragment {:file-id (:id file)})]
(t/is (= 1 (count rows))))
;; Add page
(update-file!
:file-id (:id file)
:profile-id (:id profile)
:revn 0
:changes
[{:type :add-page
:name "test"
:id page-id}])
;; Check the number of fragments
(let [rows (th/db-query :file-data-fragment {:file-id (:id file)})]
(t/is (= 2 (count rows))))
;; Check the number of fragments
(let [rows (th/db-query :file-data-fragment {:file-id (:id file)})]
(t/is (= 2 (count rows))))
;; The file-gc should remove unused fragments
(let [res (th/run-task! "file-gc" {:min-age 0})]
(t/is (= 1 (:processed res))))
;; Add shape to page that should add a new fragment
(update-file!
:file-id (:id file)
:profile-id (:id profile)
:revn 0
:changes
[{:type :add-obj
:page-id page-id
:id shape-id
:parent-id uuid/zero
:frame-id uuid/zero
:components-v2 true
:obj {:id shape-id
:name "image"
:frame-id uuid/zero
:parent-id uuid/zero
:type :rect}}])
;; Check the number of fragments
(let [rows (th/db-query :file-data-fragment {:file-id (:id file)})]
(t/is (= 3 (count rows))))
;; The file-gc should remove unused fragments
(let [res (th/run-task! "file-gc" {:min-age 0})]
(t/is (= 1 (:processed res))))
;; Check the number of fragments; should be 3 because changes
;; are also holding pointers to fragments;
(let [rows (th/db-query :file-data-fragment {:file-id (:id file)})]
(t/is (= 3 (count rows))))
;; Lets proceed to delete all changes
(th/db-delete! :file-change {:file-id (:id file)})
(th/db-update! :file
{:has-media-trimmed false}
{:id (:id file)})
;; The file-gc should remove fragments related to changes
;; snapshots previously deleted.
(let [res (th/run-task! "file-gc" {:min-age 0})]
(t/is (= 1 (:processed res))))
;; Check the number of fragments;
(let [rows (th/db-query :file-data-fragment {:file-id (:id file)})]
(t/is (= 2 (count rows))))
)))
(t/deftest file-gc-task-with-thumbnails
(letfn [(add-file-media-object [& {:keys [profile-id file-id]}]
(let [mfile {:filename "sample.jpg" (let [mfile {:filename "sample.jpg"
:path (th/tempfile "backend_tests/test_files/sample.jpg") :path (th/tempfile "backend_tests/test_files/sample.jpg")
:mtype "image/jpeg" :mtype "image/jpeg"
@ -140,7 +243,7 @@
(t/is (nil? (:error out))) (t/is (nil? (:error out)))
(:result out))) (:result out)))
(update-file [{:keys [profile-id file-id changes revn] :or {revn 0}}] (update-file! [& {:keys [profile-id file-id changes revn] :or {revn 0}}]
(let [params {::th/type :update-file (let [params {::th/type :update-file
::rpc/profile-id profile-id ::rpc/profile-id profile-id
:id file-id :id file-id
@ -159,29 +262,31 @@
:project-id (:default-project-id profile) :project-id (:default-project-id profile)
:is-shared false}) :is-shared false})
fmo1 (create-file-media-object {:profile-id (:id profile) fmo1 (add-file-media-object :profile-id (:id profile) :file-id (:id file))
:file-id (:id file)}) fmo2 (add-file-media-object :profile-id (:id profile) :file-id (:id file))
fmo2 (create-file-media-object {:profile-id (:id profile)
:file-id (:id file)})
shid (uuid/random) shid (uuid/random)
ures (update-file page-id (first (get-in file [:data :pages]))]
{:file-id (:id file)
:profile-id (:id profile)
:revn 0 ;; Update file inserting a new image object
:changes (update-file!
[{:type :add-obj :file-id (:id file)
:page-id (first (get-in file [:data :pages])) :profile-id (:id profile)
:id shid :revn 0
:parent-id uuid/zero :changes
:frame-id uuid/zero [{:type :add-obj
:components-v2 true :page-id page-id
:obj {:id shid :id shid
:name "image" :parent-id uuid/zero
:frame-id uuid/zero :frame-id uuid/zero
:parent-id uuid/zero :components-v2 true
:type :image :obj {:id shid
:metadata {:id (:id fmo1)}}}]})] :name "image"
:frame-id uuid/zero
:parent-id uuid/zero
:type :image
:metadata {:id (:id fmo1)}}}])
;; Check that reference storage objects on filemediaobjects ;; Check that reference storage objects on filemediaobjects
;; are the same because of deduplication feature. ;; are the same because of deduplication feature.
@ -190,28 +295,27 @@
;; If we launch gc-touched-task, we should have 2 items to ;; If we launch gc-touched-task, we should have 2 items to
;; freeze because of the deduplication (we have uploaded 2 times ;; freeze because of the deduplication (we have uploaded 2 times
;; 2 two same files). ;; the same files).
(let [task (:app.storage/gc-touched-task th/*system*) (let [task (:app.storage/gc-touched-task th/*system*)
res (task {:min-age (dt/duration 0)})] res (task {:min-age (dt/duration 0)})]
(t/is (= 2 (:freeze res))) (t/is (= 2 (:freeze res)))
(t/is (= 0 (:delete res)))) (t/is (= 0 (:delete res))))
;; run the file-gc task immediately without forced min-age ;; run the file-gc task immediately without forced min-age
(let [task (:app.tasks.file-gc/handler th/*system*) (let [res (th/run-task! "file-gc")]
res (task {})]
(t/is (= 0 (:processed res)))) (t/is (= 0 (:processed res))))
;; run the task again ;; run the task again
(let [task (:app.tasks.file-gc/handler th/*system*) (let [res (th/run-task! "file-gc" {:min-age 0})]
res (task {:min-age (dt/duration 0)})]
(t/is (= 1 (:processed res)))) (t/is (= 1 (:processed res))))
;; retrieve file and check trimmed attribute ;; retrieve file and check trimmed attribute
(let [row (db/exec-one! th/*pool* ["select * from file where id = ?" (:id file)])] (let [row (th/db-get :file {:id (:id file)})]
(t/is (true? (:has-media-trimmed row)))) (t/is (true? (:has-media-trimmed row))))
;; check file media objects ;; check file media objects
(let [rows (db/exec! th/*pool* ["select * from file_media_object where file_id = ?" (:id file)])] (let [rows (th/db-exec! ["select * from file_media_object where file_id = ?" (:id file)])]
(t/is (= 1 (count rows)))) (t/is (= 1 (count rows))))
;; The underlying storage objects are still available. ;; The underlying storage objects are still available.
@ -221,12 +325,13 @@
(t/is (some? (sto/get-object storage (:thumbnail-id fmo1)))) (t/is (some? (sto/get-object storage (:thumbnail-id fmo1))))
;; proceed to remove usage of the file ;; proceed to remove usage of the file
(update-file {:file-id (:id file) (update-file!
:profile-id (:id profile) :file-id (:id file)
:revn 0 :profile-id (:id profile)
:changes [{:type :del-obj :revn 0
:page-id (first (get-in file [:data :pages])) :changes [{:type :del-obj
:id shid}]}) :page-id (first (get-in file [:data :pages]))
:id shid}])
;; Now, we have deleted the usage of pointers to the ;; Now, we have deleted the usage of pointers to the
;; file-media-objects, if we paste file-gc, they should be marked ;; file-media-objects, if we paste file-gc, they should be marked