From 5e89aa2726480c942109415e850fb9a0a38800ce Mon Sep 17 00:00:00 2001 From: Andrey Antukh Date: Tue, 25 Apr 2023 17:14:38 +0200 Subject: [PATCH] :sparkles: Improve file-gc task make it more aware of fragments referenced on changes snapshots --- backend/src/app/rpc/commands/binfile.clj | 2 +- backend/src/app/rpc/commands/files.clj | 12 +- backend/src/app/rpc/commands/files_create.clj | 5 +- backend/src/app/rpc/commands/files_update.clj | 3 +- backend/src/app/tasks/file_gc.clj | 172 ++++++++--------- backend/test/backend_tests/helpers.clj | 17 +- backend/test/backend_tests/rpc_file_test.clj | 179 ++++++++++++++---- 7 files changed, 260 insertions(+), 130 deletions(-) diff --git a/backend/src/app/rpc/commands/binfile.clj b/backend/src/app/rpc/commands/binfile.clj index 33e0ed04d..c37b9ae3d 100644 --- a/backend/src/app/rpc/commands/binfile.clj +++ b/backend/src/app/rpc/commands/binfile.clj @@ -625,7 +625,7 @@ (let [file (read-obj! input) media' (read-obj! input) file-id (:id file) - features files/default-features] + features (files/get-default-features)] (when (not= file-id expected-file-id) (ex/raise :type :validation diff --git a/backend/src/app/rpc/commands/files.clj b/backend/src/app/rpc/commands/files.clj index 4aaaba256..9f48652c3 100644 --- a/backend/src/app/rpc/commands/files.clj +++ b/backend/src/app/rpc/commands/files.clj @@ -44,7 +44,8 @@ "storage/pointer-map" "components/v2"}) -(def default-features +(defn get-default-features + [] (cond-> #{} (contains? cf/flags :fdata-storage-pointer-map) (conj "storage/pointer-map") @@ -234,6 +235,15 @@ (update-fn val) val))))))) + +(defn get-all-pointer-ids + "Given a file, return all pointer ids used in the data." + [fdata] + (->> (concat (vals fdata) + (vals (:pages-index fdata))) + (into #{} (comp (filter pmap/pointer-map?) + (map pmap/get-id))))) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; QUERY COMMANDS ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/backend/src/app/rpc/commands/files_create.clj b/backend/src/app/rpc/commands/files_create.clj index 6960b28f0..734c281d4 100644 --- a/backend/src/app/rpc/commands/files_create.clj +++ b/backend/src/app/rpc/commands/files_create.clj @@ -42,8 +42,9 @@ (db/exec-one! conn ["SET CONSTRAINTS ALL DEFERRED;"]) (let [id (or id (uuid/next)) - features (-> (into files/default-features features) - (files/check-features-compatibility!)) + features (->> features + (into (files/get-default-features)) + (files/check-features-compatibility!)) data (binding [pmap/*tracked* (atom {}) ffeat/*current* features diff --git a/backend/src/app/rpc/commands/files_update.clj b/backend/src/app/rpc/commands/files_update.clj index aebaddd1c..239999977 100644 --- a/backend/src/app/rpc/commands/files_update.clj +++ b/backend/src/app/rpc/commands/files_update.clj @@ -148,13 +148,14 @@ (let [file (get-file conn id) features (->> (concat (:features file) (:features params)) - (into files/default-features) + (into (files/get-default-features)) (files/check-features-compatibility!))] (files/check-edition-permissions! conn profile-id (:id file)) (binding [ffeat/*current* features ffeat/*previous* (:features file)] + (let [update-fn (cond-> update-file* (contains? features "storage/pointer-map") (wrap-with-pointer-map-context) diff --git a/backend/src/app/tasks/file_gc.clj b/backend/src/app/tasks/file_gc.clj index 81464453f..2ace1c827 100644 --- a/backend/src/app/tasks/file_gc.clj +++ b/backend/src/app/tasks/file_gc.clj @@ -26,7 +26,7 @@ [clojure.spec.alpha :as s] [integrant.core :as ig])) -(declare ^:private retrieve-candidates) +(declare ^:private get-candidates) (declare ^:private process-file) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -43,31 +43,34 @@ (defmethod ig/init-key ::handler [_ {:keys [::db/pool] :as cfg}] (fn [{:keys [file-id] :as params}] + (db/with-atomic [conn pool] - (let [min-age (or (:min-age params) (::min-age cfg)) - cfg (assoc cfg ::min-age min-age ::conn conn ::file-id file-id)] - (loop [total 0 - files (retrieve-candidates cfg)] - (if-let [file (first files)] - (do - (process-file conn file) - (recur (inc total) - (rest files))) - (do - (l/info :hint "task finished" :min-age (dt/format-duration min-age) :processed total) + (let [min-age (dt/duration (or (:min-age params) (::min-age cfg))) + cfg (-> cfg + (assoc ::db/conn conn) + (assoc ::file-id file-id) + (assoc ::min-age min-age)) - ;; Allow optional rollback passed by params - (when (:rollback? params) - (db/rollback! conn)) + total (reduce (fn [total file] + (process-file cfg file) + (inc total)) + 0 + (get-candidates cfg))] - {:processed total}))))))) + (l/info :hint "task finished" :min-age (dt/format-duration min-age) :processed total) + + ;; Allow optional rollback passed by params + (when (:rollback? params) + (db/rollback! conn)) + + {:processed total})))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; IMPL ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (def ^:private - sql:retrieve-candidates-chunk + sql:get-candidates-chunk "select f.id, f.data, f.revn, @@ -81,8 +84,8 @@ limit 1 for update skip locked") -(defn- retrieve-candidates - [{:keys [::conn ::min-age ::file-id]}] +(defn- get-candidates + [{:keys [::db/conn ::min-age ::file-id]}] (if (uuid? file-id) (do (l/warn :hint "explicit file id passed on params" :file-id file-id) @@ -90,7 +93,7 @@ (map #(update % :features db/decode-pgarray #{})))) (let [interval (db/interval min-age) get-chunk (fn [cursor] - (let [rows (db/exec! conn [sql:retrieve-candidates-chunk interval cursor])] + (let [rows (db/exec! conn [sql:get-candidates-chunk interval cursor])] [(some->> rows peek :modified-at) (map #(update % :features db/decode-pgarray #{}) rows)]))] @@ -100,8 +103,7 @@ :initk (dt/now))))) (defn collect-used-media - "Analyzes the file data and collects all references to external - assets. Returns a set of ids." + "Given a fdata (file data), returns all media references." [data] (let [xform (comp (map :objects) @@ -138,7 +140,7 @@ ;; them. (db/delete! conn :file-media-object {:id (:id mobj)})))) -(defn- clean-file-frame-thumbnails! +(defn- clean-file-object-thumbnails! [conn file-id data] (let [stored (->> (db/query conn :file-object-thumbnail {:file-id file-id} @@ -171,7 +173,7 @@ (l/debug :hint "delete file thumbnails" :file-id file-id :total (:next.jdbc/update-count res))))) (def ^:private - sql:retrieve-client-files + sql:get-files-for-library "select f.data, f.modified_at from file as f left join file_library_rel as fl on (fl.file_id = f.id) @@ -181,75 +183,76 @@ order by f.modified_at desc limit 1") -(defn- retrieve-client-files - "search al files that use the given library. - Returns a sequence of file-data (only reads database rows one by one)." - [conn library-id] - (let [get-chunk (fn [cursor] - (let [rows (db/exec! conn [sql:retrieve-client-files library-id cursor])] - [(some-> rows peek :modified-at) - (map (comp blob/decode :data) rows)]))] - - (d/iteration get-chunk - :vf second - :kf first - :initk (dt/now)))) - (defn- clean-deleted-components! "Performs the garbage collection of unreferenced deleted components." - [conn library-id library-data] - (let [find-used-components-file - (fn [components file-data] - ; Find which of the components are used in the file. - (into #{} - (filter #(ctf/used-in? file-data library-id % :component)) - components)) + [conn file-id data] + (letfn [(get-files-chunk [cursor] + (let [rows (db/exec! conn [sql:get-files-for-library file-id cursor])] + [(some-> rows peek :modified-at) + (map (comp blob/decode :data) rows)])) - find-unused-components - (fn [components files-data] - ; Find what components are NOT used in any of the files. - (loop [files-data files-data - components components] - (let [file-data (first files-data)] - (if (or (nil? file-data) (empty? components)) - components - (let [used-components-file (find-used-components-file components file-data)] - (recur (rest files-data) - (into #{} (remove used-components-file) components))))))) + (get-used-components [fdata components] + ;; Find which of the components are used in the file. + (into #{} + (filter #(ctf/used-in? fdata file-id % :component)) + components)) - deleted-components (set (ctkl/deleted-components-seq library-data)) - unused-components (find-unused-components deleted-components - (cons library-data - (retrieve-client-files conn library-id))) - total (count unused-components)] + (get-unused-components [components files-data] + ;; Find and return a set of unused components (on all files). + (reduce (fn [components fdata] + (if (seq components) + (->> (get-used-components fdata components) + (set/difference components)) + (reduced components))) - (when-not (zero? total) - (l/debug :hint "clean deleted components" :total total) - (let [new-data (reduce #(ctkl/delete-component %1 (:id %2)) - library-data - unused-components)] - (db/update! conn :file - {:data (blob/encode new-data)} - {:id library-id}))))) + components + files-data))] -(def ^:private sql:get-unused-fragments - "SELECT id FROM file_data_fragment - WHERE file_id = ? AND id != ALL(?::uuid[])") + (let [deleted (into #{} (ctkl/deleted-components-seq data)) + unused (->> (d/iteration get-files-chunk :vf second :kf first :initk (dt/now)) + (cons data) + (get-unused-components deleted) + (mapv :id))] + + (when (seq unused) + (l/debug :hint "clean deleted components" :total (count unused)) + + (let [data (reduce ctkl/delete-component data unused)] + (db/update! conn :file + {:data (blob/encode data)} + {:id file-id})))))) (defn- clean-data-fragments! [conn file-id data] - (let [used (->> (concat (vals data) - (vals (:pages-index data))) - (into #{} (comp (filter pmap/pointer-map?) - (map pmap/get-id))) - (db/create-array conn "uuid")) - rows (db/exec! conn [sql:get-unused-fragments file-id used])] - (doseq [fragment-id (map :id rows)] - (l/trace :hint "remove unused file data fragment" :id (str fragment-id)) - (db/delete! conn :file-data-fragment {:id fragment-id :file-id file-id})))) + (letfn [(get-pointers-chunk [cursor] + (let [sql (str "select id, data, created_at " + " from file_change " + " where file_id = ? " + " and data is not null " + " and created_at < ? " + " order by created_at desc " + " limit 1;") + rows (db/exec! conn [sql file-id cursor])] + [(some-> rows peek :created-at) + (mapcat (comp files/get-all-pointer-ids blob/decode :data) rows)]))] + + (let [used (into (files/get-all-pointer-ids data) + (d/iteration get-pointers-chunk + :vf second + :kf first + :initk (dt/now))) + + sql (str "select id from file_data_fragment " + " where file_id = ? AND id != ALL(?::uuid[])") + used (db/create-array conn "uuid" used) + rows (db/exec! conn [sql file-id used])] + + (doseq [fragment-id (map :id rows)] + (l/trace :hint "remove unused file data fragment" :id (str fragment-id)) + (db/delete! conn :file-data-fragment {:id fragment-id :file-id file-id}))))) (defn- process-file - [conn {:keys [id data revn modified-at features] :as file}] + [{:keys [::db/conn]} {:keys [id data revn modified-at features] :as file}] (l/debug :hint "processing file" :id id :modified-at modified-at) (binding [pmap/*load-fn* (partial files/load-pointer conn id)] @@ -258,7 +261,7 @@ (pmg/migrate-data))] (clean-file-media! conn id data) - (clean-file-frame-thumbnails! conn id data) + (clean-file-object-thumbnails! conn id data) (clean-file-thumbnails! conn id revn) (clean-deleted-components! conn id data) @@ -268,5 +271,4 @@ ;; Mark file as trimmed (db/update! conn :file {:has-media-trimmed true} - {:id id}) - nil))) + {:id id})))) diff --git a/backend/test/backend_tests/helpers.clj b/backend/test/backend_tests/helpers.clj index 7c99f12d7..6b05766bc 100644 --- a/backend/test/backend_tests/helpers.clj +++ b/backend/test/backend_tests/helpers.clj @@ -52,7 +52,8 @@ (def defaults {:database-uri "postgresql://postgres/penpot_test" - :redis-uri "redis://redis/1"}) + :redis-uri "redis://redis/1" + :file-change-snapshot-every 1}) (def config (->> (cf/read-env "penpot-test") @@ -63,7 +64,9 @@ [:enable-secure-session-cookies :enable-email-verification :enable-smtp - :enable-quotes]) + :enable-quotes + :enable-fdata-storage-pointer-map + :enable-fdata-storage-objets-map]) (def test-init-sql ["alter table project_profile_rel set unlogged;\n" @@ -134,7 +137,7 @@ :app.auth.oidc/generic-provider :app.setup/builtin-templates :app.auth.oidc/routes - :app.worker/executors-monitor + :app.worker/monitor :app.http.oauth/handler :app.notifications/handler :app.loggers.mattermost/reporter @@ -467,6 +470,14 @@ [sql] (db/exec! *pool* sql)) +(defn db-delete! + [& params] + (apply db/delete! *pool* params)) + +(defn db-update! + [& params] + (apply db/update! *pool* params)) + (defn db-insert! [& params] (apply db/insert! *pool* params)) diff --git a/backend/test/backend_tests/rpc_file_test.clj b/backend/test/backend_tests/rpc_file_test.clj index 939d90633..87e18ef50 100644 --- a/backend/test/backend_tests/rpc_file_test.clj +++ b/backend/test/backend_tests/rpc_file_test.clj @@ -122,8 +122,111 @@ (t/is (= 0 (count result)))))) )) -(t/deftest file-gc-task - (letfn [(create-file-media-object [{:keys [profile-id file-id]}] +(t/deftest file-gc-with-fragments + (letfn [(update-file! [& {:keys [profile-id file-id changes revn] :or {revn 0}}] + (let [params {::th/type :update-file + ::rpc/profile-id profile-id + :id file-id + :session-id (uuid/random) + :revn revn + :components-v2 true + :changes changes} + out (th/command! params)] + (t/is (nil? (:error out))) + (:result out)))] + + (let [profile (th/create-profile* 1) + file (th/create-file* 1 {:profile-id (:id profile) + :project-id (:default-project-id profile) + :is-shared false}) + + page-id (uuid/random) + shape-id (uuid/random)] + + ;; Preventive file-gc + (let [res (th/run-task! "file-gc" {:min-age 0})] + (t/is (= 1 (:processed res)))) + + ;; Check the number of fragments before adding the page + (let [rows (th/db-query :file-data-fragment {:file-id (:id file)})] + (t/is (= 1 (count rows)))) + + ;; Add page + (update-file! + :file-id (:id file) + :profile-id (:id profile) + :revn 0 + :changes + [{:type :add-page + :name "test" + :id page-id}]) + + ;; Check the number of fragments + (let [rows (th/db-query :file-data-fragment {:file-id (:id file)})] + (t/is (= 2 (count rows)))) + + + ;; Check the number of fragments + (let [rows (th/db-query :file-data-fragment {:file-id (:id file)})] + (t/is (= 2 (count rows)))) + + ;; The file-gc should remove unused fragments + (let [res (th/run-task! "file-gc" {:min-age 0})] + (t/is (= 1 (:processed res)))) + + + ;; Add shape to page that should add a new fragment + (update-file! + :file-id (:id file) + :profile-id (:id profile) + :revn 0 + :changes + [{:type :add-obj + :page-id page-id + :id shape-id + :parent-id uuid/zero + :frame-id uuid/zero + :components-v2 true + :obj {:id shape-id + :name "image" + :frame-id uuid/zero + :parent-id uuid/zero + :type :rect}}]) + + ;; Check the number of fragments + (let [rows (th/db-query :file-data-fragment {:file-id (:id file)})] + (t/is (= 3 (count rows)))) + + ;; The file-gc should remove unused fragments + (let [res (th/run-task! "file-gc" {:min-age 0})] + (t/is (= 1 (:processed res)))) + + ;; Check the number of fragments; should be 3 because changes + ;; are also holding pointers to fragments; + (let [rows (th/db-query :file-data-fragment {:file-id (:id file)})] + (t/is (= 3 (count rows)))) + + ;; Lets proceed to delete all changes + (th/db-delete! :file-change {:file-id (:id file)}) + (th/db-update! :file + {:has-media-trimmed false} + {:id (:id file)}) + + + ;; The file-gc should remove fragments related to changes + ;; snapshots previously deleted. + (let [res (th/run-task! "file-gc" {:min-age 0})] + (t/is (= 1 (:processed res)))) + + ;; Check the number of fragments; + (let [rows (th/db-query :file-data-fragment {:file-id (:id file)})] + (t/is (= 2 (count rows)))) + + ))) + + +(t/deftest file-gc-task-with-thumbnails + (letfn [(add-file-media-object [& {:keys [profile-id file-id]}] (let [mfile {:filename "sample.jpg" :path (th/tempfile "backend_tests/test_files/sample.jpg") :mtype "image/jpeg" @@ -140,7 +243,7 @@ (t/is (nil? (:error out))) (:result out))) - (update-file [{:keys [profile-id file-id changes revn] :or {revn 0}}] + (update-file! [& {:keys [profile-id file-id changes revn] :or {revn 0}}] (let [params {::th/type :update-file ::rpc/profile-id profile-id :id file-id @@ -159,29 +262,31 @@ :project-id (:default-project-id profile) :is-shared false}) - fmo1 (create-file-media-object {:profile-id (:id profile) - :file-id (:id file)}) - fmo2 (create-file-media-object {:profile-id (:id profile) - :file-id (:id file)}) + fmo1 (add-file-media-object :profile-id (:id profile) :file-id (:id file)) + fmo2 (add-file-media-object :profile-id (:id profile) :file-id (:id file)) shid (uuid/random) - ures (update-file - {:file-id (:id file) - :profile-id (:id profile) - :revn 0 - :changes - [{:type :add-obj - :page-id (first (get-in file [:data :pages])) - :id shid - :parent-id uuid/zero - :frame-id uuid/zero - :components-v2 true - :obj {:id shid - :name "image" - :frame-id uuid/zero - :parent-id uuid/zero - :type :image - :metadata {:id (:id fmo1)}}}]})] + page-id (first (get-in file [:data :pages]))] + + + ;; Update file inserting a new image object + (update-file! + :file-id (:id file) + :profile-id (:id profile) + :revn 0 + :changes + [{:type :add-obj + :page-id page-id + :id shid + :parent-id uuid/zero + :frame-id uuid/zero + :components-v2 true + :obj {:id shid + :name "image" + :frame-id uuid/zero + :parent-id uuid/zero + :type :image + :metadata {:id (:id fmo1)}}}]) ;; Check that reference storage objects on filemediaobjects ;; are the same because of deduplication feature. @@ -190,28 +295,27 @@ ;; If we launch gc-touched-task, we should have 2 items to ;; freeze because of the deduplication (we have uploaded 2 times - ;; 2 two same files). + ;; the same files). + (let [task (:app.storage/gc-touched-task th/*system*) res (task {:min-age (dt/duration 0)})] (t/is (= 2 (:freeze res))) (t/is (= 0 (:delete res)))) ;; run the file-gc task immediately without forced min-age - (let [task (:app.tasks.file-gc/handler th/*system*) - res (task {})] + (let [res (th/run-task! "file-gc")] (t/is (= 0 (:processed res)))) ;; run the task again - (let [task (:app.tasks.file-gc/handler th/*system*) - res (task {:min-age (dt/duration 0)})] + (let [res (th/run-task! "file-gc" {:min-age 0})] (t/is (= 1 (:processed res)))) ;; retrieve file and check trimmed attribute - (let [row (db/exec-one! th/*pool* ["select * from file where id = ?" (:id file)])] + (let [row (th/db-get :file {:id (:id file)})] (t/is (true? (:has-media-trimmed row)))) ;; check file media objects - (let [rows (db/exec! th/*pool* ["select * from file_media_object where file_id = ?" (:id file)])] + (let [rows (th/db-exec! ["select * from file_media_object where file_id = ?" (:id file)])] (t/is (= 1 (count rows)))) ;; The underlying storage objects are still available. @@ -221,12 +325,13 @@ (t/is (some? (sto/get-object storage (:thumbnail-id fmo1)))) ;; proceed to remove usage of the file - (update-file {:file-id (:id file) - :profile-id (:id profile) - :revn 0 - :changes [{:type :del-obj - :page-id (first (get-in file [:data :pages])) - :id shid}]}) + (update-file! + :file-id (:id file) + :profile-id (:id profile) + :revn 0 + :changes [{:type :del-obj + :page-id (first (get-in file [:data :pages])) + :id shid}]) ;; Now, we have deleted the usage of pointers to the ;; file-media-objects, if we paste file-gc, they should be marked