From fde0f3c182aee4ae638c3476f78a9b8610b55723 Mon Sep 17 00:00:00 2001 From: Andrey Antukh Date: Wed, 15 Jan 2025 12:19:14 +0100 Subject: [PATCH 1/3] :bug: Pass correct default options on db/plan fn --- backend/src/app/db.clj | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/backend/src/app/db.clj b/backend/src/app/db.clj index d02a8ee4e..b22ecd899 100644 --- a/backend/src/app/db.clj +++ b/backend/src/app/db.clj @@ -414,9 +414,12 @@ row)) (defn plan - [ds sql] - (-> (get-connectable ds) - (jdbc/plan sql sql/default-opts))) + ([ds sql] + (-> (get-connectable ds) + (jdbc/plan sql default-opts))) + ([ds sql opts] + (-> (get-connectable ds) + (jdbc/plan sql (merge default-opts opts))))) (defn cursor "Return a lazy seq of rows using server side cursors" From 5513daf17d1ee72bafae7d9fd7c17b7b743fd06f Mon Sep 17 00:00:00 2001 From: Andrey Antukh Date: Wed, 15 Jan 2025 12:36:13 +0100 Subject: [PATCH 2/3] :recycle: Make the media cleaning mechanism on file-gc task more efficient Replaces the use of db/cursor with db/plan, that teorethically allows processing large results without consuming all result set in memory --- backend/src/app/tasks/file_gc.clj | 23 +++++--- backend/test/backend_tests/helpers.clj | 2 +- backend/test/backend_tests/rpc_file_test.clj | 56 +++++++++----------- 3 files changed, 44 insertions(+), 37 deletions(-) diff --git a/backend/src/app/tasks/file_gc.clj b/backend/src/app/tasks/file_gc.clj index 7e3c3ee27..facc7b60f 100644 --- a/backend/src/app/tasks/file_gc.clj +++ b/backend/src/app/tasks/file_gc.clj @@ -53,16 +53,27 @@ RETURNING id") (def ^:private xf:collect-used-media - (comp (map :data) (mapcat bfc/collect-used-media))) + (comp + (map :data) + (mapcat bfc/collect-used-media))) + +(def ^:private plan-opts + {:fetch-size 1 + :concurrency :read-only + :cursors :close + :result-type :forward-only}) (defn- clean-file-media! "Performs the garbage collection of file media objects." [{:keys [::db/conn] :as cfg} {:keys [id] :as file}] - (let [used (into #{} - xf:collect-used-media - (cons file - (->> (db/cursor conn [sql:get-snapshots id]) - (map (partial decode-file cfg))))) + (let [xform (comp + (map (partial decode-file cfg)) + xf:collect-used-media) + + used (->> (db/plan conn [sql:get-snapshots id] plan-opts) + (transduce xform conj #{})) + used (into used xf:collect-used-media [file]) + ids (db/create-array conn "uuid" used) unused (->> (db/exec! conn [sql:mark-file-media-object-deleted id ids]) (into #{} (map :id)))] diff --git a/backend/test/backend_tests/helpers.clj b/backend/test/backend_tests/helpers.clj index 3aa7d1589..1e1fb7abf 100644 --- a/backend/test/backend_tests/helpers.clj +++ b/backend/test/backend_tests/helpers.clj @@ -62,7 +62,7 @@ (def default {:database-uri "postgresql://postgres/penpot_test" :redis-uri "redis://redis/1" - :file-snapshot-every 1}) + :auto-file-snapshot-every 1}) (def config (cf/read-config :prefix "penpot-test" diff --git a/backend/test/backend_tests/rpc_file_test.clj b/backend/test/backend_tests/rpc_file_test.clj index 679d5221e..733c4a1fd 100644 --- a/backend/test/backend_tests/rpc_file_test.clj +++ b/backend/test/backend_tests/rpc_file_test.clj @@ -383,8 +383,19 @@ ;; as deleted. (t/is (true? (th/run-task! :file-gc {:min-age 0 :file-id (:id file)}))) + ;; This only clears fragments, the file media objects still referenced because + ;; snapshots are preserved (let [res (th/run-task! :objects-gc {:min-age 0})] - (t/is (= 3 (:processed res)))) + (t/is (= 2 (:processed res)))) + + ;; Mark all snapshots to be a non-snapshot file change + (th/db-exec! ["update file_change set data = null where file_id = ?" (:id file)]) + (th/db-exec! ["update file set has_media_trimmed = false where id = ?" (:id file)]) + + ;; Rerun the file-gc and objects-gc + (t/is (true? (th/run-task! :file-gc {:min-age 0 :file-id (:id file)}))) + (let [res (th/run-task! :objects-gc {:min-age 0})] + (t/is (= 2 (:processed res)))) ;; Now that file-gc have deleted the file-media-object usage, ;; lets execute the touched-gc task, we should see that two of @@ -417,20 +428,6 @@ ;; (th/print-result! out) (t/is (nil? (:error out))) - (:result out))) - - (update-file! [& {:keys [profile-id file-id changes revn] :or {revn 0}}] - (let [params {::th/type :update-file - ::rpc/profile-id profile-id - :id file-id - :session-id (uuid/random) - :revn revn - :vern 0 - :components-v2 true - :changes changes} - out (th/command! params)] - ;; (th/print-result! out) - (t/is (nil? (:error out))) (:result out)))] (let [storage (:app.storage/storage th/*system*) @@ -550,8 +547,20 @@ ;; as deleted. (t/is (true? (th/run-task! :file-gc {:min-age 0 :file-id (:id file)}))) + ;; This only removes unused fragments, file media are still + ;; referenced on snapshots. (let [res (th/run-task! :objects-gc {:min-age 0})] - (t/is (= 7 (:processed res)))) + (t/is (= 2 (:processed res)))) + + ;; Mark all snapshots to be a non-snapshot file change + (th/db-exec! ["update file set has_media_trimmed = false where id = ?" (:id file)]) + (th/db-exec! ["update file_change set data = null where file_id = ?" (:id file)]) + + ;; Rerun file-gc and objects-gc task for the same file once all snapshots are + ;; "expired/deleted" + (t/is (true? (th/run-task! :file-gc {:min-age 0 :file-id (:id file)}))) + (let [res (th/run-task! :objects-gc {:min-age 0})] + (t/is (= 6 (:processed res)))) (let [rows (th/db-query :file-data-fragment {:file-id (:id file) :deleted-at nil})] @@ -591,20 +600,7 @@ ;; (th/print-result! out) (t/is (nil? (:error out))) - (:result out))) - - #_(update-file! [& {:keys [profile-id file-id changes revn] :or {revn 0}}] - (let [params {::th/type :update-file - ::rpc/profile-id profile-id - :id file-id - :session-id (uuid/random) - :revn revn - :features cfeat/supported-features - :changes changes} - out (th/command! params)] - ;; (th/print-result! out) - (t/is (nil? (:error out))) - (:result out)))] + (:result out)))] (let [storage (:app.storage/storage th/*system*) profile (th/create-profile* 1) From 151aedcf9115ea3cc8f66762e40719e8656c15b8 Mon Sep 17 00:00:00 2001 From: Andrey Antukh Date: Mon, 20 Jan 2025 16:35:14 +0100 Subject: [PATCH 3/3] :recycle: Make the components cleaning mechanism on file-gc task more efficient --- backend/src/app/tasks/file_gc.clj | 67 ++-- backend/test/backend_tests/rpc_file_test.clj | 326 +++++++++++++++++++ 2 files changed, 357 insertions(+), 36 deletions(-) diff --git a/backend/src/app/tasks/file_gc.clj b/backend/src/app/tasks/file_gc.clj index facc7b60f..7bbdfa07e 100644 --- a/backend/src/app/tasks/file_gc.clj +++ b/backend/src/app/tasks/file_gc.clj @@ -26,7 +26,6 @@ [app.util.pointer-map :as pmap] [app.util.time :as dt] [app.worker :as wrk] - [clojure.set :as set] [integrant.core :as ig])) (declare ^:private get-file) @@ -156,51 +155,47 @@ AND f.deleted_at IS null ORDER BY f.modified_at ASC") +(def ^:private xf:map-id (map :id)) + +(defn- get-used-components + "Given a file and a set of components marked for deletion, return a + filtered set of component ids that are still un use" + [components library-id {:keys [data]}] + (filter #(ctf/used-in? data library-id % :component) components)) + (defn- clean-deleted-components! "Performs the garbage collection of unreferenced deleted components." [{:keys [::db/conn] :as cfg} {:keys [data] :as file}] (let [file-id (:id file) - get-used-components - (fn [data components] - ;; Find which of the components are used in the file. - (into #{} - (filter #(ctf/used-in? data file-id % :component)) - components)) + deleted-components + (ctkl/deleted-components-seq data) - get-unused-components - (fn [components files] - ;; Find and return a set of unused components (on all files). - (reduce (fn [components {:keys [data]}] - (if (seq components) - (->> (get-used-components data components) - (set/difference components)) - (reduced components))) + xform + (mapcat (partial get-used-components deleted-components file-id)) - components - files)) + used-remote + (->> (db/plan conn [sql:get-files-for-library file-id] plan-opts) + (transduce (comp (map (partial decode-file cfg)) xform) conj #{})) - process-fdata - (fn [data unused] - (reduce (fn [data id] - (l/trc :hint "delete component" - :component-id (str id) - :file-id (str file-id)) - (ctkl/delete-component data id)) - data - unused)) + used-local + (into #{} xform [file]) - deleted (into #{} (ctkl/deleted-components-seq data)) - - unused (->> (db/cursor conn [sql:get-files-for-library file-id] {:chunk-size 1}) - (map (partial decode-file cfg)) - (cons file) - (get-unused-components deleted) - (mapv :id) - (set)) - - file (update file :data process-fdata unused)] + unused + (transduce xf:map-id disj + (into #{} xf:map-id deleted-components) + (concat used-remote used-local)) + file + (update file :data + (fn [data] + (reduce (fn [data id] + (l/trc :hint "delete component" + :component-id (str id) + :file-id (str file-id)) + (ctkl/delete-component data id)) + data + unused)))] (l/dbg :hint "clean" :rel "components" :file-id (str file-id) :total (count unused)) file)) diff --git a/backend/test/backend_tests/rpc_file_test.clj b/backend/test/backend_tests/rpc_file_test.clj index 733c4a1fd..b95358101 100644 --- a/backend/test/backend_tests/rpc_file_test.clj +++ b/backend/test/backend_tests/rpc_file_test.clj @@ -1332,3 +1332,329 @@ (t/is (every? #(bytes? (:data %)) rows)) (t/is (every? #(nil? (:data-ref-id %)) rows)) (t/is (every? #(nil? (:data-backend %)) rows))))) + +(t/deftest file-gc-with-components-1 + (let [storage (:app.storage/storage th/*system*) + profile (th/create-profile* 1) + file (th/create-file* 1 {:profile-id (:id profile) + :project-id (:default-project-id profile) + :is-shared false}) + + s-id-1 (uuid/random) + s-id-2 (uuid/random) + c-id (uuid/random) + + page-id (first (get-in file [:data :pages]))] + + (let [rows (th/db-query :file-data-fragment {:file-id (:id file) + :deleted-at nil})] + (t/is (= (count rows) 1))) + + ;; Update file inserting new component + (update-file! + :file-id (:id file) + :profile-id (:id profile) + :revn 0 + :vern 0 + :changes + [{:type :add-obj + :page-id page-id + :id s-id-1 + :parent-id uuid/zero + :frame-id uuid/zero + :components-v2 true + :obj (cts/setup-shape + {:id s-id-1 + :name "Board" + :frame-id uuid/zero + :parent-id uuid/zero + :type :frame + :main-instance true + :component-root true + :component-file (:id file) + :component-id c-id})} + + {:type :add-obj + :page-id page-id + :id s-id-2 + :parent-id uuid/zero + :frame-id uuid/zero + :components-v2 true + :obj (cts/setup-shape + {:id s-id-2 + :name "Board" + :frame-id uuid/zero + :parent-id uuid/zero + :type :frame + :main-instance false + :component-root true + :component-file (:id file) + :component-id c-id})} + + {:type :add-component + :path "" + :name "Board" + :main-instance-id s-id-1 + :main-instance-page page-id + :id c-id + :anotation nil}]) + + ;; Run the file-gc task immediately without forced min-age + (t/is (false? (th/run-task! :file-gc {:file-id (:id file)}))) + + ;; Run the task again + (t/is (true? (th/run-task! :file-gc {:min-age 0 :file-id (:id file)}))) + + ;; Retrieve file and check trimmed attribute + (let [row (th/db-get :file {:id (:id file)})] + (t/is (true? (:has-media-trimmed row)))) + + ;; Check that component exists + (let [data {::th/type :get-file + ::rpc/profile-id (:id profile) + :id (:id file)} + out (th/command! data)] + + (t/is (th/success? out)) + (let [result (:result out) + component (get-in result [:data :components c-id])] + + (t/is (some? component)) + (t/is (nil? (:objects component))))) + + ;; Now proceed to delete a component + (update-file! + :file-id (:id file) + :profile-id (:id profile) + :revn 0 + :vern 0 + :changes + [{:type :del-component + :id c-id} + {:type :del-obj + :page-id page-id + :id s-id-1 + :ignore-touched true}]) + + ;; ;; Check that component is marked as deleted + (let [data {::th/type :get-file + ::rpc/profile-id (:id profile) + :id (:id file)} + out (th/command! data)] + + (t/is (th/success? out)) + (let [result (:result out) + component (get-in result [:data :components c-id])] + (t/is (true? (:deleted component))) + (t/is (some? (not-empty (:objects component)))))) + + ;; Re-run the file-gc task + (t/is (true? (th/run-task! :file-gc {:min-age 0 :file-id (:id file)}))) + (let [row (th/db-get :file {:id (:id file)})] + (t/is (true? (:has-media-trimmed row)))) + + ;; Check that component is still there after file-gc task + (let [data {::th/type :get-file + ::rpc/profile-id (:id profile) + :id (:id file)} + out (th/command! data)] + + (t/is (th/success? out)) + (let [result (:result out) + component (get-in result [:data :components c-id])] + (t/is (true? (:deleted component))) + (t/is (some? (not-empty (:objects component)))))) + + ;; Now delete the last instance using deleted component + (update-file! + :file-id (:id file) + :profile-id (:id profile) + :revn 0 + :vern 0 + :changes + [{:type :del-obj + :page-id page-id + :id s-id-2 + :ignore-touched true}]) + + ;; Now, we have deleted the usage of component if we pass file-gc, + ;; that component should be deleted + (t/is (true? (th/run-task! :file-gc {:min-age 0 :file-id (:id file)}))) + + ;; Check that component is properly removed + (let [data {::th/type :get-file + ::rpc/profile-id (:id profile) + :id (:id file)} + out (th/command! data)] + + (t/is (th/success? out)) + (let [result (:result out) + components (get-in result [:data :components])] + (t/is (not (contains? components c-id))))))) + +(t/deftest file-gc-with-components-2 + (let [storage (:app.storage/storage th/*system*) + profile (th/create-profile* 1) + file-1 (th/create-file* 1 {:profile-id (:id profile) + :project-id (:default-project-id profile) + :is-shared true}) + + file-2 (th/create-file* 2 {:profile-id (:id profile) + :project-id (:default-project-id profile) + :is-shared false}) + + rel (th/link-file-to-library* + {:file-id (:id file-2) + :library-id (:id file-1)}) + + s-id-1 (uuid/random) + s-id-2 (uuid/random) + c-id (uuid/random) + + f1-page-id (first (get-in file-1 [:data :pages])) + f2-page-id (first (get-in file-2 [:data :pages]))] + + ;; Update file library inserting new component + (update-file! + :file-id (:id file-1) + :profile-id (:id profile) + :revn 0 + :vern 0 + :changes + [{:type :add-obj + :page-id f1-page-id + :id s-id-1 + :parent-id uuid/zero + :frame-id uuid/zero + :components-v2 true + :obj (cts/setup-shape + {:id s-id-1 + :name "Board" + :frame-id uuid/zero + :parent-id uuid/zero + :type :frame + :main-instance true + :component-root true + :component-file (:id file-1) + :component-id c-id})} + {:type :add-component + :path "" + :name "Board" + :main-instance-id s-id-1 + :main-instance-page f1-page-id + :id c-id + :anotation nil}]) + + ;; Instanciate a component in a different file + (update-file! + :file-id (:id file-2) + :profile-id (:id profile) + :revn 0 + :vern 0 + :changes + [{:type :add-obj + :page-id f2-page-id + :id s-id-2 + :parent-id uuid/zero + :frame-id uuid/zero + :components-v2 true + :obj (cts/setup-shape + {:id s-id-2 + :name "Board" + :frame-id uuid/zero + :parent-id uuid/zero + :type :frame + :main-instance false + :component-root true + :component-file (:id file-1) + :component-id c-id})}]) + + ;; Run the file-gc on file and library + (t/is (true? (th/run-task! :file-gc {:min-age 0 :file-id (:id file-1)}))) + (t/is (true? (th/run-task! :file-gc {:min-age 0 :file-id (:id file-2)}))) + + ;; Check that component exists + (let [data {::th/type :get-file + ::rpc/profile-id (:id profile) + :id (:id file-1)} + out (th/command! data)] + + (t/is (th/success? out)) + (let [result (:result out) + component (get-in result [:data :components c-id])] + + (t/is (some? component)) + (t/is (nil? (:objects component))))) + + ;; Now proceed to delete a component + (update-file! + :file-id (:id file-1) + :profile-id (:id profile) + :revn 0 + :vern 0 + :changes + [{:type :del-component + :id c-id} + {:type :del-obj + :page-id f1-page-id + :id s-id-1 + :ignore-touched true}]) + + ;; Check that component is marked as deleted + (let [data {::th/type :get-file + ::rpc/profile-id (:id profile) + :id (:id file-1)} + out (th/command! data)] + + (t/is (th/success? out)) + (let [result (:result out) + component (get-in result [:data :components c-id])] + (t/is (true? (:deleted component))) + (t/is (some? (not-empty (:objects component)))))) + + ;; Re-run the file-gc task + (t/is (true? (th/run-task! :file-gc {:min-age 0 :file-id (:id file-1)}))) + (t/is (false? (th/run-task! :file-gc {:min-age 0 :file-id (:id file-2)}))) + + ;; Check that component is still there after file-gc task + (let [data {::th/type :get-file + ::rpc/profile-id (:id profile) + :id (:id file-1)} + out (th/command! data)] + + (t/is (th/success? out)) + (let [result (:result out) + component (get-in result [:data :components c-id])] + (t/is (true? (:deleted component))) + (t/is (some? (not-empty (:objects component)))))) + + ;; Now delete the last instance using deleted component + (update-file! + :file-id (:id file-2) + :profile-id (:id profile) + :revn 0 + :vern 0 + :changes + [{:type :del-obj + :page-id f2-page-id + :id s-id-2 + :ignore-touched true}]) + + ;; Mark + (th/db-exec! ["update file set has_media_trimmed = false where id = ?" (:id file-1)]) + + ;; Now, we have deleted the usage of component if we pass file-gc, + ;; that component should be deleted + (t/is (true? (th/run-task! :file-gc {:min-age 0 :file-id (:id file-1)}))) + + ;; Check that component is properly removed + (let [data {::th/type :get-file + ::rpc/profile-id (:id profile) + :id (:id file-1)} + out (th/command! data)] + + (t/is (th/success? out)) + (let [result (:result out) + components (get-in result [:data :components])] + (t/is (not (contains? components c-id))))))) +