Fork 0
mirror of https://github.com/penpot/penpot.git synced 2025-03-18 10:41:29 -05:00

Add deduplication for file object thumbnails

This commit is contained in:
Andrey Antukh 2023-11-24 10:40:56 +01:00
parent 6d49e1cac5
commit d82ebdc034
7 changed files with 278 additions and 70 deletions

View file

@ -844,7 +844,7 @@
::l/sync? true))))))
(defmethod read-section :v1/sobjects
[{:keys [::sto/storage ::db/conn ::input ::overwrite?]}]
[{:keys [::sto/storage ::db/conn ::input ::overwrite? ::timestamp]}]
(let [storage (media/configure-assets-storage storage)
ids (read-obj! input)
thumb? (into #{} (map :media-id) (:thumbnails @*state*))]
@ -865,15 +865,14 @@
content (-> (sto/content resource size)
(sto/wrap-with-hash hash))
params (assoc mdata ::sto/content content)
params (-> mdata
(assoc ::sto/content content)
(assoc ::sto/deduplicate? true)
(assoc ::sto/touched-at timestamp))
params (if (thumb? id)
(-> params
(assoc ::sto/deduplicate? false)
(assoc :bucket "file-object-thumbnail"))
(-> params
(assoc ::sto/deduplicate? true)
(assoc ::sto/touched-at (dt/now))
(assoc :bucket "file-media-object")))
(assoc params :bucket "file-object-thumbnail")
(assoc params :bucket "file-media-object"))
sobject (sto/put-object! storage params)]
@ -893,7 +892,7 @@
(let [file-id (lookup-index (:file-id item))]
(if (= file-id (:file-id item))
(l/warn :hint "ignoring file media object" :file-id (str (:file-id item)) ::l/sync? true)
(l/warn :hint "ignoring file media object" :file-id (str file-id) ::l/sync? true)
(db/insert! conn :file-media-object
(-> item
(assoc :file-id file-id)
@ -908,8 +907,7 @@
:media-id (str (:media-id item))
:object-id (:object-id item)
::l/sync? true)
(db/insert! conn :file-tagged-object-thumbnail
(update item :media-id lookup-index)
(db/insert! conn :file-tagged-object-thumbnail item
{:on-conflict-do-nothing overwrite?})))))
(defn- lookup-index

View file

@ -249,7 +249,8 @@
(sto/wrap-with-hash hash))
media (sto/put-object! storage
{::sto/content data
::sto/deduplicate? false
::sto/deduplicate? true
::sto/touched-at (dt/now)
:content-type mtype
:bucket "file-object-thumbnail"})]
@ -292,7 +293,7 @@
:object-id object-id}
{::db/for-update? true})]
(sto/del-object! storage media-id)
(sto/touch-object! storage media-id)
(db/delete! conn :file-tagged-object-thumbnail
{:file-id file-id
:object-id object-id})

View file

@ -322,10 +322,11 @@
;; and the object is still valid) or deleted (no more references to
;; this object so is ready to be deleted).
(declare sql:retrieve-touched-objects-chunk)
(declare sql:retrieve-file-media-object-nrefs)
(declare sql:retrieve-team-font-variant-nrefs)
(declare sql:retrieve-file-object-thumbnail-nrefs)
(declare sql:retrieve-profile-nrefs)
(declare sql:retrieve-team-font-variant-nrefs)
(declare sql:retrieve-touched-objects-chunk)
;; Pre-init spec for the ::gc-touched-task component: its configuration
;; map is required to contain the ::db/pool key.
(defmethod ig/pre-init-spec ::gc-touched-task [_]
(s/keys :req [::db/pool]))
@ -341,6 +342,9 @@
(get-profile-nrefs [conn id]
(-> (db/exec-one! conn [sql:retrieve-profile-nrefs id id]) :nrefs))
(get-file-object-thumbnails [conn id]
(-> (db/exec-one! conn [sql:retrieve-file-object-thumbnail-nrefs id]) :nrefs))
(mark-freeze-in-bulk [conn ids]
(db/exec-one! conn ["update storage_object set touched_at=null where id = ANY(?)"
(db/create-array conn "uuid" ids)]))
@ -410,9 +414,10 @@
groups (retrieve-touched conn)]
(if-let [[bucket ids] (first groups)]
(let [[f d] (case bucket
"file-media-object" (process-objects! conn get-file-media-object-nrefs ids bucket)
"team-font-variant" (process-objects! conn get-team-font-variant-nrefs ids bucket)
"profile" (process-objects! conn get-profile-nrefs ids bucket)
"file-media-object" (process-objects! conn get-file-media-object-nrefs ids bucket)
"team-font-variant" (process-objects! conn get-team-font-variant-nrefs ids bucket)
"file-object-thumbnail" (process-objects! conn get-file-object-thumbnails ids bucket)
"profile" (process-objects! conn get-profile-nrefs ids bucket)
(ex/raise :type :internal
:code :unexpected-unknown-reference
:hint (dm/fmt "unknown reference %" bucket)))]
@ -435,6 +440,9 @@
"select ((select count(*) from file_media_object where media_id = ?) +
(select count(*) from file_media_object where thumbnail_id = ?)) as nrefs")
;; SQL query returning, as the single column `nrefs`, the number of
;; rows in file_tagged_object_thumbnail whose media_id equals the one
;; bound parameter.
(def sql:retrieve-file-object-thumbnail-nrefs
"select (select count(*) from file_tagged_object_thumbnail where media_id = ?) as nrefs")
(def sql:retrieve-team-font-variant-nrefs
"select ((select count(*) from team_font_variant where woff1_file_id = ?) +
(select count(*) from team_font_variant where woff2_file_id = ?) +

View file

@ -118,15 +118,15 @@
;; have fill-image attribute (which was initially
;; designed for :path shapes).
(keep :id)
(concat [(:fill-image obj)
(:metadata obj)]
(map :fill-image (:fills obj))
(map :stroke-image (:strokes obj))
(->> (:content obj)
(tree-seq map? :children)
(mapcat :fills)
(map :fill-image)))))))
(keep :id)
(concat [(:fill-image obj)
(:metadata obj)]
(map :fill-image (:fills obj))
(map :stroke-image (:strokes obj))
(->> (:content obj)
(tree-seq map? :children)
(mapcat :fills)
(map :fill-image)))))))
pages (concat
(vals (:pages-index data))
(vals (:components data)))]
@ -142,10 +142,10 @@
(remove #(contains? used (:id %))))]
(doseq [mobj unused]
(l/debug :hint "delete file media object"
:id (:id mobj)
:media-id (:media-id mobj)
:thumbnail-id (:thumbnail-id mobj))
(l/dbg :hint "delete file media object"
:id (:id mobj)
:media-id (:media-id mobj)
:thumbnail-id (:thumbnail-id mobj))
;; NOTE: deleting the file-media-object in the database
;; automatically marks as touched the referenced storage
@ -154,21 +154,23 @@
;; them.
(db/delete! conn :file-media-object {:id (:id mobj)}))))
(defn- clean-file-tagged-object-thumbnails!
(defn- clean-file-object-thumbnails!
[{:keys [::db/conn ::sto/storage]} file-id data]
(let [stored (->> (db/query conn :file_tagged_object_thumbnail
(let [stored (->> (db/query conn :file-tagged-object-thumbnail
{:file-id file-id}
{:columns [:object-id]})
(into #{} (map :object-id)))
using (into #{}
(fn [{:keys [id objects]}]
(->> (ctt/get-frames objects)
(thc/fmt-object-id file-id id (:id %) "frame")
(thc/fmt-object-id file-id id (:id %) "component"))))))
(mapcat (fn [{:keys [id objects]}]
(->> (ctt/get-frames objects)
(map #(assoc % :page-id id)))))
(mapcat (fn [{:keys [id page-id]}]
(thc/fmt-object-id file-id page-id id "frame")
(thc/fmt-object-id file-id page-id id "component")))))
(vals (:pages-index data)))
unused (set/difference stored using)]
@ -179,15 +181,15 @@
" returning media_id")
res (db/exec! conn [sql file-id (db/create-array conn "text" unused)])]
(l/dbg :hint "delete file object thumbnails"
:file-id (str file-id)
:total (count res))
(doseq [media-id (into #{} (keep :media-id) res)]
;; Mark as deleted the storage object related with the
;; photo-id field.
(l/trace :hint "mark storage object as deleted" :id media-id)
(sto/del-object! storage media-id))
(l/debug :hint "delete file object thumbnails"
:file-id file-id
:total (count res))))))
(l/trc :hint "touch file object thumbnail storage object" :id (str media-id))
(sto/touch-object! storage media-id))))))
(defn- clean-file-thumbnails!
[{:keys [::db/conn ::sto/storage]} file-id revn]
@ -197,15 +199,15 @@
res (db/exec! conn [sql file-id revn])]
(when (seq res)
(l/dbg :hint "delete file thumbnails"
:file-id (str file-id)
:total (count res))
(doseq [media-id (into #{} (keep :media-id) res)]
;; Mark as deleted the storage object related with the
;; media-id field.
(l/trace :hint "mark storage object as deleted" :id media-id)
(sto/del-object! storage media-id))
(l/debug :hint "delete file thumbnails"
:file-id file-id
:total (count res)))))
(l/trc :hint "delete file thumbnail storage object" :id (str media-id))
(sto/del-object! storage media-id)))))
(def ^:private
@ -250,7 +252,7 @@
(mapv :id))]
(when (seq unused)
(l/debug :hint "clean deleted components" :total (count unused))
(l/dbg :hint "clean deleted components" :total (count unused))
(let [data (reduce ctkl/delete-component data unused)]
(db/update! conn :file
@ -283,12 +285,12 @@
rows (db/exec! conn [sql file-id used])]
(doseq [fragment-id (map :id rows)]
(l/trace :hint "remove unused file data fragment" :id (str fragment-id))
(l/trc :hint "remove unused file data fragment" :id (str fragment-id))
(db/delete! conn :file-data-fragment {:id fragment-id :file-id file-id})))))
(defn- process-file
[{:keys [::db/conn] :as cfg} {:keys [id data revn modified-at features] :as file}]
(l/debug :hint "processing file" :id id :modified-at modified-at)
(l/dbg :hint "processing file" :id id :modified-at modified-at)
(binding [pmap/*load-fn* (partial files/load-pointer conn id)
pmap/*tracked* (atom {})]
@ -297,7 +299,7 @@
(clean-file-media! conn id data)
(clean-file-tagged-object-thumbnails! cfg id data)
(clean-file-object-thumbnails! cfg id data)
(clean-file-thumbnails! cfg id revn)
(clean-deleted-components! conn id data)

View file

@ -8,6 +8,7 @@
[app.common.features :as cfeat]
[app.common.pprint :as pp]
[app.common.pprint :as pp]
[app.common.thumbnails :as thc]
[app.common.types.shape :as cts]
[app.common.uuid :as uuid]
@ -229,7 +230,6 @@
(t/deftest file-gc-task-with-thumbnails
(letfn [(add-file-media-object [& {:keys [profile-id file-id]}]
(let [mfile {:filename "sample.jpg"
@ -519,6 +519,195 @@
(t/is (nil? (sto/get-object storage (:media-id fmo2))))
(t/is (nil? (sto/get-object storage (:media-id fmo1)))))))
(t/deftest file-gc-task-with-object-thumbnails
(letfn [(insert-file-object-thumbnail! [& {:keys [profile-id file-id page-id frame-id]}]
(let [object-id (thc/fmt-object-id file-id page-id frame-id "frame")
mfile {:filename "sample.jpg"
:path (th/tempfile "backend_tests/test_files/sample.jpg")
:mtype "image/jpeg"
:size 312043}
params {::th/type :create-file-object-thumbnail
::rpc/profile-id profile-id
:file-id file-id
:object-id object-id
:tag "frame"
:media mfile}
out (th/command! params)]
;; (th/print-result! out)
(t/is (nil? (:error out)))
(:result out)))
(update-file! [& {:keys [profile-id file-id changes revn] :or {revn 0}}]
(let [params {::th/type :update-file
::rpc/profile-id profile-id
:id file-id
:session-id (uuid/random)
:revn revn
:features cfeat/supported-features
:changes changes}
out (th/command! params)]
;; (th/print-result! out)
(t/is (nil? (:error out)))
(:result out)))]
(let [storage (:app.storage/storage th/*system*)
profile (th/create-profile* 1)
file (th/create-file* 1 {:profile-id (:id profile)
:project-id (:default-project-id profile)
:is-shared false})
file-id (get file :id)
page-id (first (get-in file [:data :pages]))
frame-id-1 (uuid/random)
frame-id-2 (uuid/random)
fot-1 (insert-file-object-thumbnail! :profile-id (:id profile)
:file-id file-id
:page-id page-id
:frame-id frame-id-1)
fot-2 (insert-file-object-thumbnail! :profile-id (:id profile)
:page-id page-id
:file-id file-id
:frame-id frame-id-2)
;; Add two frames
:file-id (:id file)
:profile-id (:id profile)
:revn 0
[{:type :add-obj
:page-id page-id
:id frame-id-1
:parent-id uuid/zero
:frame-id uuid/zero
:obj (cts/setup-shape
{:id frame-id-2
:name "Board"
:frame-id uuid/zero
:parent-id uuid/zero
:type :frame})}
{:type :add-obj
:page-id page-id
:id frame-id-2
:parent-id uuid/zero
:frame-id uuid/zero
:obj (cts/setup-shape
{:id frame-id-2
:name "Board"
:frame-id uuid/zero
:parent-id uuid/zero
:type :frame})}])
;; Check that the referenced storage objects are the same because of
;; the deduplication feature.
(t/is (= (:media-id fot-1) (:media-id fot-2)))
;; If we launch gc-touched-task, we should have 1 item to freeze
;; because of the deduplication (we have uploaded the same file
;; twice).
(let [res (th/run-task! "storage-gc-touched" {:min-age (dt/duration 0)})]
(t/is (= 1 (:freeze res)))
(t/is (= 0 (:delete res))))
;; run the file-gc task immediately without forced min-age
(let [res (th/run-task! "file-gc")]
(t/is (= 0 (:processed res))))
;; run the task again
(let [res (th/run-task! "file-gc" {:min-age 0})]
(t/is (= 1 (:processed res))))
;; retrieve file and check trimmed attribute
(let [row (th/db-get :file {:id (:id file)})]
(t/is (true? (:has-media-trimmed row))))
;; check file object thumbnails
(let [rows (th/db-exec! ["select * from file_tagged_object_thumbnail where file_id = ?" file-id])]
;; (pp/pprint rows)
(t/is (= 2 (count rows))))
;; check storage objects
(let [rows (th/db-exec! ["select * from storage_object where deleted_at is null"])]
;; (pp/pprint rows)
(t/is (= 1 (count rows))))
;; The underlying storage objects are available.
(t/is (some? (sto/get-object storage (:media-id fot-1))))
(t/is (some? (sto/get-object storage (:media-id fot-2))))
;; proceed to remove one frame
:file-id file-id
:profile-id (:id profile)
:revn 0
:changes [{:type :del-obj
:page-id page-id
:id frame-id-2}])
(let [res (th/run-task! "file-gc" {:min-age (dt/duration 0)})]
(t/is (= 1 (:processed res))))
(let [rows (th/db-exec! ["select * from file_tagged_object_thumbnail where file_id = ?" file-id])]
;; (pp/pprint rows)
(t/is (= 1 (count rows)))
(t/is (= (thc/fmt-object-id file-id page-id frame-id-1 "frame")
(-> rows first :object-id))))
;; Now that file-gc has deleted the object thumbnail, let's
;; execute the storage-gc-touched task
(let [res (th/run-task! "storage-gc-touched" {:min-age (dt/duration 0)})]
(t/is (= 1 (:freeze res))))
;; check storage objects
(let [rows (th/db-exec! ["select * from storage_object where deleted_at is null"])]
;; (pp/pprint rows)
(t/is (= 1 (count rows))))
;; proceed to remove the remaining frame
:file-id file-id
:profile-id (:id profile)
:revn 0
:changes [{:type :del-obj
:page-id page-id
:id frame-id-1}])
(let [res (th/run-task! "file-gc" {:min-age (dt/duration 0)})]
(t/is (= 1 (:processed res))))
(let [rows (th/db-exec! ["select * from file_tagged_object_thumbnail where file_id = ?" file-id])]
(t/is (= 0 (count rows))))
;; We still have the storage object in the table
(let [rows (th/db-exec! ["select * from storage_object where deleted_at is null"])]
;; (pp/pprint rows)
(t/is (= 1 (count rows))))
;; Now that file-gc has deleted the object thumbnail, let's
;; execute the storage-gc-touched task
(let [res (th/run-task! "storage-gc-touched" {:min-age (dt/duration 0)})]
(t/is (= 1 (:delete res))))
;; check storage objects
(let [rows (th/db-exec! ["select * from storage_object where deleted_at is null"])]
;; (pp/pprint rows)
(t/is (= 0 (count rows))))
(t/deftest permissions-checks-creating-file
(let [profile1 (th/create-profile* 1)
profile2 (th/create-profile* 2)
@ -833,7 +1022,7 @@
:mtype "image/jpeg"}}
{:keys [error result] :as out} (th/command! data)]
(t/is (nil? error))
(t/is (nil? result)))
(t/is (map? result)))
;; Check the result
(let [data {::th/type :get-file-data-for-thumbnail
@ -891,12 +1080,12 @@
:mtype "image/jpeg"}}
{:keys [error result] :as out} (th/command! data)]
(t/is (nil? error))
(t/is (nil? result)))
(t/is (map? result)))
;; Wait for the file to become eligible for GC
(th/sleep 300)
;; run the task again
;; run the task
(let [res (th/run-task! "file-gc" {:min-age 0})]
(t/is (= 1 (:processed res))))
@ -916,7 +1105,7 @@
:mtype "image/jpeg"}}
{:keys [error result] :as out} (th/command! data)]
(t/is (nil? error))
(t/is (nil? result)))
(t/is (map? result)))
;; Mark file as modified
(th/db-exec! ["update file set has_media_trimmed=false where id=?" (:id file)])

View file

@ -75,11 +75,16 @@
(let [out (th/command! data1)]
(t/is (nil? (:error out)))
(t/is (nil? (:result out))))
(t/is (map? (:result out))))
(let [out (th/command! data2)]
(t/is (nil? (:error out)))
(t/is (nil? (:result out))))
(t/is (map? (:result out))))
;; run the task again
(let [res (th/run-task! "storage-gc-touched" {:min-age 0})]
(t/is (= 2 (:freeze res))))
(let [[row1 row2 :as rows] (th/db-query :file-tagged-object-thumbnail
{:file-id (:id file)}
@ -122,9 +127,14 @@
(t/is (uuid? (:media-id row2))))
;; Check that the storage objects still exist after file-gc
(t/is (nil? (sto/get-object storage (:media-id row1))))
(t/is (some? (sto/get-object storage (:media-id row1))))
(t/is (some? (sto/get-object storage (:media-id row2))))
;; run the task again
(let [res (th/run-task! "storage-gc-touched" {:min-age 0})]
(t/is (= 1 (:delete res)))
(t/is (= 0 (:freeze res))))
;; check that the storage object still exists but is marked as deleted
(let [row (th/db-get :storage-object {:id (:media-id row1)} {::db/remove-deleted? false})]
(t/is (some? (:deleted-at row))))
@ -134,13 +144,13 @@
(let [result (th/run-task! :storage-gc-deleted {:min-age (dt/duration 0)})]
(t/is (= 1 (:deleted result))))
(t/is (nil? (sto/get-object storage (:media-id row1))))
(t/is (some? (sto/get-object storage (:media-id row2))))
;; check that the storage object row has been removed from the database
(let [row (th/db-get :storage-object {:id (:media-id row1)} {::db/remove-deleted? false})]
(t/is (nil? row)))
(t/is (some? (sto/get-object storage (:media-id row2))))
(t/deftest create-file-thumbnail
@ -254,7 +264,7 @@
(let [out (th/command! data)]
(t/is (nil? (:error out)))
(t/is (nil? (:result out))))
(t/is (map? (:result out))))
(let [[row :as rows] (th/db-query :file-tagged-object-thumbnail
{:file-id (:id file)}

View file

@ -309,7 +309,7 @@
(let [res (db/exec-one! th/*pool* ["select count(*) from storage_object where deleted_at is null"])]
(t/is (= 2 (:count res)))))
;; now we proceed to manually delete all team_font_variant
;; now we proceed to manually delete all file_media_object
(db/exec-one! th/*pool* ["delete from file_media_object"])
;; run the touched gc task