From e54b443247889ecf60f78bb0a42f4a542bbe58f7 Mon Sep 17 00:00:00 2001 From: Andrey Antukh Date: Fri, 29 Jan 2021 23:56:11 +0100 Subject: [PATCH] :tada: Add refcount-like functionality to storages. This allows reuse of storage objects among different files. --- backend/src/app/db.clj | 5 ++ backend/src/app/main.clj | 13 +++- backend/src/app/migrations.clj | 6 ++ .../sql/0043-drop-old-tables-and-fields.sql | 10 +++ .../sql/0044-add-storage-refcount.sql | 26 +++++++ backend/src/app/rpc/mutations/media.clj | 20 +---- backend/src/app/storage.clj | 73 ++++++++++++++++++- 7 files changed, 130 insertions(+), 23 deletions(-) create mode 100644 backend/src/app/migrations/sql/0043-drop-old-tables-and-fields.sql create mode 100644 backend/src/app/migrations/sql/0044-add-storage-refcount.sql diff --git a/backend/src/app/db.clj b/backend/src/app/db.clj index a5d2d0ca9..9fdee5799 100644 --- a/backend/src/app/db.clj +++ b/backend/src/app/db.clj @@ -258,6 +258,11 @@ [p] (PGpoint. (:x p) (:y p))) +(defn create-array + [conn type aobjects] + (let [^PGConnection conn (unwrap conn org.postgresql.PGConnection)] + (.createArrayOf conn ^String type aobjects))) + (defn decode-pgpoint [^PGpoint v] (gpt/point (.-x v) (.-y v))) diff --git a/backend/src/app/main.clj b/backend/src/app/main.clj index fad23167e..f9f1566c8 100644 --- a/backend/src/app/main.clj +++ b/backend/src/app/main.clj @@ -55,11 +55,14 @@ :app.tokens/tokens {:sprops (ig/ref :app.sprops/props)} - :app.storage/gc-task + :app.storage/gc-deleted-task {:pool (ig/ref :app.db/pool) :storage (ig/ref :app.storage/storage) :min-age (dt/duration {:hours 2})} + :app.storage/gc-touched-task + {:pool (ig/ref :app.db/pool)} + :app.storage/recheck-task {:pool (ig/ref :app.db/pool) :storage (ig/ref :app.storage/storage)} @@ -192,9 +195,13 @@ :cron #app/cron "0 0 */6 * * ?" ;; every 2 hours :fn (ig/ref :app.tasks.file-xlog-gc/handler)} - {:id "storage-gc" + {:id "storage-deleted-gc" :cron #app/cron "0 0 */6 * * ?" ;; every 6 hours - :fn (ig/ref :app.storage/gc-task)} + :fn (ig/ref :app.storage/gc-deleted-task)} + + {:id "storage-touched-gc" + :cron #app/cron "0 30 */6 * * ?" ;; every 6 hours + :fn (ig/ref :app.storage/gc-touched-task)} {:id "storage-recheck" :cron #app/cron "0 0 */6 * * ?" ;; every 6 hours diff --git a/backend/src/app/migrations.clj b/backend/src/app/migrations.clj index 1be3db2cc..a11c12e05 100644 --- a/backend/src/app/migrations.clj +++ b/backend/src/app/migrations.clj @@ -140,6 +140,12 @@ {:name "0042-add-server-prop-table" :fn (mg/resource "app/migrations/sql/0042-add-server-prop-table.sql")} + + {:name "0043-drop-old-tables-and-fields" + :fn (mg/resource "app/migrations/sql/0043-drop-old-tables-and-fields.sql")} + + {:name "0044-add-storage-refcount" + :fn (mg/resource "app/migrations/sql/0044-add-storage-refcount.sql")} ]) diff --git a/backend/src/app/migrations/sql/0043-drop-old-tables-and-fields.sql b/backend/src/app/migrations/sql/0043-drop-old-tables-and-fields.sql new file mode 100644 index 000000000..fe87dca32 --- /dev/null +++ b/backend/src/app/migrations/sql/0043-drop-old-tables-and-fields.sql @@ -0,0 +1,10 @@ +DROP TABLE IF EXISTS file_media_thumbnail; + +ALTER TABLE profile DROP COLUMN photo; +ALTER TABLE team DROP COLUMN photo; + +ALTER TABLE file_media_object DROP COLUMN path; +ALTER TABLE file_media_object ALTER COLUMN media_id SET NOT NULL; + +ALTER TRIGGER media_object__insert__tgr + ON file_media_object RENAME TO file_media_object__on_insert__tgr; diff --git a/backend/src/app/migrations/sql/0044-add-storage-refcount.sql b/backend/src/app/migrations/sql/0044-add-storage-refcount.sql new file mode 100644 index 000000000..28f97548c --- /dev/null +++ b/backend/src/app/migrations/sql/0044-add-storage-refcount.sql @@ -0,0 +1,26 @@ +ALTER TABLE storage_object + ADD COLUMN touched_at timestamptz NULL; + +CREATE INDEX storage_object__id_touched_at__idx + ON storage_object (touched_at, id) + WHERE touched_at IS NOT NULL; + +-- DROP TRIGGER file_media_object__on_delete__tgr ON file_media_object CASCADE; +-- DROP FUNCTION on_delete_file_media_object () ; + +CREATE OR REPLACE FUNCTION on_delete_file_media_object() + RETURNS TRIGGER AS $func$ + BEGIN + IF OLD.thumbnail_id IS NOT NULL THEN + UPDATE storage_object + SET touched_at = now() + WHERE id in (OLD.thumbnail_id, OLD.media_id); + ELSE + UPDATE storage_object + SET touched_at = now() + WHERE id = OLD.media_id; + END IF; + RETURN OLD; + END; +$func$ LANGUAGE plpgsql; + diff --git a/backend/src/app/rpc/mutations/media.clj b/backend/src/app/rpc/mutations/media.clj index dbb6c6b92..89dd6d3e9 100644 --- a/backend/src/app/rpc/mutations/media.clj +++ b/backend/src/app/rpc/mutations/media.clj @@ -158,27 +158,15 @@ (clone-file-media-object params))))) (defn clone-file-media-object - [{:keys [conn storage] :as cfg} {:keys [id file-id is-local]}] - (let [mobj (db/get-by-id conn :file-media-object id) - - ;; This makes the storage participate in the same transaction. - storage (assoc storage :conn conn) - - img-obj (sto/get-object storage (:media-id mobj)) - thm-obj (when (:thumbnail-id mobj) - (sto/get-object storage (:thumbnail-id mobj))) - - image (sto/clone-object storage img-obj) - thumb (when thm-obj - (sto/clone-object storage thm-obj))] - + [{:keys [conn] :as cfg} {:keys [id file-id is-local]}] + (let [mobj (db/get-by-id conn :file-media-object id)] (db/insert! conn :file-media-object {:id (uuid/next) :file-id file-id :is-local is-local :name (:name mobj) - :media-id (:id image) - :thumbnail-id (:id thumb) + :media-id (:media-id mobj) + :thumbnail-id (:thumbnail-id mobj) :width (:width mobj) :height (:height mobj) :mtype (:mtype mobj)}))) diff --git a/backend/src/app/storage.clj b/backend/src/app/storage.clj index 815fa134e..15c1f3330 100644 --- a/backend/src/app/storage.clj +++ b/backend/src/app/storage.clj @@ -253,7 +253,7 @@ (assoc backend :conn (or conn pool)))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; Garbage Collection Task +;; Garbage Collection: Permanently delete objects ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; A task responsible to permanently delete already marked as deleted @@ -263,10 +263,10 @@ (s/def ::min-age ::dt/duration) -(defmethod ig/pre-init-spec ::gc-task [_] +(defmethod ig/pre-init-spec ::gc-deleted-task [_] (s/keys :req-un [::storage ::db/pool ::min-age])) -(defmethod ig/init-key ::gc-task +(defmethod ig/init-key ::gc-deleted-task [_ {:keys [pool storage min-age] :as cfg}] (letfn [(retrieve-deleted-objects [conn] (let [min-age (db/interval min-age) @@ -301,6 +301,71 @@ where id in (select id from items_part) returning *;") +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Garbage Collection: Analize touched objects +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; This task is part of the garbage collection of storage objects and +;; is responsible on analizing the touched objects and mark them for deletion +;; if corresponds. +;; +;; When file_media_object is deleted, the depending storage_object are +;; marked as touched. This means that some files that depend on a +;; concrete storage_object are no longer exists and maybe this +;; storage_object is no longer necessary and can be ellegible for +;; elimination. This task peridically analizes touched objects and +;; mark them as freeze (means that has other references and the object +;; is still valid) or deleted (no more references to this object so is +;; ready to be deleted). + +(declare sql:retrieve-touched-objects) + +(defmethod ig/pre-init-spec ::gc-touched-task [_] + (s/keys :req-un [::db/pool])) + +(defmethod ig/init-key ::gc-touched-task + [_ {:keys [pool] :as cfg}] + (letfn [(retrieve-touched-objects [conn] + (seq (db/exec! conn [sql:retrieve-touched-objects]))) + + (group-resuls [rows] + (let [conj (fnil conj [])] + (reduce (fn [acc {:keys [id nrefs]}] + (if (pos? nrefs) + (update acc :to-freeze conj id) + (update acc :to-delete conj id))) + {} + rows))) + + (mark-delete-in-bulk [conn ids] + (db/exec-one! conn ["update storage_object set deleted_at=now(), touched_at=null where id = ANY(?)" + (db/create-array conn "uuid" (into-array java.util.UUID ids))])) + + (mark-freeze-in-bulk [conn ids] + (db/exec-one! conn ["update storage_object set touched_at=null where id = ANY(?)" + (db/create-array conn "uuid" (into-array java.util.UUID ids))]))] + + (fn [task] + (db/with-atomic [conn pool] + (loop [] + (when-let [touched (retrieve-touched-objects conn)] + (let [{:keys [to-delete to-freeze]} (group-resuls touched)] + (when (seq to-delete) + (mark-delete-in-bulk conn to-delete)) + (when (seq to-freeze) + (mark-freeze-in-bulk conn to-freeze)) + (Thread/sleep 100) + (recur)))) + nil)))) + +(def sql:retrieve-touched-objects + "select so.id, + ((select count(*) from file_media_object where media_id = so.id) + + (select count(*) from file_media_object where thumbnail_id = so.id)) as nrefs + from storage_object as so + where so.touched_at is not null + order by so.touched_at + limit 500;") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Recheck Stalled Task @@ -351,7 +416,7 @@ (def sql:retrieve-pending "with items_part as ( select s.id - from storage_pending as s + from storage_pending as s where s.created_at < now() - '1 hour'::interval order by s.created_at limit 100