2022-03-23 10:59:20 +01:00
|
|
|
;; This Source Code Form is subject to the terms of the Mozilla Public
|
|
|
|
;; License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
|
|
;; file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
|
|
;;
|
2022-09-20 23:23:22 +02:00
|
|
|
;; Copyright (c) KALEIDOS INC
|
2022-03-23 10:59:20 +01:00
|
|
|
|
|
|
|
(ns app.tasks.file-gc
|
|
|
|
"A maintenance task that is responsible of: purge unused file media,
|
2022-03-30 00:11:43 +02:00
|
|
|
clean unused object thumbnails and remove old file thumbnails. The
|
2022-03-23 10:59:20 +01:00
|
|
|
file is eligible to be garbage collected after some period of
|
|
|
|
inactivity (the default threshold is 72h)."
|
|
|
|
(:require
|
|
|
|
[app.common.data :as d]
|
2023-05-26 16:37:15 +02:00
|
|
|
[app.common.files.migrations :as pmg]
|
2022-03-23 10:59:20 +01:00
|
|
|
[app.common.logging :as l]
|
2023-10-31 12:21:02 +01:00
|
|
|
[app.common.thumbnails :as thc]
|
2023-03-10 15:43:26 +01:00
|
|
|
[app.common.types.components-list :as ctkl]
|
2022-09-23 10:20:20 +02:00
|
|
|
[app.common.types.file :as ctf]
|
2022-06-28 11:05:45 +02:00
|
|
|
[app.common.types.shape-tree :as ctt]
|
2022-08-11 16:59:57 +02:00
|
|
|
[app.config :as cf]
|
2022-03-23 10:59:20 +01:00
|
|
|
[app.db :as db]
|
2023-05-05 11:42:45 +02:00
|
|
|
[app.media :as media]
|
2022-11-01 09:46:54 +01:00
|
|
|
[app.rpc.commands.files :as files]
|
2023-05-05 11:42:45 +02:00
|
|
|
[app.storage :as sto]
|
2022-03-23 10:59:20 +01:00
|
|
|
[app.util.blob :as blob]
|
2022-11-01 09:46:54 +01:00
|
|
|
[app.util.pointer-map :as pmap]
|
2022-03-23 10:59:20 +01:00
|
|
|
[app.util.time :as dt]
|
2022-03-30 00:11:43 +02:00
|
|
|
[clojure.set :as set]
|
2022-03-23 10:59:20 +01:00
|
|
|
[clojure.spec.alpha :as s]
|
|
|
|
[integrant.core :as ig]))
|
|
|
|
|
2023-04-25 17:14:38 +02:00
|
|
|
(declare ^:private get-candidates)
|
2022-03-23 10:59:20 +01:00
|
|
|
(declare ^:private process-file)
|
|
|
|
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;; HANDLER
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
|
|
|
(defmethod ig/pre-init-spec ::handler [_]
|
2023-05-05 11:42:45 +02:00
|
|
|
(s/keys :req [::db/pool ::sto/storage]))
|
2022-08-11 16:59:57 +02:00
|
|
|
|
|
|
|
(defmethod ig/prep-key ::handler
|
|
|
|
[_ cfg]
|
2023-02-06 12:27:53 +01:00
|
|
|
(assoc cfg ::min-age cf/deletion-delay))
|
2022-03-23 10:59:20 +01:00
|
|
|
|
|
|
|
(defmethod ig/init-key ::handler
|
2023-02-06 12:27:53 +01:00
|
|
|
[_ {:keys [::db/pool] :as cfg}]
|
2022-11-10 09:43:13 +01:00
|
|
|
(fn [{:keys [file-id] :as params}]
|
2023-04-25 17:14:38 +02:00
|
|
|
|
2022-03-23 10:59:20 +01:00
|
|
|
(db/with-atomic [conn pool]
|
2023-04-25 17:14:38 +02:00
|
|
|
(let [min-age (dt/duration (or (:min-age params) (::min-age cfg)))
|
|
|
|
cfg (-> cfg
|
2023-05-05 11:42:45 +02:00
|
|
|
(update ::sto/storage media/configure-assets-storage conn)
|
2023-04-25 17:14:38 +02:00
|
|
|
(assoc ::db/conn conn)
|
|
|
|
(assoc ::file-id file-id)
|
|
|
|
(assoc ::min-age min-age))
|
|
|
|
|
|
|
|
total (reduce (fn [total file]
|
|
|
|
(process-file cfg file)
|
|
|
|
(inc total))
|
|
|
|
0
|
|
|
|
(get-candidates cfg))]
|
|
|
|
|
|
|
|
(l/info :hint "task finished" :min-age (dt/format-duration min-age) :processed total)
|
|
|
|
|
|
|
|
;; Allow optional rollback passed by params
|
|
|
|
(when (:rollback? params)
|
|
|
|
(db/rollback! conn))
|
|
|
|
|
|
|
|
{:processed total}))))
|
2022-03-23 10:59:20 +01:00
|
|
|
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;; IMPL
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
|
|
|
(def ^:private
|
2023-04-25 17:14:38 +02:00
|
|
|
sql:get-candidates-chunk
|
2022-03-23 10:59:20 +01:00
|
|
|
"select f.id,
|
|
|
|
f.data,
|
|
|
|
f.revn,
|
2022-11-01 09:46:54 +01:00
|
|
|
f.features,
|
2022-03-23 10:59:20 +01:00
|
|
|
f.modified_at
|
|
|
|
from file as f
|
|
|
|
where f.has_media_trimmed is false
|
|
|
|
and f.modified_at < now() - ?::interval
|
|
|
|
and f.modified_at < ?
|
|
|
|
order by f.modified_at desc
|
|
|
|
limit 1
|
|
|
|
for update skip locked")
|
|
|
|
|
2023-04-25 17:14:38 +02:00
|
|
|
(defn- get-candidates
|
|
|
|
[{:keys [::db/conn ::min-age ::file-id]}]
|
2022-11-10 09:43:13 +01:00
|
|
|
(if (uuid? file-id)
|
2022-08-11 16:59:57 +02:00
|
|
|
(do
|
2022-11-10 09:43:13 +01:00
|
|
|
(l/warn :hint "explicit file id passed on params" :file-id file-id)
|
|
|
|
(->> (db/query conn :file {:id file-id})
|
2022-11-01 09:46:54 +01:00
|
|
|
(map #(update % :features db/decode-pgarray #{}))))
|
2022-08-11 16:59:57 +02:00
|
|
|
(let [interval (db/interval min-age)
|
|
|
|
get-chunk (fn [cursor]
|
2023-04-25 17:14:38 +02:00
|
|
|
(let [rows (db/exec! conn [sql:get-candidates-chunk interval cursor])]
|
2022-11-01 09:46:54 +01:00
|
|
|
[(some->> rows peek :modified-at)
|
|
|
|
(map #(update % :features db/decode-pgarray #{}) rows)]))]
|
|
|
|
|
2022-09-28 23:26:31 +02:00
|
|
|
(d/iteration get-chunk
|
|
|
|
:vf second
|
|
|
|
:kf first
|
|
|
|
:initk (dt/now)))))
|
2022-03-23 10:59:20 +01:00
|
|
|
|
2022-06-22 11:39:57 +02:00
|
|
|
(defn collect-used-media
|
2023-04-25 17:14:38 +02:00
|
|
|
"Given a fdata (file data), returns all media references."
|
2022-03-23 10:59:20 +01:00
|
|
|
[data]
|
|
|
|
(let [xform (comp
|
|
|
|
(map :objects)
|
|
|
|
(mapcat vals)
|
2023-09-08 14:50:58 +02:00
|
|
|
(mapcat (fn [obj]
|
2023-08-21 18:03:49 +02:00
|
|
|
;; NOTE: because of some bug, we ended with
|
|
|
|
;; many shape types having the ability to
|
|
|
|
;; have fill-image attribute (which initially
|
|
|
|
;; designed for :path shapes).
|
2023-09-08 14:50:58 +02:00
|
|
|
(sequence
|
|
|
|
(keep :id)
|
|
|
|
(concat [(:fill-image obj)
|
|
|
|
(:metadata obj)]
|
|
|
|
(map :fill-image (:fills obj))
|
|
|
|
(map :stroke-image (:strokes obj))
|
|
|
|
(->> (:content obj)
|
|
|
|
(tree-seq map? :children)
|
|
|
|
(mapcat :fills)
|
|
|
|
(map :fill-image)))))))
|
2022-03-23 10:59:20 +01:00
|
|
|
pages (concat
|
|
|
|
(vals (:pages-index data))
|
|
|
|
(vals (:components data)))]
|
|
|
|
(-> #{}
|
|
|
|
(into xform pages)
|
|
|
|
(into (keys (:media data))))))
|
|
|
|
|
|
|
|
(defn- clean-file-media!
|
|
|
|
"Performs the garbage collection of file media objects."
|
|
|
|
[conn file-id data]
|
|
|
|
(let [used (collect-used-media data)
|
|
|
|
unused (->> (db/query conn :file-media-object {:file-id file-id})
|
|
|
|
(remove #(contains? used (:id %))))]
|
|
|
|
|
|
|
|
(doseq [mobj unused]
|
|
|
|
(l/debug :hint "delete file media object"
|
|
|
|
:id (:id mobj)
|
|
|
|
:media-id (:media-id mobj)
|
|
|
|
:thumbnail-id (:thumbnail-id mobj))
|
|
|
|
|
|
|
|
;; NOTE: deleting the file-media-object in the database
|
|
|
|
;; automatically marks as touched the referenced storage
|
|
|
|
;; objects. The touch mechanism is needed because many files can
|
|
|
|
;; point to the same storage objects and we can't just delete
|
|
|
|
;; them.
|
|
|
|
(db/delete! conn :file-media-object {:id (:id mobj)}))))
|
|
|
|
|
2023-11-03 10:04:37 +01:00
|
|
|
(defn- clean-file-tagged-object-thumbnails!
|
2023-05-05 11:42:45 +02:00
|
|
|
[{:keys [::db/conn ::sto/storage]} file-id data]
|
2023-11-03 10:04:37 +01:00
|
|
|
(let [stored (->> (db/query conn :file_tagged_object_thumbnail
|
2022-03-30 00:11:43 +02:00
|
|
|
{:file-id file-id}
|
|
|
|
{:columns [:object-id]})
|
|
|
|
(into #{} (map :object-id)))
|
|
|
|
|
2023-05-05 11:42:45 +02:00
|
|
|
using (into #{}
|
2023-10-31 12:21:02 +01:00
|
|
|
(mapcat
|
|
|
|
(fn [{:keys [id objects]}]
|
|
|
|
(->> (ctt/get-frames objects)
|
|
|
|
(mapcat
|
|
|
|
#(vector
|
|
|
|
(thc/fmt-object-id file-id id (:id %) "frame")
|
|
|
|
(thc/fmt-object-id file-id id (:id %) "component"))))))
|
2023-05-05 11:42:45 +02:00
|
|
|
(vals (:pages-index data)))
|
2022-03-30 00:11:43 +02:00
|
|
|
|
|
|
|
unused (set/difference stored using)]
|
|
|
|
|
|
|
|
(when (seq unused)
|
2023-11-03 10:04:37 +01:00
|
|
|
(let [sql (str "delete from file_tagged_object_thumbnail "
|
2023-05-05 11:42:45 +02:00
|
|
|
" where file_id=? and object_id=ANY(?)"
|
|
|
|
" returning media_id")
|
|
|
|
res (db/exec! conn [sql file-id (db/create-array conn "text" unused)])]
|
|
|
|
|
|
|
|
(doseq [media-id (into #{} (keep :media-id) res)]
|
|
|
|
;; Mark as deleted the storage object related with the
|
|
|
|
;; photo-id field.
|
|
|
|
(l/trace :hint "mark storage object as deleted" :id media-id)
|
|
|
|
(sto/del-object! storage media-id))
|
|
|
|
|
|
|
|
(l/debug :hint "delete file object thumbnails"
|
|
|
|
:file-id file-id
|
|
|
|
:total (count res))))))
|
2022-03-23 10:59:20 +01:00
|
|
|
|
|
|
|
(defn- clean-file-thumbnails!
|
2023-05-05 11:42:45 +02:00
|
|
|
[{:keys [::db/conn ::sto/storage]} file-id revn]
|
2022-03-23 10:59:20 +01:00
|
|
|
(let [sql (str "delete from file_thumbnail "
|
2023-05-05 11:42:45 +02:00
|
|
|
" where file_id=? and revn < ? "
|
|
|
|
" returning media_id")
|
|
|
|
res (db/exec! conn [sql file-id revn])]
|
|
|
|
|
|
|
|
(when (seq res)
|
|
|
|
(doseq [media-id (into #{} (keep :media-id) res)]
|
|
|
|
;; Mark as deleted the storage object related with the
|
2023-06-21 17:14:50 +02:00
|
|
|
;; media-id field.
|
2023-05-05 11:42:45 +02:00
|
|
|
(l/trace :hint "mark storage object as deleted" :id media-id)
|
|
|
|
(sto/del-object! storage media-id))
|
|
|
|
|
|
|
|
(l/debug :hint "delete file thumbnails"
|
|
|
|
:file-id file-id
|
|
|
|
:total (count res)))))
|
2022-09-23 10:20:20 +02:00
|
|
|
|
|
|
|
(def ^:private
|
2023-04-25 17:14:38 +02:00
|
|
|
sql:get-files-for-library
|
2022-09-23 10:20:20 +02:00
|
|
|
"select f.data, f.modified_at
|
|
|
|
from file as f
|
|
|
|
left join file_library_rel as fl on (fl.file_id = f.id)
|
|
|
|
where fl.library_file_id = ?
|
|
|
|
and f.modified_at < ?
|
|
|
|
and f.deleted_at is null
|
|
|
|
order by f.modified_at desc
|
|
|
|
limit 1")
|
|
|
|
|
|
|
|
(defn- clean-deleted-components!
|
|
|
|
"Performs the garbage collection of unreferenced deleted components."
|
2023-04-25 17:14:38 +02:00
|
|
|
[conn file-id data]
|
|
|
|
(letfn [(get-files-chunk [cursor]
|
|
|
|
(let [rows (db/exec! conn [sql:get-files-for-library file-id cursor])]
|
|
|
|
[(some-> rows peek :modified-at)
|
|
|
|
(map (comp blob/decode :data) rows)]))
|
|
|
|
|
|
|
|
(get-used-components [fdata components]
|
|
|
|
;; Find which of the components are used in the file.
|
|
|
|
(into #{}
|
|
|
|
(filter #(ctf/used-in? fdata file-id % :component))
|
|
|
|
components))
|
|
|
|
|
|
|
|
(get-unused-components [components files-data]
|
|
|
|
;; Find and return a set of unused components (on all files).
|
|
|
|
(reduce (fn [components fdata]
|
|
|
|
(if (seq components)
|
|
|
|
(->> (get-used-components fdata components)
|
|
|
|
(set/difference components))
|
|
|
|
(reduced components)))
|
|
|
|
|
|
|
|
components
|
|
|
|
files-data))]
|
|
|
|
|
|
|
|
(let [deleted (into #{} (ctkl/deleted-components-seq data))
|
|
|
|
unused (->> (d/iteration get-files-chunk :vf second :kf first :initk (dt/now))
|
|
|
|
(cons data)
|
|
|
|
(get-unused-components deleted)
|
|
|
|
(mapv :id))]
|
|
|
|
|
|
|
|
(when (seq unused)
|
|
|
|
(l/debug :hint "clean deleted components" :total (count unused))
|
|
|
|
|
|
|
|
(let [data (reduce ctkl/delete-component data unused)]
|
|
|
|
(db/update! conn :file
|
|
|
|
{:data (blob/encode data)}
|
|
|
|
{:id file-id}))))))
|
2022-11-01 09:46:54 +01:00
|
|
|
|
|
|
|
(defn- clean-data-fragments!
|
|
|
|
[conn file-id data]
|
2023-04-25 17:14:38 +02:00
|
|
|
(letfn [(get-pointers-chunk [cursor]
|
|
|
|
(let [sql (str "select id, data, created_at "
|
|
|
|
" from file_change "
|
|
|
|
" where file_id = ? "
|
|
|
|
" and data is not null "
|
|
|
|
" and created_at < ? "
|
|
|
|
" order by created_at desc "
|
|
|
|
" limit 1;")
|
|
|
|
rows (db/exec! conn [sql file-id cursor])]
|
|
|
|
[(some-> rows peek :created-at)
|
|
|
|
(mapcat (comp files/get-all-pointer-ids blob/decode :data) rows)]))]
|
|
|
|
|
|
|
|
(let [used (into (files/get-all-pointer-ids data)
|
|
|
|
(d/iteration get-pointers-chunk
|
|
|
|
:vf second
|
|
|
|
:kf first
|
|
|
|
:initk (dt/now)))
|
|
|
|
|
|
|
|
sql (str "select id from file_data_fragment "
|
|
|
|
" where file_id = ? AND id != ALL(?::uuid[])")
|
|
|
|
used (db/create-array conn "uuid" used)
|
|
|
|
rows (db/exec! conn [sql file-id used])]
|
|
|
|
|
|
|
|
(doseq [fragment-id (map :id rows)]
|
|
|
|
(l/trace :hint "remove unused file data fragment" :id (str fragment-id))
|
|
|
|
(db/delete! conn :file-data-fragment {:id fragment-id :file-id file-id})))))
|
2022-11-01 09:46:54 +01:00
|
|
|
|
2022-03-23 10:59:20 +01:00
|
|
|
(defn- process-file
|
2023-05-05 11:42:45 +02:00
|
|
|
[{:keys [::db/conn] :as cfg} {:keys [id data revn modified-at features] :as file}]
|
2022-03-23 10:59:20 +01:00
|
|
|
(l/debug :hint "processing file" :id id :modified-at modified-at)
|
|
|
|
|
2023-05-19 19:39:15 +02:00
|
|
|
(binding [pmap/*load-fn* (partial files/load-pointer conn id)
|
|
|
|
pmap/*tracked* (atom {})]
|
2022-11-01 09:46:54 +01:00
|
|
|
(let [data (-> (blob/decode data)
|
|
|
|
(assoc :id id)
|
|
|
|
(pmg/migrate-data))]
|
|
|
|
|
|
|
|
(clean-file-media! conn id data)
|
2023-11-03 10:04:37 +01:00
|
|
|
(clean-file-tagged-object-thumbnails! cfg id data)
|
2023-05-05 11:42:45 +02:00
|
|
|
(clean-file-thumbnails! cfg id revn)
|
2022-11-01 09:46:54 +01:00
|
|
|
(clean-deleted-components! conn id data)
|
2022-03-23 10:59:20 +01:00
|
|
|
|
2023-10-23 19:31:41 +02:00
|
|
|
(when (contains? features "fdata/pointer-map")
|
2022-11-01 09:46:54 +01:00
|
|
|
(clean-data-fragments! conn id data))
|
2022-03-23 10:59:20 +01:00
|
|
|
|
2022-11-01 09:46:54 +01:00
|
|
|
;; Mark file as trimmed
|
|
|
|
(db/update! conn :file
|
2022-03-23 10:59:20 +01:00
|
|
|
{:has-media-trimmed true}
|
2023-05-19 19:39:15 +02:00
|
|
|
{:id id})
|
|
|
|
|
|
|
|
(files/persist-pointers! conn id))))
|