2020-12-30 14:38:00 +01:00
|
|
|
;; This Source Code Form is subject to the terms of the Mozilla Public
|
|
|
|
;; License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
|
|
;; file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
|
|
;;
|
2022-09-20 23:23:22 +02:00
|
|
|
;; Copyright (c) KALEIDOS INC
|
2020-12-30 14:38:00 +01:00
|
|
|
|
|
|
|
(ns app.storage
|
2021-06-14 11:50:26 +02:00
|
|
|
"Objects storage abstraction layer."
|
2020-12-30 14:38:00 +01:00
|
|
|
(:require
|
|
|
|
[app.common.data :as d]
|
2022-02-24 23:36:53 +01:00
|
|
|
[app.common.data.macros :as dm]
|
2020-12-30 14:38:00 +01:00
|
|
|
[app.common.exceptions :as ex]
|
2021-09-29 16:39:25 +02:00
|
|
|
[app.common.logging :as l]
|
2020-12-30 14:38:00 +01:00
|
|
|
[app.common.spec :as us]
|
|
|
|
[app.common.uuid :as uuid]
|
|
|
|
[app.db :as db]
|
|
|
|
[app.storage.fs :as sfs]
|
|
|
|
[app.storage.impl :as impl]
|
|
|
|
[app.storage.s3 :as ss3]
|
|
|
|
[app.util.time :as dt]
|
2022-02-28 17:15:58 +01:00
|
|
|
[app.worker :as wrk]
|
2020-12-30 14:38:00 +01:00
|
|
|
[clojure.spec.alpha :as s]
|
2022-08-30 14:26:54 +02:00
|
|
|
[datoteka.fs :as fs]
|
2022-02-28 17:15:58 +01:00
|
|
|
[integrant.core :as ig]
|
|
|
|
[promesa.core :as p]
|
|
|
|
[promesa.exec :as px]))
|
2020-12-30 14:38:00 +01:00
|
|
|
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;; Storage Module State
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
2023-02-06 12:27:53 +01:00
|
|
|
;; Allowed values for the `::id` key.
(s/def ::id #{:assets-fs :assets-s3})

;; Per-backend configuration specs; both are aliases of the specs
;; declared in the concrete backend namespaces.
(s/def ::fs ::sfs/backend)
(s/def ::s3 ::ss3/backend)

;; Allowed values for the `::type` key.
(s/def ::type #{:fs :s3})

;; Map of keyword -> backend configuration. A nil value is accepted by
;; the spec for any entry.
(s/def ::backends
  (s/map-of ::us/keyword
            (s/nilable
             (s/or :s3 ::ss3/backend
                   :fs ::sfs/backend))))
|
2020-12-30 14:38:00 +01:00
|
|
|
|
|
|
|
;; Spec checked by integrant before `::storage` is initialized: the
;; configuration must carry a database pool, an executor and the
;; backends map.
(defmethod ig/pre-init-spec ::storage
  [_]
  (s/keys :req [::db/pool
                ::wrk/executor
                ::backends]))
|
2020-12-30 14:38:00 +01:00
|
|
|
|
|
|
|
;; Builds the storage component from its configuration: nil-valued
;; entries are dropped from both the config and the backends map, and
;; the pool is additionally exposed under `::db/pool-or-conn`, which is
;; the key the API functions in this namespace read.
(defmethod ig/init-key ::storage
  [_ {:keys [::backends ::db/pool] :as cfg}]
  (assoc (d/without-nils cfg)
         ::backends (d/without-nils backends)
         ::db/pool-or-conn pool))
|
2020-12-30 14:38:00 +01:00
|
|
|
|
2023-02-06 12:27:53 +01:00
|
|
|
;; Key naming one of the entries of `::backends`.
(s/def ::backend keyword?)

;; Shape of an initialized storage component. `::backend` is optional
;; here; operations that need it validate against
;; `::storage-with-backend` instead.
(s/def ::storage
  (s/keys :req [::backends
                ::db/pool
                ::db/pool-or-conn]
          :opt [::backend]))

;; Same as `::storage`, but the `::backend` key must be present.
(s/def ::storage-with-backend
  (s/and ::storage
         #(contains? % ::backend)))
|
2021-01-25 16:14:54 +01:00
|
|
|
|
2020-12-30 14:38:00 +01:00
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;; Database Objects
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
2022-02-28 17:15:58 +01:00
|
|
|
(defn get-metadata
  "Returns a map with the entries of `params` whose key is NOT a
  namespace-qualified keyword. Entries with any other kind of key
  (plain keywords, strings, ...) are kept untouched."
  [params]
  (into {}
        (filter (fn [entry]
                  (not (qualified-keyword? (first entry)))))
        params))
|
|
|
|
|
|
|
|
(defn- get-database-object-by-hash
  "Looks up a single non-deleted `storage_object` row whose metadata
  `hash` and `bucket` match the given values and which belongs to
  `backend`. Returns the row with its `:metadata` column decoded, or
  nil when no row matches."
  [pool-or-conn backend bucket hash]
  (let [sql (str "select * from storage_object "
                 " where (metadata->>'~:hash') = ? "
                 " and (metadata->>'~:bucket') = ? "
                 " and backend = ?"
                 " and deleted_at is null"
                 " limit 1")
        row (db/exec-one! pool-or-conn [sql hash bucket (name backend)])]
    (when (some? row)
      (update row :metadata db/decode-transit-pgobject))))
|
2021-01-25 15:22:39 +01:00
|
|
|
|
2020-12-30 14:38:00 +01:00
|
|
|
(defn- create-database-object
  "Resolves the `storage_object` row for `params` and wraps it with
  `impl/storage-object`. The whole body is dispatched to `executor`.

  The metadata is built from the non-qualified entries of `params`,
  plus a `:hash` entry when `content` satisfies `impl/IContentHash`.

  When `::deduplicate?` is set and the metadata has both `:hash` and
  `:bucket`, an existing non-deleted row with the same hash, bucket
  and backend is returned instead of inserting. Otherwise a new row is
  inserted and its metadata is tagged with `::created? true`."
  [{:keys [::backend ::wrk/executor ::db/pool-or-conn]}
   {:keys [::content ::expired-at ::touched-at] :as params}]
  (px/with-dispatch executor
    (let [id       (uuid/random)
          base     (get-metadata params)
          mdata    (if (satisfies? impl/IContentHash content)
                     (assoc base :hash (impl/get-hash content))
                     base)

          ;; NOTE: deleted objects are not reused for now. In the
          ;; future we may consider reusing an object when a duplicate
          ;; is found that is marked for deletion but has not been
          ;; physically deleted yet.
          existing (when (and (::deduplicate? params)
                              (:hash mdata)
                              (:bucket mdata))
                     (get-database-object-by-hash pool-or-conn
                                                  backend
                                                  (:bucket mdata)
                                                  (:hash mdata)))

          row      (or existing
                       (let [inserted (db/insert! pool-or-conn :storage-object
                                                  {:id id
                                                   :size (impl/get-size content)
                                                   :backend (name backend)
                                                   :metadata (db/tjson mdata)
                                                   :deleted-at expired-at
                                                   :touched-at touched-at})]
                         (update inserted :metadata
                                 (fn [raw]
                                   (-> (db/decode-transit-pgobject raw)
                                       (assoc ::created? true))))))]

      (impl/storage-object (:id row)
                           (:size row)
                           (:created-at row)
                           (:deleted-at row)
                           (:touched-at row)
                           backend
                           (:metadata row)))))
|
2020-12-30 14:38:00 +01:00
|
|
|
|
|
|
|
;; Selects a storage object by id; rows whose `deleted_at` is set and
;; not in the future are filtered out.
(def ^:private sql:retrieve-storage-object
  "select * from storage_object where id = ? and (deleted_at is null or deleted_at > now())")
|
2020-12-30 14:38:00 +01:00
|
|
|
|
2021-01-19 15:04:28 +01:00
|
|
|
(defn row->storage-object
  "Converts a `storage_object` database row into a storage object.
  The `:metadata` column is decoded when present (an empty map is used
  otherwise) and the `:backend` column is turned into a keyword."
  [res]
  (let [raw   (:metadata res)
        mdata (or (when (some? raw)
                    (db/decode-transit-pgobject raw))
                  {})]
    (impl/storage-object (:id res)
                         (:size res)
                         (:created-at res)
                         (:deleted-at res)
                         (:touched-at res)
                         (keyword (:backend res))
                         mdata)))
|
2021-01-19 15:04:28 +01:00
|
|
|
|
2020-12-30 14:38:00 +01:00
|
|
|
(defn- retrieve-database-object
  "Fetches the storage object row identified by `id` using
  `sql:retrieve-storage-object` and converts it with
  `row->storage-object`. Returns nil when the query yields no row."
  [conn id]
  (let [row (db/exec-one! conn [sql:retrieve-storage-object id])]
    (when (some? row)
      (row->storage-object row))))
|
2020-12-30 14:38:00 +01:00
|
|
|
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;; API
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
2021-01-30 11:28:11 +01:00
|
|
|
(defn object->relative-path
  "Returns the result of `impl/id->path` applied to the `:id` of the
  given object."
  [{:keys [id]}]
  (impl/id->path id))
|
|
|
|
|
|
|
|
(defn file-url->path
  "Converts `url` into a path: the value is stringified, parsed as a
  `java.net.URI` and handed to `fs/path`. Returns nil when `url` is
  falsy."
  [url]
  (when url
    (-> (str url)
        (java.net.URI.)
        (fs/path))))
|
2021-01-30 11:28:11 +01:00
|
|
|
|
2022-02-28 17:15:58 +01:00
|
|
|
;; Vars from `app.storage.impl` passed to `dm/export` (presumably to
;; make them reachable through this namespace -- confirm against the
;; macro definition).
(dm/export impl/content)
(dm/export impl/wrap-with-hash)
(dm/export impl/object?)
|
2020-12-30 14:38:00 +01:00
|
|
|
|
|
|
|
(defn get-object
  "Retrieves the storage object identified by `id`. The storage is
  validated against `::storage` first; the database lookup itself is
  dispatched to the storage executor."
  [{:keys [::db/pool-or-conn ::wrk/executor] :as storage} id]
  (us/assert! ::storage storage)
  (px/with-dispatch executor
    (retrieve-database-object pool-or-conn id)))
|
2020-12-30 14:38:00 +01:00
|
|
|
|
2022-02-28 17:15:58 +01:00
|
|
|
(defn put-object!
  "Creates a new object with the provided content.

  The storage must have a `::backend` selected and `::content` must
  conform to `::impl/content`. The database row is resolved first;
  the content is written to the backend only when the row was freshly
  created (`::created?` in the object metadata). A deduplicated hit is
  returned without touching the backend."
  [{:keys [::backend] :as storage} {:keys [::content] :as params}]
  (us/assert! ::storage-with-backend storage)
  (us/assert! ::impl/content content)
  (letfn [(store-content [object]
            (if-not (::created? (meta object))
              (p/resolved object)
              ;; Store the data finally on the underlying storage subsystem.
              (-> (impl/resolve-backend storage backend)
                  (impl/put-object object content))))]
    (p/mcat store-content
            (create-database-object storage params))))
|
2020-12-30 14:38:00 +01:00
|
|
|
|
2022-02-28 17:15:58 +01:00
|
|
|
(defn touch-object!
  "Mark object as touched.

  Accepts either a storage object or a bare id and sets `touched_at`
  to the current instant on the matching row. The update is dispatched
  to the storage executor and evaluates to true when at least one row
  was updated."
  [{:keys [::db/pool-or-conn ::wrk/executor] :as storage} object-or-id]
  (us/assert! ::storage storage)
  (px/with-dispatch executor
    (let [id     (cond-> object-or-id
                   (impl/object? object-or-id) :id)
          result (db/update! pool-or-conn :storage-object
                             {:touched-at (dt/now)}
                             {:id id}
                             {::db/return-keys? false})
          nrows  (db/get-update-count result)]
      (pos? nrows))))
|
2021-01-04 18:41:05 +01:00
|
|
|
|
2020-12-30 14:38:00 +01:00
|
|
|
(defn get-object-data
  "Return an input stream instance of the object content.

  Delegates to the backend referenced by the object. When the object
  has an `:expired-at` that is not after the current instant, a
  promise resolved with nil is returned instead."
  [storage object]
  (us/assert! ::storage storage)
  (let [expires (:expired-at object)]
    (if (and (some? expires)
             (not (dt/is-after? expires (dt/now))))
      (p/resolved nil)
      (let [backend (impl/resolve-backend storage (:backend object))]
        (impl/get-object-data backend object)))))
|
2020-12-30 14:38:00 +01:00
|
|
|
|
2021-06-14 11:50:26 +02:00
|
|
|
(defn get-object-bytes
  "Returns a byte array of object content.

  Delegates to the backend referenced by the object. When the object
  has an `:expired-at` that is not after the current instant, a
  promise resolved with nil is returned instead."
  [storage object]
  (us/assert! ::storage storage)
  (let [expires (:expired-at object)]
    (if (and (some? expires)
             (not (dt/is-after? expires (dt/now))))
      (p/resolved nil)
      (let [backend (impl/resolve-backend storage (:backend object))]
        (impl/get-object-bytes backend object)))))
|
2021-06-14 11:50:26 +02:00
|
|
|
|
2020-12-30 14:38:00 +01:00
|
|
|
(defn get-object-url
  "Asks the backend referenced by the object for its URL, forwarding
  `options` (nil in the two-argument arity). When the object has an
  `:expired-at` that is not after the current instant, a promise
  resolved with nil is returned instead."
  ([storage object]
   (get-object-url storage object nil))
  ([storage object options]
   (us/assert! ::storage storage)
   (let [expires (:expired-at object)]
     (if (and (some? expires)
              (not (dt/is-after? expires (dt/now))))
       (p/resolved nil)
       (let [backend (impl/resolve-backend storage (:backend object))]
         (impl/get-object-url backend object options))))))
|
2020-12-30 14:38:00 +01:00
|
|
|
|
2021-01-30 11:28:11 +01:00
|
|
|
(defn get-object-path
  "Get the Path to the object. Only works with `:fs` type of
  storages.

  Resolves to nil when the backend of the object is not of type `:fs`
  or when the object has an `:expired-at` that is not after the
  current instant. Otherwise the backend URL of the object is
  converted with `file-url->path`."
  [storage object]
  (us/assert! ::storage storage)
  (let [backend (impl/resolve-backend storage (:backend object))
        expires (:expired-at object)]
    (cond
      (not= :fs (::type backend))
      (p/resolved nil)

      (and (some? expires)
           (not (dt/is-after? expires (dt/now))))
      (p/resolved nil)

      :else
      (p/fmap file-url->path
              (impl/get-object-url backend object nil)))))
|
2022-02-28 17:15:58 +01:00
|
|
|
|
|
|
|
(defn del-object!
  "Marks an object as deleted by setting `deleted_at` to the current
  instant on the matching row. Accepts either a storage object or a
  bare id. The update is dispatched to the storage executor and
  evaluates to true when at least one row was updated."
  [{:keys [::db/pool-or-conn ::wrk/executor] :as storage} object-or-id]
  (us/assert! ::storage storage)
  (px/with-dispatch executor
    (let [id     (cond-> object-or-id
                   (impl/object? object-or-id) :id)
          result (db/update! pool-or-conn :storage-object
                             {:deleted-at (dt/now)}
                             {:id id}
                             {::db/return-keys? false})
          nrows  (db/get-update-count result)]
      (pos? nrows))))
|
2021-01-25 15:22:39 +01:00
|
|
|
|
2022-02-24 23:36:53 +01:00
|
|
|
;; More vars from `app.storage.impl` passed to `dm/export` (presumably
;; to make them reachable through this namespace -- confirm against the
;; macro definition).
(dm/export impl/resolve-backend)
(dm/export impl/calculate-hash)
|
2020-12-30 14:38:00 +01:00
|
|
|
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
2021-01-29 23:56:11 +01:00
|
|
|
;; Garbage Collection: Permanently delete objects
|
2020-12-30 14:38:00 +01:00
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
|
|
|
;; A task responsible to permanently delete already marked as deleted
|
2022-03-04 18:00:16 +01:00
|
|
|
;; storage files. The storage objects are practically never marked to
|
|
|
|
;; be deleted directly by the api call. The touched-gc is responsible
|
2022-06-22 11:34:36 +02:00
|
|
|
;; of collecting the usage of the object and mark it as deleted. Only
|
|
|
|
;; the TMP files are created with an expiration date in the future.
|
2020-12-30 14:38:00 +01:00
|
|
|
|
2022-02-10 19:50:40 +01:00
|
|
|
;; Forward declaration: the SQL text is defined further down in this
;; file, after the task that references it.
(declare sql:retrieve-deleted-objects-chunk)
|
2020-12-30 14:38:00 +01:00
|
|
|
|
2021-01-29 23:56:11 +01:00
|
|
|
;; Spec checked by integrant before `::gc-deleted-task` is initialized:
;; the task needs the storage component and a database pool.
(defmethod ig/pre-init-spec ::gc-deleted-task
  [_]
  (s/keys :req [::storage
                ::db/pool]))
|
2020-12-30 14:38:00 +01:00
|
|
|
|
2022-08-11 16:42:28 +02:00
|
|
|
;; Sets `::min-age` to two hours on the task configuration. Note that
;; this is an unconditional `assoc`: any `::min-age` already present in
;; `cfg` is overwritten.
(defmethod ig/prep-key ::gc-deleted-task
  [_ cfg]
  (let [default-min-age (dt/duration {:hours 2})]
    (assoc cfg ::min-age default-min-age)))
|
2022-08-11 16:42:28 +02:00
|
|
|
|
2021-01-29 23:56:11 +01:00
|
|
|
;; Builds the task function that permanently removes storage objects
;; already marked as deleted. The returned function accepts a params
;; map (an optional `:min-age` overrides the configured one), runs
;; inside a single database transaction and returns `{:deleted n}`.
(defmethod ig/init-key ::gc-deleted-task
  [_ {:keys [::db/pool ::storage ::min-age]}]
  (letfn [;; Runs one round of the chunk query. Returns a pair
          ;; `[next-cursor groups]`, where `next-cursor` is the
          ;; `:created-at` of the last returned row and `groups` is a
          ;; seq of `[backend-id #{ids}]` entries (nil when the chunk
          ;; is empty).
          (retrieve-deleted-objects-chunk [conn min-age cursor]
            (let [interval (db/interval min-age)
                  rows     (db/exec! conn [sql:retrieve-deleted-objects-chunk interval cursor])
                  next-key (some-> rows peek :created-at)
                  groups   (when-let [rows (seq rows)]
                             (seq (d/group-by (comp keyword :backend) :id #{} rows)))]
              [next-key groups]))

          ;; Chains chunk retrievals through `d/iteration`, starting
          ;; with the current instant as cursor.
          (retrieve-deleted-objects [conn min-age]
            (d/iteration #(retrieve-deleted-objects-chunk conn min-age %)
                         :initk (dt/now)
                         :vf second
                         :kf first))

          ;; Logs every id and then deletes the whole set from the
          ;; backend, blocking until the backend operation completes.
          (delete-in-bulk [backend-id ids]
            (let [backend (impl/resolve-backend storage backend-id)]
              (run! (fn [id]
                      (l/debug :hint "gc-deleted: permanently delete storage object" :backend backend-id :id id))
                    ids)
              (deref (impl/del-objects-in-bulk backend ids))))]

    (fn [params]
      (let [min-age (or (:min-age params) min-age)]
        (db/with-atomic [conn pool]
          (loop [total   0
                 pending (retrieve-deleted-objects conn min-age)]
            (if-let [[backend-id ids] (first pending)]
              (do
                (delete-in-bulk backend-id ids)
                (recur (+ total (count ids))
                       (rest pending)))
              (do
                (l/info :hint "gc-deleted: task finished" :min-age (dt/format-duration min-age) :total total)
                {:deleted total}))))))))
|
2020-12-30 14:38:00 +01:00
|
|
|
|
2022-02-10 19:50:40 +01:00
|
|
|
;; Deletes (and returns) a chunk of at most 25 storage objects whose
;; `deleted_at` is older than the given interval and whose `created_at`
;; is before the given cursor, newest first.
;;
;; Parameters, in order: the minimum age (cast to interval) and the
;; `created_at` cursor.
(def sql:retrieve-deleted-objects-chunk
  "with items_part as (
     select s.id
       from storage_object as s
      where s.deleted_at is not null
        and s.deleted_at < (now() - ?::interval)
        and s.created_at < ?
      order by s.created_at desc
      limit 25
   )
   delete from storage_object
    where id in (select id from items_part)
   returning *;")
|
|
|
|
|
2021-01-29 23:56:11 +01:00
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
2021-11-15 09:53:10 -05:00
|
|
|
;; Garbage Collection: Analyze touched objects
|
2021-01-29 23:56:11 +01:00
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
2022-03-04 18:00:16 +01:00
|
|
|
;; This task is part of the garbage collection process of storage
|
|
|
|
;; objects and is responsible for analyzing the touched objects and
|
|
|
|
;; mark them for deletion if corresponds.
|
2021-01-29 23:56:11 +01:00
|
|
|
;;
|
2022-02-28 17:15:58 +01:00
|
|
|
;; For example: when file_media_object is deleted, the depending
|
|
|
|
;; storage_object are marked as touched. This means that some files
|
|
|
|
;; that depend on a concrete storage_object no longer exist and
|
|
|
|
;; maybe this storage_object is no longer necessary and can be
|
|
|
|
;; eligible for elimination. This task periodically analyzes touched
|
|
|
|
;; objects and mark them as freeze (means that has other references
|
|
|
|
;; and the object is still valid) or deleted (no more references to
|
|
|
|
;; this object so is ready to be deleted).
|
2021-01-29 23:56:11 +01:00
|
|
|
|
2022-02-10 19:50:40 +01:00
|
|
|
;; Forward declarations: these SQL texts are defined further down in
;; this file, after the task that references them.
(declare sql:retrieve-touched-objects-chunk
         sql:retrieve-file-media-object-nrefs
         sql:retrieve-team-font-variant-nrefs
         sql:retrieve-profile-nrefs)
|
2021-01-29 23:56:11 +01:00
|
|
|
|
|
|
|
;; Spec checked by integrant before `::gc-touched-task` is initialized:
;; only a database pool is required.
(defmethod ig/pre-init-spec ::gc-touched-task
  [_]
  (s/keys :req [::db/pool]))
|
2021-01-29 23:56:11 +01:00
|
|
|
|
|
|
|
;; Builds the task function that analyzes touched storage objects. For
;; every touched object the number of references is counted with the
;; query matching its bucket; objects with references are "frozen"
;; (`touched_at` reset to null) and objects without references are
;; marked as deleted. The whole run happens inside one database
;; transaction and returns `{:freeze n :delete m}`.
(defmethod ig/init-key ::gc-touched-task
  [_ {:keys [::db/pool]}]
  (letfn [(get-team-font-variant-nrefs [conn id]
            (:nrefs (db/exec-one! conn [sql:retrieve-team-font-variant-nrefs id id id id])))

          (get-file-media-object-nrefs [conn id]
            (:nrefs (db/exec-one! conn [sql:retrieve-file-media-object-nrefs id id])))

          (get-profile-nrefs [conn id]
            (:nrefs (db/exec-one! conn [sql:retrieve-profile-nrefs id id])))

          (mark-freeze-in-bulk [conn ids]
            (let [id-array (db/create-array conn "uuid" ids)]
              (db/exec-one! conn ["update storage_object set touched_at=null where id = ANY(?)"
                                  id-array])))

          (mark-delete-in-bulk [conn ids]
            (let [id-array (db/create-array conn "uuid" ids)]
              (db/exec-one! conn ["update storage_object set deleted_at=now(), touched_at=null where id = ANY(?)"
                                  id-array])))

          ;; NOTE: getter for the key used to group ids. Originally
          ;; objects carried no such value; then the `:reference`
          ;; prop was introduced, and later it was renamed to
          ;; `:bucket` and became a string. The lookup order below
          ;; exists for backward compatibility.

          ;; NOTE: "file-media-object" is the default because, during
          ;; a deploy, old backend instances can run at the same time
          ;; as new ones and still create storage objects without a
          ;; bucket value; a missing value is known to mean
          ;; :file-media-object.
          (get-bucket [{:keys [metadata]}]
            (or (:bucket metadata)
                (some-> (:reference metadata) d/name)
                "file-media-object"))

          ;; Fetches one chunk of touched rows older than `cursor`.
          ;; Returns nil when the chunk is empty, otherwise a pair
          ;; `[next-cursor {bucket #{ids}}]`.
          (retrieve-touched-chunk [conn cursor]
            (let [decode (fn [row]
                           (d/update-when row :metadata db/decode-transit-pgobject))
                  rows   (mapv decode (db/exec! conn [sql:retrieve-touched-objects-chunk cursor]))]
              (when (seq rows)
                [(:created-at (peek rows))
                 (d/group-by get-bucket :id #{} rows)])))

          (retrieve-touched [conn]
            (d/iteration #(retrieve-touched-chunk conn %)
                         :initk (dt/now)
                         :vf second
                         :kf first))

          ;; Classifies every id using `get-fn` and applies both bulk
          ;; updates at the end. Returns `[n-frozen n-deleted]`.
          (process-objects! [conn get-fn ids bucket]
            (loop [to-freeze #{}
                   to-delete #{}
                   pending   (seq ids)]
              (if-let [id (first pending)]
                (let [nrefs   (get-fn conn id)
                      freeze? (pos? nrefs)]
                  (l/debug :hint "gc-touched: processing storage object"
                           :id id :status (if freeze? "freeze" "delete")
                           :bucket bucket :refs nrefs)
                  (recur (cond-> to-freeze freeze? (conj id))
                         (cond-> to-delete (not freeze?) (conj id))
                         (rest pending)))
                (do
                  (when-let [frozen (seq to-freeze)]
                    (mark-freeze-in-bulk conn frozen))
                  (when-let [deleted (seq to-delete)]
                    (mark-delete-in-bulk conn deleted))
                  [(count to-freeze) (count to-delete)]))))]

    (fn [_]
      (db/with-atomic [conn pool]
        (loop [to-freeze 0
               to-delete 0
               groups    (retrieve-touched conn)]
          (if-let [[bucket ids] (first groups)]
            (let [get-fn (case bucket
                           "file-media-object" get-file-media-object-nrefs
                           "team-font-variant" get-team-font-variant-nrefs
                           "profile"           get-profile-nrefs
                           (ex/raise :type :internal
                                     :code :unexpected-unknown-reference
                                     :hint (dm/fmt "unknown reference %" bucket)))
                  [f d]  (process-objects! conn get-fn ids bucket)]
              (recur (+ to-freeze f)
                     (+ to-delete d)
                     (rest groups)))
            (do
              (l/info :hint "gc-touched: task finished" :to-freeze to-freeze :to-delete to-delete)
              {:freeze to-freeze :delete to-delete})))))))
|
|
|
|
|
|
|
|
;; Selects a chunk of at most 500 storage objects that have
;; `touched_at` set and whose `created_at` is before the given cursor,
;; newest first. Single parameter: the `created_at` cursor.
(def sql:retrieve-touched-objects-chunk
  "SELECT so.*
     FROM storage_object AS so
    WHERE so.touched_at IS NOT NULL
      AND so.created_at < ?
    ORDER by so.created_at DESC
    LIMIT 500;")
|
2022-02-10 19:50:40 +01:00
|
|
|
|
|
|
|
;; Counts the `file_media_object` rows that reference a storage object
;; either as media or as thumbnail. Takes the storage object id twice.
(def sql:retrieve-file-media-object-nrefs
  "select ((select count(*) from file_media_object where media_id = ?) +
           (select count(*) from file_media_object where thumbnail_id = ?)) as nrefs")
|
|
|
|
|
|
|
|
;; Counts the `team_font_variant` rows that reference a storage object
;; through any of the four font file columns. Takes the storage object
;; id four times.
(def sql:retrieve-team-font-variant-nrefs
  "select ((select count(*) from team_font_variant where woff1_file_id = ?) +
           (select count(*) from team_font_variant where woff2_file_id = ?) +
           (select count(*) from team_font_variant where otf_file_id = ?) +
           (select count(*) from team_font_variant where ttf_file_id = ?)) as nrefs")
|
2022-02-28 17:15:58 +01:00
|
|
|
|
|
|
|
;; Counts the `profile` and `team` rows whose `photo_id` references a
;; storage object. Takes the storage object id twice.
(def sql:retrieve-profile-nrefs
  "select ((select count(*) from profile where photo_id = ?) +
           (select count(*) from team where photo_id = ?)) as nrefs")
|