0
Fork 0
mirror of https://github.com/penpot/penpot.git synced 2025-01-24 15:39:50 -05:00

Make storage tasks more testable and traceable.

This commit is contained in:
Andrey Antukh 2021-01-31 17:00:22 +01:00 committed by Alonso Torres
parent 586d95fb55
commit 26948fb68b

View file

@ -23,6 +23,7 @@
[app.util.time :as dt] [app.util.time :as dt]
[app.worker :as wrk] [app.worker :as wrk]
[clojure.spec.alpha :as s] [clojure.spec.alpha :as s]
[clojure.tools.logging :as log]
[cuerdas.core :as str] [cuerdas.core :as str]
[datoteka.core :as fs] [datoteka.core :as fs]
[integrant.core :as ig] [integrant.core :as ig]
@ -285,25 +286,34 @@
(defmethod ig/init-key ::gc-deleted-task (defmethod ig/init-key ::gc-deleted-task
[_ {:keys [pool storage min-age] :as cfg}] [_ {:keys [pool storage min-age] :as cfg}]
(letfn [(retrieve-deleted-objects [conn] (letfn [(group-by-backend [rows]
(let [min-age (db/interval min-age) (let [conj (fnil conj [])]
result (db/exec! conn [sql:retrieve-deleted-objects min-age])] [(reduce (fn [acc {:keys [id backend]}]
(when (seq result) (update acc (keyword backend) conj id))
(as-> (group-by (comp keyword :backend) result) $ {}
(reduce-kv #(assoc %1 %2 (map :id %3)) $ $))))) rows)
(count rows)]))
(delete-in-bulk [conn backend ids] (retrieve-deleted-objects [conn]
(let [min-age (db/interval min-age)
rows (db/exec! conn [sql:retrieve-deleted-objects min-age])]
(some-> (seq rows) (group-by-backend))))
(delete-in-bulk [conn [backend ids]]
(let [backend (resolve-backend storage backend) (let [backend (resolve-backend storage backend)
backend (assoc backend :conn conn)] backend (assoc backend :conn conn)]
(impl/del-objects-in-bulk backend ids)))] (impl/del-objects-in-bulk backend ids)))]
(fn [task] (fn [task]
(db/with-atomic [conn pool] (db/with-atomic [conn pool]
(loop [groups (retrieve-deleted-objects conn)] (loop [n 0]
(when groups (if-let [[groups total] (retrieve-deleted-objects conn)]
(doseq [[sid objects] groups] (do
(delete-in-bulk conn sid objects)) (run! (partial delete-in-bulk conn) groups)
(recur (retrieve-deleted-objects conn)))))))) (recur (+ n total)))
(do
(log/infof "gc-deleted: processed %s items" n)
{:deleted n})))))))
(def sql:retrieve-deleted-objects (def sql:retrieve-deleted-objects
"with items_part as ( "with items_part as (
@ -342,10 +352,7 @@
(defmethod ig/init-key ::gc-touched-task (defmethod ig/init-key ::gc-touched-task
[_ {:keys [pool] :as cfg}] [_ {:keys [pool] :as cfg}]
(letfn [(retrieve-touched-objects [conn] (letfn [(group-results [rows]
(seq (db/exec! conn [sql:retrieve-touched-objects])))
(group-resuls [rows]
(let [conj (fnil conj [])] (let [conj (fnil conj [])]
(reduce (fn [acc {:keys [id nrefs]}] (reduce (fn [acc {:keys [id nrefs]}]
(if (pos? nrefs) (if (pos? nrefs)
@ -354,6 +361,10 @@
{} {}
rows))) rows)))
(retrieve-touched [conn]
(let [rows (db/exec! conn [sql:retrieve-touched-objects])]
(some-> (seq rows) (group-results))))
(mark-delete-in-bulk [conn ids] (mark-delete-in-bulk [conn ids]
(db/exec-one! conn ["update storage_object set deleted_at=now(), touched_at=null where id = ANY(?)" (db/exec-one! conn ["update storage_object set deleted_at=now(), touched_at=null where id = ANY(?)"
(db/create-array conn "uuid" (into-array java.util.UUID ids))])) (db/create-array conn "uuid" (into-array java.util.UUID ids))]))
@ -364,16 +375,17 @@
(fn [task] (fn [task]
(db/with-atomic [conn pool] (db/with-atomic [conn pool]
(loop [] (loop [cntf 0
(when-let [touched (retrieve-touched-objects conn)] cntd 0]
(let [{:keys [to-delete to-freeze]} (group-resuls touched)] (if-let [{:keys [to-delete to-freeze]} (retrieve-touched conn)]
(when (seq to-delete) (do
(mark-delete-in-bulk conn to-delete)) (when (seq to-delete) (mark-delete-in-bulk conn to-delete))
(when (seq to-freeze) (when (seq to-freeze) (mark-freeze-in-bulk conn to-freeze))
(mark-freeze-in-bulk conn to-freeze)) (recur (+ cntf (count to-freeze))
(Thread/sleep 100) (+ cntd (count to-delete))))
(recur)))) (do
nil)))) (log/infof "gc-touched: %s objects marked as freeze and %s marked to be deleted" cntf cntd)
{:freeze cntf :delete cntd})))))))
(def sql:retrieve-touched-objects (def sql:retrieve-touched-objects
"select so.id, "select so.id,
@ -400,7 +412,9 @@
;; and is immediately deleted. The responsibility of this task is ;; and is immediately deleted. The responsibility of this task is
;; to check that write log for possible leaked files. ;; to check that write log for possible leaked files.
(declare sql:retrieve-pending) (def recheck-min-age (dt/duration {:hours 1}))
(declare sql:retrieve-pending-to-recheck)
(declare sql:exists-storage-object) (declare sql:exists-storage-object)
(defmethod ig/pre-init-spec ::recheck-task [_] (defmethod ig/pre-init-spec ::recheck-task [_]
@ -408,39 +422,59 @@
(defmethod ig/init-key ::recheck-task (defmethod ig/init-key ::recheck-task
[_ {:keys [pool storage] :as cfg}] [_ {:keys [pool storage] :as cfg}]
(letfn [(retrieve-pending [conn] (letfn [(group-results [rows]
(->> (db/exec! conn [sql:retrieve-pending]) (let [conj (fnil conj [])]
(map (fn [{:keys [backend] :as row}] (reduce (fn [acc {:keys [id backend exist] :as row}]
(assoc row :backend (keyword backend)))) (cond-> (update acc :all conj id)
(seq))) (false? exist)
(update :to-delete conj (dissoc row :exist))))
{}
rows)))
(exists-on-database? [conn id] (group-by-backend [rows]
(:exists (db/exec-one! conn [sql:exists-storage-object id]))) (let [conj (fnil conj [])]
(reduce (fn [acc {:keys [id backend]}]
(update acc (keyword backend) conj id))
{}
rows)))
(recheck-item [conn {:keys [id backend]}] (retrieve-pending [conn]
(when-not (exists-on-database? conn id) (let [rows (db/exec! conn [sql:retrieve-pending-to-recheck (db/interval recheck-min-age)])]
(some-> (seq rows) (group-results))))
(delete-group [conn [backend ids]]
(let [backend (resolve-backend storage backend) (let [backend (resolve-backend storage backend)
backend (assoc backend :conn conn)] backend (assoc backend :conn conn)]
(impl/del-objects-in-bulk backend [id]))))] (impl/del-objects-in-bulk backend ids)))
(delete-all [conn ids]
(let [ids (db/create-array conn "uuid" (into-array java.util.UUID ids))]
(db/exec-one! conn ["delete from storage_pending where id = ANY(?)" ids])))]
(fn [task] (fn [task]
(db/with-atomic [conn pool] (db/with-atomic [conn pool]
(loop [items (retrieve-pending conn)] (loop [n 0 d 0]
(when items (if-let [{:keys [all to-delete]} (retrieve-pending conn)]
(run! (partial recheck-item conn) items) (let [groups (group-by-backend to-delete)]
(recur (retrieve-pending conn)))))))) (run! (partial delete-group conn) groups)
(delete-all conn all)
(recur (+ n (count all))
(+ d (count to-delete))))
(do
(log/infof "recheck: processed %s items, %s deleted" n d)
{:processed n :deleted d})))))))
(def sql:retrieve-pending (def sql:retrieve-pending-to-recheck
"with items_part as ( "select sp.id,
select s.id sp.backend,
from storage_pending as s sp.created_at,
where s.created_at < now() - '1 hour'::interval (case when count(so.id) > 0 then true
order by s.created_at else false
limit 100 end) as exist
) from storage_pending as sp
delete from storage_pending left join storage_object as so
where id in (select id from items_part) on (so.id = sp.id)
returning *;") where sp.created_at < now() - ?::interval
group by 1,2,3
(def sql:exists-storage-object order by sp.created_at asc
"select exists (select id from storage_object where id = ?) as exists") limit 100")