From d68286821b48418cf6672c4d5707ad6168880bab Mon Sep 17 00:00:00 2001 From: Andrey Antukh Date: Mon, 25 Jan 2021 15:22:39 +0100 Subject: [PATCH] :sparkles: Add the notion of temporal files on the storage. --- backend/src/app/config.clj | 13 ++-- backend/src/app/http.clj | 11 ++-- backend/src/app/http/assets.clj | 105 ++++++++++++++++++++------------ backend/src/app/main.clj | 17 ++++-- backend/src/app/storage.clj | 89 +++++++++++++++++++-------- backend/src/app/storage/db.clj | 2 +- backend/src/app/storage/fs.clj | 32 +++++----- backend/src/app/storage/s3.clj | 2 +- 8 files changed, 176 insertions(+), 95 deletions(-) diff --git a/backend/src/app/config.clj b/backend/src/app/config.clj index f258f97b4..af708001e 100644 --- a/backend/src/app/config.clj +++ b/backend/src/app/config.clj @@ -30,12 +30,17 @@ :redis-uri "redis://localhost/0" :storage-backend :fs - :storage-fs-old-directory "resources/public/media" + :storage-fs-directory "resources/public/assets" - :storage-fs-uri "http://localhost:3449/internal/assets/" :storage-s3-region :eu-central-1 :storage-s3-bucket "penpot-devenv-assets-pre" + :local-assets-uri "http://localhost:3449/internal/assets/" + + ;; Special configuration for TMP backend. + :storage-tmp-directory "/tmp/penpot" + :storage-tmp-uri "file:///tmp/penpot/" + :rlimits-password 10 :rlimits-image 2 @@ -83,7 +88,7 @@ (s/def ::storage-backend ::us/keyword) (s/def ::storage-fs-directory ::us/string) -(s/def ::storage-fs-uri ::us/string) +(s/def ::local-assets-uri ::us/string) (s/def ::storage-s3-region ::us/keyword) (s/def ::storage-s3-bucket ::us/string) @@ -193,7 +198,7 @@ ::smtp-username ::storage-backend ::storage-fs-directory - ::storage-fs-uri + ::local-assets-uri ::storage-s3-bucket ::storage-s3-region ::telemetry-enabled diff --git a/backend/src/app/http.clj b/backend/src/app/http.clj index 911639cb5..f29c6388f 100644 --- a/backend/src/app/http.clj +++ b/backend/src/app/http.clj @@ -87,9 +87,10 @@ (s/def ::gitlab-auth map?) (s/def ::ldap-auth fn?) (s/def ::storage map?) +(s/def ::assets map?) (defmethod ig/pre-init-spec ::router [_] - (s/keys :req-un [::rpc ::session ::metrics ::google-auth ::gitlab-auth ::storage])) + (s/keys :req-un [::rpc ::session ::metrics ::google-auth ::gitlab-auth ::storage ::assets])) (defmethod ig/init-key ::router [_ cfg] @@ -109,15 +110,15 @@ :body "internal server error"})))))) (defn- create-router - [{:keys [session rpc google-auth gitlab-auth github-auth metrics ldap-auth storage svgparse] :as cfg}] + [{:keys [session rpc google-auth gitlab-auth github-auth metrics ldap-auth storage svgparse assets] :as cfg}] (rr/router [["/metrics" {:get (:handler metrics)}] ["/assets" {:middleware [[middleware/format-response-body] [middleware/errors errors/handle]]} - ["/by-id/:id" {:get #(assets/objects-handler storage %)}] - ["/by-file-media-id/:id" {:get #(assets/file-objects-handler storage %)}] - ["/by-file-media-id/:id/thumbnail" {:get #(assets/file-thumbnails-handler storage %)}]] + ["/by-id/:id" {:get (:objects-handler assets)}] + ["/by-file-media-id/:id" {:get (:file-objects-handler assets)}] + ["/by-file-media-id/:id/thumbnail" {:get (:file-thumbnails-handler assets)}]] ["/dbg" ["/error-by-id/:id" {:get (:error-report-handler cfg)}]] diff --git a/backend/src/app/http/assets.clj b/backend/src/app/http/assets.clj index fad047948..9fbec9934 100644 --- a/backend/src/app/http/assets.clj +++ b/backend/src/app/http/assets.clj @@ -10,11 +10,16 @@ (ns app.http.assets "Assets related handlers." (:require - [app.common.spec :as us] [app.common.exceptions :as ex] - [app.storage :as sto] + [app.common.spec :as us] [app.db :as db] - [app.util.time :as dt])) + [app.storage :as sto] + [app.util.time :as dt] + [app.metrics :as mtx] + [cuerdas.core :as str] + [clojure.spec.alpha :as s] + [lambdaisland.uri :as u] + [integrant.core :as ig])) (def ^:private cache-max-age (dt/duration {:hours 24})) @@ -22,8 +27,25 @@ (def ^:private signature-max-age (dt/duration {:hours 24 :minutes 15})) +(defn coerce-id + [id] + (let [res (us/uuid-conformer id)] + (when-not (uuid? res) + (ex/raise :type :not-found + :hint "object not found")) + res)) + +(defn- get-file-media-object + [{:keys [pool] :as storage} id] + (let [id (coerce-id id) + mobj (db/exec-one! pool ["select * from file_media_object where id=?" id])] + (when-not mobj + (ex/raise :type :not-found + :hint "object does not found")) + mobj)) + (defn- serve-object - [storage obj] + [{:keys [storage] :as cfg} obj] (let [mdata (meta obj) backend (sto/resolve-backend storage (:backend obj))] (case (:type backend) @@ -42,53 +64,56 @@ :body ""}) :fs - (let [url (sto/get-object-url storage obj)] + (let [purl (u/uri (:public-uri cfg)) + path (sto/object->path obj) + purl (update purl :path + (fn [existing] + (if (str/ends-with? existing "/") + (str existing path) + (str existing "/" path))))] {:status 204 - :headers {"x-accel-redirect" (:path url) + :headers {"x-accel-redirect" (:path purl) "content-type" (:content-type mdata) - "cache-control" (str "max-age=" (inst-ms cache-max-age)) - } + "cache-control" (str "max-age=" (inst-ms cache-max-age))} :body ""})))) (defn- generic-handler - [{:keys [pool] :as storage} request id] - (with-open [conn (db/open pool)] - (let [storage (assoc storage :conn conn) - obj (sto/get-object storage id)] - (if obj - (serve-object storage obj) - {:status 404 :body ""})))) - -(defn coerce-id - [id] - (let [res (us/uuid-conformer id)] - (when-not (uuid? res) - (ex/raise :type :not-found - :hint "object not found")) - res)) - -(defn- get-file-media-object - [conn id] - (let [id (coerce-id id) - mobj (db/exec-one! conn ["select * from file_media_object where id=?" id])] - (when-not mobj - (ex/raise :type :not-found - :hint "object does not found")) - mobj)) + [{:keys [storage] :as cfg} request id] + (let [obj (sto/get-object storage id)] + (if obj + (serve-object cfg obj) + {:status 404 :body ""}))) (defn objects-handler - [storage request] + [{:keys [storage] :as cfg} request] (let [id (get-in request [:path-params :id])] - (generic-handler storage request (coerce-id id)))) + (generic-handler cfg request (coerce-id id)))) (defn file-objects-handler - [{:keys [pool] :as storage} request] + [{:keys [storage] :as cfg} request] (let [id (get-in request [:path-params :id]) - mobj (get-file-media-object pool id)] - (generic-handler storage request (:media-id mobj)))) + mobj (get-file-media-object storage id)] + (generic-handler cfg request (:media-id mobj)))) (defn file-thumbnails-handler - [{:keys [pool] :as storage} request] + [{:keys [storage] :as cfg} request] (let [id (get-in request [:path-params :id]) - mobj (get-file-media-object pool id)] - (generic-handler storage request (or (:thumbnail-id mobj) (:media-id mobj))))) + mobj (get-file-media-object storage id)] + (generic-handler cfg request (or (:thumbnail-id mobj) (:media-id mobj))))) + + +;; --- Initialization + +(s/def ::storage some?) +(s/def ::public-uri ::us/string) +(s/def ::cache-max-age ::dt/duration) +(s/def ::signature-max-age ::dt/duration) + +(defmethod ig/pre-init-spec ::handlers [_] + (s/keys :req-un [::storage ::mtx/metrics ::public-uri ::cache-max-age ::signature-max-age])) + +(defmethod ig/init-key ::handlers + [_ cfg] + {:objects-handler #(objects-handler cfg %) + :file-objects-handler #(file-objects-handler cfg %) + :file-thumbnails-handler #(file-thumbnails-handler cfg %)}) diff --git a/backend/src/app/main.clj b/backend/src/app/main.clj index 2523bd030..7f7f370f6 100644 --- a/backend/src/app/main.clj +++ b/backend/src/app/main.clj @@ -57,7 +57,8 @@ :app.storage/gc-task {:pool (ig/ref :app.db/pool) - :storage (ig/ref :app.storage/storage)} + :storage (ig/ref :app.storage/storage) + :min-age (dt/duration {:hours 2})} :app.storage/recheck-task {:pool (ig/ref :app.db/pool) @@ -83,10 +84,18 @@ :gitlab-auth (ig/ref :app.http.auth/gitlab) :github-auth (ig/ref :app.http.auth/github) :ldap-auth (ig/ref :app.http.auth/ldap) + :assets (ig/ref :app.http.assets/handlers) :svgparse (ig/ref :app.svgparse/handler) :storage (ig/ref :app.storage/storage) :error-report-handler (ig/ref :app.error-reporter/handler)} + :app.http.assets/handlers + {:metrics (ig/ref :app.metrics/metrics) + :public-uri (:local-assets-uri cfg/config) + :storage (ig/ref :app.storage/storage) + :cache-max-age (dt/duration {:hours 24}) + :signature-max-age (dt/duration {:hours 24 :minutes 5})} + :app.svgparse/svgc {:metrics (ig/ref :app.metrics/metrics)} @@ -266,9 +275,9 @@ :app.storage/storage {:pool (ig/ref :app.db/pool) :executor (ig/ref :app.worker/executor) - :backends {:s3 (ig/ref :app.storage.s3/backend) - :fs (ig/ref :app.storage.fs/backend) - :db (ig/ref :app.storage.db/backend)}} + :backends {:s3 (ig/ref :app.storage.s3/backend) + :db (ig/ref :app.storage.db/backend) + :fs (ig/ref :app.storage.fs/backend)}} :app.storage.s3/backend {:region (:storage-s3-region cfg/config) diff --git a/backend/src/app/storage.clj b/backend/src/app/storage.clj index 32b9fbdf1..54fa475d8 100644 --- a/backend/src/app/storage.clj +++ b/backend/src/app/storage.clj @@ -36,10 +36,13 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (s/def ::backend ::us/keyword) + +(s/def ::s3 ::ss3/backend) +(s/def ::fs ::sfs/backend) +(s/def ::db ::sdb/backend) + (s/def ::backends - (s/map-of ::us/keyword (s/or :s3 (s/nilable ::ss3/backend) - :fs (s/nilable ::sfs/backend) - :db (s/nilable ::sdb/backend)))) + (s/keys :opt-un [::s3 ::fs ::db])) (defmethod ig/pre-init-spec ::storage [_] (s/keys :req-un [::backend ::wrk/executor ::db/pool ::backends])) @@ -57,7 +60,7 @@ ;; Database Objects ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(defrecord StorageObject [id size created-at backend]) +(defrecord StorageObject [id size created-at expired-at backend]) (def ^:private sql:insert-storage-object @@ -65,40 +68,58 @@ values (?, ?, ?, ?::jsonb) returning *") +(def ^:private + sql:insert-storage-object-with-expiration + "insert into storage_object (id, size, backend, metadata, deleted_at) + values (?, ?, ?, ?::jsonb, ?) + returning *") + +(defn- insert-object + [conn id size backend mdata expiration] + (if expiration + (db/exec-one! conn [sql:insert-storage-object-with-expiration id size backend mdata expiration]) + (db/exec-one! conn [sql:insert-storage-object id size backend mdata]))) + (defn- create-database-object [{:keys [conn backend]} {:keys [content] :as object}] (if (instance? StorageObject object) (let [id (uuid/random) mdata (meta object) - result (db/exec-one! conn [sql:insert-storage-object id - (:size object) - (name backend) - (db/tjson mdata)])] + result (insert-object conn + id + (:size object) + (name backend) + (db/tjson mdata) + (:expired-at object))] (assoc object :id (:id result) :backend backend :created-at (:created-at result))) (let [id (uuid/random) - mdata (dissoc object :content) - result (db/exec-one! conn [sql:insert-storage-object id - (count content) - (name backend) - (db/tjson mdata)])] + mdata (dissoc object :content :expired-at) + result (insert-object conn + id + (count content) + (name backend) + (db/tjson mdata) + (:expired-at object))] (StorageObject. (:id result) (:size result) (:created-at result) + (:deleted-at result) backend mdata nil)))) (def ^:private sql:retrieve-storage-object - "select * from storage_object where id = ? and deleted_at is null") + "select * from storage_object where id = ? and (deleted_at is null or deleted_at > now())") (defn row->storage-object [res] (let [mdata (some-> (:metadata res) (db/decode-transit-pgobject))] (StorageObject. (:id res) (:size res) (:created-at res) + (:deleted-at res) (keyword (:backend res)) mdata nil))) @@ -109,7 +130,7 @@ (row->storage-object res))) (def sql:delete-storage-object - "update storage_object set deleted_at=now() where id=? and deleted_at is null") + "update storage_object set deleted_at=now() where id=?") (defn- delete-database-object [{:keys [conn] :as storage} id] @@ -183,16 +204,28 @@ ([storage object] (get-object-url storage object nil)) ([{:keys [conn pool] :as storage} object options] - ;; As this operation does not need the database connection, the - ;; assoc of the conn to backend is ommited. (-> (assoc storage :conn (or conn pool)) (resolve-backend (:backend object)) (impl/get-object-url object options)))) +(defn object->path + [{:keys [id] :as obj}] + (impl/id->path id)) + (defn del-object - [{:keys [conn pool] :as storage} id] + [{:keys [conn pool] :as storage} id-or-obj] (-> (assoc storage :conn (or conn pool)) - (delete-database-object id))) + (delete-database-object (if (uuid? id-or-obj) id-or-obj (:id id-or-obj))))) + +(defn put-tmp-object + "A special function for create an object explicitly setting the TMP backend + and marking the object as deleted." + [storage params] + (let [storage (assoc storage :backend :fs) + params (assoc params + :expired-at (dt/in-future {:hours 2}) + :temporal true)] + (put-object storage params))) ;; --- impl @@ -214,15 +247,19 @@ (declare sql:retrieve-deleted-objects) +(s/def ::min-age ::dt/duration) + (defmethod ig/pre-init-spec ::gc-task [_] - (s/keys :req-un [::storage ::db/pool])) + (s/keys :req-un [::storage ::db/pool ::min-age])) (defmethod ig/init-key ::gc-task - [_ {:keys [pool storage] :as cfg}] + [_ {:keys [pool storage min-age] :as cfg}] (letfn [(retrieve-deleted-objects [conn] - (when-let [result (seq (db/exec! conn [sql:retrieve-deleted-objects]))] - (as-> (group-by (comp keyword :backend) result) $ - (reduce-kv #(assoc %1 %2 (map :id %3)) $ $)))) + (let [min-age (db/interval min-age) + result (db/exec! conn [sql:retrieve-deleted-objects min-age])] + (when (seq result) + (as-> (group-by (comp keyword :backend) result) $ + (reduce-kv #(assoc %1 %2 (map :id %3)) $ $))))) (delete-in-bulk [conn backend ids] (let [backend (resolve-backend storage backend) @@ -239,8 +276,10 @@ (def sql:retrieve-deleted-objects "with items_part as ( - select s.id from storage_object as s + select s.id + from storage_object as s where s.deleted_at is not null + and s.deleted_at < (now() - ?::interval) order by s.deleted_at limit 500 ) diff --git a/backend/src/app/storage/db.clj b/backend/src/app/storage/db.clj index 6ca3a8044..e083128f3 100644 --- a/backend/src/app/storage/db.clj +++ b/backend/src/app/storage/db.clj @@ -34,7 +34,7 @@ [_ cfg] (assoc cfg :type :db)) -(s/def ::type #{:db}) +(s/def ::type ::us/keyword) (s/def ::backend (s/keys :req-un [::type ::db/pool])) diff --git a/backend/src/app/storage/fs.clj b/backend/src/app/storage/fs.clj index fb6015e46..998b0e8e8 100644 --- a/backend/src/app/storage/fs.clj +++ b/backend/src/app/storage/fs.clj @@ -29,22 +29,25 @@ ;; --- BACKEND INIT (s/def ::directory ::us/string) -(s/def ::uri ::us/string) (defmethod ig/pre-init-spec ::backend [_] - (s/keys :opt-un [::directory ::uri])) + (s/keys :opt-un [::directory])) (defmethod ig/init-key ::backend - [_ cfg] + [key cfg] ;; Return a valid backend data structure only if all optional ;; parameters are provided. - (when (and (string? (:directory cfg)) - (string? (:uri cfg))) - (assoc cfg :type :fs))) + (when (string? (:directory cfg)) + (let [dir (fs/normalize (:directory cfg))] + (assoc cfg + :type :fs + :directory (str dir) + :uri (u/uri (str "file://" dir)))))) -(s/def ::type #{:fs}) +(s/def ::type ::us/keyword) +(s/def ::uri #(instance? lambdaisland.uri.URI %)) (s/def ::backend - (s/keys :req-un [::directory ::uri ::type])) + (s/keys :req-un [::type ::directory ::uri])) ;; --- API IMPL @@ -82,13 +85,12 @@ (io/input-stream full))) (defmethod impl/get-object-url :fs - [backend {:keys [id] :as object} _] - (let [uri (u/uri (:uri backend))] - (update uri :path - (fn [existing] - (if (str/ends-with? existing "/") - (str existing (impl/id->path id)) - (str existing "/" (impl/id->path id))))))) + [{:keys [uri] :as backend} {:keys [id] :as object} _] + (update uri :path + (fn [existing] + (if (str/ends-with? existing "/") + (str existing (impl/id->path id)) + (str existing "/" (impl/id->path id)))))) (defmethod impl/del-objects-in-bulk :fs [backend ids] diff --git a/backend/src/app/storage/s3.clj b/backend/src/app/storage/s3.clj index b0166a97a..f8027a079 100644 --- a/backend/src/app/storage/s3.clj +++ b/backend/src/app/storage/s3.clj @@ -76,7 +76,7 @@ :presigner presigner :type :s3)))) -(s/def ::type #{:s3}) +(s/def ::type ::us/keyword) (s/def ::client #(instance? S3Client %)) (s/def ::presigner #(instance? S3Presigner %)) (s/def ::backend