0
Fork 0
mirror of https://github.com/penpot/penpot.git synced 2025-01-24 23:49:45 -05:00

Improve worker queue management

and add a specific worker instance for webhooks
This commit is contained in:
Andrey Antukh 2022-12-05 23:01:53 +01:00
parent 7f589b09ca
commit c0a4b7dc76
3 changed files with 106 additions and 84 deletions

View file

@ -108,7 +108,9 @@
(s/def ::default-executor-parallelism ::us/integer) (s/def ::default-executor-parallelism ::us/integer)
(s/def ::scheduled-executor-parallelism ::us/integer) (s/def ::scheduled-executor-parallelism ::us/integer)
(s/def ::worker-parallelism ::us/integer)
(s/def ::worker-default-parallelism ::us/integer)
(s/def ::worker-webhook-parallelism ::us/integer)
(s/def ::authenticated-cookie-domain ::us/string) (s/def ::authenticated-cookie-domain ::us/string)
(s/def ::authenticated-cookie-name ::us/string) (s/def ::authenticated-cookie-name ::us/string)
@ -222,7 +224,8 @@
::error-report-webhook ::error-report-webhook
::default-executor-parallelism ::default-executor-parallelism
::scheduled-executor-parallelism ::scheduled-executor-parallelism
::worker-parallelism ::worker-default-parallelism
::worker-webhook-parallelism
::file-change-snapshot-every ::file-change-snapshot-every
::file-change-snapshot-timeout ::file-change-snapshot-timeout
::user-feedback-destination ::user-feedback-destination

View file

@ -494,20 +494,28 @@
{:cron #app/cron "30 */5 * * * ?" ;; every 5m {:cron #app/cron "30 */5 * * * ?" ;; every 5m
:task :audit-log-gc})]} :task :audit-log-gc})]}
::wrk/scheduler ::wrk/dispatcher
{::rds/redis (ig/ref ::rds/redis) {::rds/redis (ig/ref ::rds/redis)
::mtx/metrics (ig/ref ::mtx/metrics) ::mtx/metrics (ig/ref ::mtx/metrics)
::db/pool (ig/ref ::db/pool)} ::db/pool (ig/ref ::db/pool)}
::wrk/worker [::default ::wrk/worker]
{::wrk/parallelism (cf/get ::worker-parallelism 1) {::wrk/parallelism (cf/get ::worker-default-parallelism 1)
;; FIXME: read queues from configuration ::wrk/queue :default
::wrk/queue "default" ::rds/redis (ig/ref ::rds/redis)
::wrk/registry (ig/ref ::wrk/registry)
::mtx/metrics (ig/ref ::mtx/metrics)
::db/pool (ig/ref ::db/pool)}
[::webhook ::wrk/worker]
{::wrk/parallelism (cf/get ::worker-webhook-parallelism 1)
::wrk/queue :webhooks
::rds/redis (ig/ref ::rds/redis) ::rds/redis (ig/ref ::rds/redis)
::wrk/registry (ig/ref ::wrk/registry) ::wrk/registry (ig/ref ::wrk/registry)
::mtx/metrics (ig/ref ::mtx/metrics) ::mtx/metrics (ig/ref ::mtx/metrics)
::db/pool (ig/ref ::db/pool)}}) ::db/pool (ig/ref ::db/pool)}})
(def system nil) (def system nil)
(defn start (defn start

View file

@ -14,6 +14,7 @@
[app.common.spec :as us] [app.common.spec :as us]
[app.common.transit :as t] [app.common.transit :as t]
[app.common.uuid :as uuid] [app.common.uuid :as uuid]
[app.config :as cf]
[app.db :as db] [app.db :as db]
[app.metrics :as mtx] [app.metrics :as mtx]
[app.redis :as rds] [app.redis :as rds]
@ -174,63 +175,62 @@
(db/pgobject? props) (db/pgobject? props)
(assoc :props (db/decode-transit-pgobject props)))) (assoc :props (db/decode-transit-pgobject props))))
(s/def ::queue ::us/string)
(s/def ::wait-duration ::dt/duration) (s/def ::wait-duration ::dt/duration)
(defmethod ig/pre-init-spec ::scheduler [_] (defmethod ig/pre-init-spec ::dispatcher [_]
(s/keys :req [::mtx/metrics (s/keys :req [::mtx/metrics
::db/pool ::db/pool
::rds/redis] ::rds/redis]
:opt [::wait-duration :opt [::wait-duration
::batch-size])) ::batch-size]))
(defmethod ig/prep-key ::scheduler (defmethod ig/prep-key ::dispatcher
[_ cfg] [_ cfg]
(merge {::batch-size 1 (merge {::batch-size 100
::wait-duration (dt/duration "2s")} ::wait-duration (dt/duration "5s")}
(d/without-nils cfg))) (d/without-nils cfg)))
(def ^:private sql:select-next-tasks (def ^:private sql:select-next-tasks
"select * from task as t "select id, queue from task as t
where t.scheduled_at <= now() where t.scheduled_at <= now()
and (t.status = 'new' or t.status = 'retry') and (t.status = 'new' or t.status = 'retry')
and queue ~~* ?::text
order by t.priority desc, t.scheduled_at order by t.priority desc, t.scheduled_at
limit ? limit ?
for update skip locked") for update skip locked")
(defn- format-queue (defmethod ig/init-key ::dispatcher
[queue]
(str/ffmt "penpot-tasks-queue:%" queue))
(defmethod ig/init-key ::scheduler
[_ {:keys [::db/pool ::rds/redis ::batch-size] :as cfg}] [_ {:keys [::db/pool ::rds/redis ::batch-size] :as cfg}]
(letfn [(get-tasks-batch [conn] (letfn [(get-tasks [conn]
(->> (db/exec! conn [sql:select-next-tasks batch-size]) (let [prefix (str (cf/get :tenant) ":%")]
(map decode-task-row) (seq (db/exec! conn [sql:select-next-tasks prefix batch-size]))))
(seq)))
(queue-task [conn rconn {:keys [id queue] :as task}] (push-tasks! [conn rconn [queue tasks]]
(db/update! conn :task {:status "ready"} {:id id}) (let [ids (mapv :id tasks)
(let [queue (format-queue queue) key (str/ffmt "taskq:%" queue)
payload (t/encode id) res (rds/rpush! rconn key (mapv t/encode ids))
result (rds/rpush! rconn queue payload)] sql [(str "update task set status = 'scheduled'"
(l/debug :hist "scheduler: task pushed to redis" " where id = ANY(?)")
:task-id id (db/create-array conn "uuid" ids)]]
:key queue
:queued result)))
(run-batch [rconn] (db/exec-one! conn sql)
(l/debug :hist "dispatcher: push tasks to redis"
:queue queue
:tasks (count ids)
:queued res)))
(run-batch! [rconn]
(db/with-atomic [conn pool] (db/with-atomic [conn pool]
(when-let [tasks (get-tasks-batch conn)] (when-let [tasks (get-tasks conn)]
(run! (partial queue-task conn rconn) tasks) (->> (group-by :queue tasks)
true))) (run! (partial push-tasks! conn rconn)))
] true)))]
(if (db/read-only? pool) (if (db/read-only? pool)
(l/warn :hint "scheduler: not started (db is read-only)") (l/warn :hint "dispatcher: not started (db is read-only)")
(px/thread (px/thread
{:name "penpot/scheduler"} {:name "penpot/worker-dispatcher"}
(l/info :hint "scheduler: started") (l/info :hint "dispatcher: started")
(try (try
(dm/with-open [rconn (rds/connect redis)] (dm/with-open [rconn (rds/connect redis)]
(loop [] (loop []
@ -238,7 +238,7 @@
(throw (InterruptedException. "interrumpted"))) (throw (InterruptedException. "interrumpted")))
(try (try
(when-not (run-batch rconn) (when-not (run-batch! rconn)
(px/sleep (::wait-duration cfg))) (px/sleep (::wait-duration cfg)))
(catch InterruptedException cause (catch InterruptedException cause
(throw cause)) (throw cause))
@ -246,29 +246,29 @@
(cond (cond
(rds/exception? cause) (rds/exception? cause)
(do (do
(l/warn :hint "scheduler: redis exception (will retry in an instant)" :cause cause) (l/warn :hint "dispatcher: redis exception (will retry in an instant)" :cause cause)
(px/sleep (::rds/timeout rconn))) (px/sleep (::rds/timeout rconn)))
(db/sql-exception? cause) (db/sql-exception? cause)
(do (do
(l/warn :hint "scheduler: database exception (will retry in an instant)" :cause cause) (l/warn :hint "dispatcher: database exception (will retry in an instant)" :cause cause)
(px/sleep (::rds/timeout rconn))) (px/sleep (::rds/timeout rconn)))
:else :else
(do (do
(l/error :hint "scheduler: unhandled exception (will retry in an instant)" :cause cause) (l/error :hint "dispatcher: unhandled exception (will retry in an instant)" :cause cause)
(px/sleep (::rds/timeout rconn)))))) (px/sleep (::rds/timeout rconn))))))
(recur))) (recur)))
(catch InterruptedException _ (catch InterruptedException _
(l/debug :hint "scheduler: interrupted")) (l/debug :hint "dispatcher: interrupted"))
(catch Throwable cause (catch Throwable cause
(l/error :hint "scheduler: unexpected exception" :cause cause)) (l/error :hint "dispatcher: unexpected exception" :cause cause))
(finally (finally
(l/info :hint "scheduler: terminated"))))))) (l/info :hint "dispatcher: terminated")))))))
(defmethod ig/halt-key! ::scheduler (defmethod ig/halt-key! ::dispatcher
[_ thread] [_ thread]
(some-> thread px/interrupt!)) (some-> thread px/interrupt!))
@ -288,34 +288,36 @@
::queue ::queue
::registry])) ::registry]))
;; FIXME: define queue as set
(defmethod ig/prep-key ::worker (defmethod ig/prep-key ::worker
[_ cfg] [_ cfg]
(merge {::queue "default" ::parallelism 1} (merge {::parallelism 1}
(d/without-nils cfg))) (d/without-nils cfg)))
(defmethod ig/init-key ::worker (defmethod ig/init-key ::worker
[_ {:keys [::db/pool ::queue ::parallelism] :as cfg}] [_ {:keys [::db/pool ::queue ::parallelism] :as cfg}]
(let [queue (d/name queue)
cfg (assoc cfg ::queue queue)]
(if (db/read-only? pool) (if (db/read-only? pool)
(l/warn :hint "workers: not started (db is read-only)" :queue queue) (l/warn :hint "worker: not started (db is read-only)" :queue queue :parallelism parallelism)
(doall (doall
(->> (range parallelism) (->> (range parallelism)
(map #(assoc cfg ::worker-id %)) (map #(assoc cfg ::worker-id %))
(map start-worker!))))) (map start-worker!))))))
(defmethod ig/halt-key! ::worker (defmethod ig/halt-key! ::worker
[_ threads] [_ threads]
(run! px/interrupt! threads)) (run! px/interrupt! threads))
(defn- start-worker! (defn- start-worker!
[{:keys [::rds/redis ::worker-id] :as cfg}] [{:keys [::rds/redis ::worker-id ::queue] :as cfg}]
(px/thread (px/thread
{:name (format "penpot/worker/%s" worker-id)} {:name (format "penpot/worker/%s" worker-id)}
(l/info :hint "worker: started" :worker-id worker-id) (l/info :hint "worker: started" :worker-id worker-id :queue queue)
(try (try
(dm/with-open [rconn (rds/connect redis)] (dm/with-open [rconn (rds/connect redis)]
(let [cfg (-> cfg (let [tenant (cf/get :tenant "main")
(update ::queue format-queue) cfg (-> cfg
(assoc ::queue (str/ffmt "taskq:%:%" tenant queue))
(assoc ::rds/rconn rconn) (assoc ::rds/rconn rconn)
(assoc ::timeout (dt/duration "5s")))] (assoc ::timeout (dt/duration "5s")))]
(loop [] (loop []
@ -327,13 +329,17 @@
(catch InterruptedException _ (catch InterruptedException _
(l/debug :hint "worker: interrupted" (l/debug :hint "worker: interrupted"
:worker-id worker-id)) :worker-id worker-id
:queue queue))
(catch Throwable cause (catch Throwable cause
(l/error :hint "worker: unexpected exception" (l/error :hint "worker: unexpected exception"
:worker-id worker-id :worker-id worker-id
:queue queue
:cause cause)) :cause cause))
(finally (finally
(l/info :hint "worker: terminated" :worker-id worker-id))))) (l/info :hint "worker: terminated"
:worker-id worker-id
:queue queue)))))
(defn- run-worker-loop! (defn- run-worker-loop!
[{:keys [::db/pool ::rds/rconn ::timeout ::queue ::registry ::worker-id]}] [{:keys [::db/pool ::rds/rconn ::timeout ::queue ::registry ::worker-id]}]
@ -631,9 +637,26 @@
;; SUBMIT API ;; SUBMIT API
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(s/def ::task keyword?) (defn- extract-props
[options]
(let [cns (namespace ::sample)]
(persistent!
(reduce-kv (fn [res k v]
(cond-> res
(not= (namespace k) cns)
(assoc! k v)))
(transient {})
options))))
;; Parameterized INSERT for a single row of the `task` table.
;; Placeholders, in order: id, name, props, queue, priority, max_retries,
;; and a value that is added to now() to compute scheduled_at.
;; The statement returns the id of the inserted row.
(def ^:private sql:insert-new-task
"insert into task (id, name, props, queue, priority, max_retries, scheduled_at)
values (?, ?, ?, ?, ?, ?, now() + ?)
returning id")
;; Both the task name and the queue name are accepted either as a keyword
;; or as a string; the spec tags the matching branch as :kw or :str.
(s/def ::task (s/or :kw keyword? :str string?))
(s/def ::queue (s/or :kw keyword? :str string?))
(s/def ::delay (s/or :int ::us/integer :duration dt/duration?)) (s/def ::delay (s/or :int ::us/integer :duration dt/duration?))
(s/def ::conn some?) (s/def ::conn (s/or :pool ::db/pool :connection some?))
(s/def ::priority ::us/integer) (s/def ::priority ::us/integer)
(s/def ::max-retries ::us/integer) (s/def ::max-retries ::us/integer)
@ -641,36 +664,24 @@
(s/keys :req [::task ::conn] (s/keys :req [::task ::conn]
:opt [::delay ::queue ::priority ::max-retries])) :opt [::delay ::queue ::priority ::max-retries]))
;; Returns a map holding only the entries of `options` whose key is NOT a
;; qualified (namespaced) keyword; entries with qualified-keyword keys are
;; dropped.
(defn- extract-props
[options]
(persistent!
;; Accumulate into a transient map and freeze it once at the end.
(reduce-kv (fn [res k v]
(cond-> res
;; Keep the entry only when the key carries no namespace.
(not (qualified-keyword? k))
(assoc! k v)))
(transient {})
options)))
;; Parameterized INSERT for a single row of the `task` table.
;; Placeholders, in order: id, name, props, queue, priority, max_retries,
;; and a value that is added to now() to compute scheduled_at.
;; The statement returns the id of the inserted row.
(def ^:private sql:insert-new-task
"insert into task (id, name, props, queue, priority, max_retries, scheduled_at)
values (?, ?, ?, ?, ?, ?, now() + ?)
returning id")
(defn submit! (defn submit!
[& {:keys [::task ::delay ::queue ::priority ::max-retries ::conn] [& {:keys [::task ::delay ::queue ::priority ::max-retries ::conn]
:or {delay 0 queue "default" priority 100 max-retries 3} :or {delay 0 queue :default priority 100 max-retries 3}
:as options}] :as options}]
(us/verify ::submit-options options) (us/verify ::submit-options options)
(let [duration (dt/duration delay) (let [duration (dt/duration delay)
interval (db/interval duration) interval (db/interval duration)
props (-> options extract-props db/tjson) props (-> options extract-props db/tjson)
id (uuid/next)] id (uuid/next)
tenant (cf/get :tenant)
task (d/name task)
queue (str/ffmt "%:%" tenant (d/name queue))]
(l/debug :hint "submit task" (l/debug :hint "submit task"
:name (d/name task) :name task
:queue queue :queue queue
:in (dt/format-duration duration)) :in (dt/format-duration duration))
(db/exec-one! conn [sql:insert-new-task id (d/name task) props (db/exec-one! conn [sql:insert-new-task id task props
queue priority max-retries interval]) queue priority max-retries interval])
id)) id))