From da8b7fe58f83012d9a6c6e15cb249ca5f476597c Mon Sep 17 00:00:00 2001 From: Matt Holt Date: Mon, 19 Sep 2022 21:54:47 -0600 Subject: [PATCH] caddyhttp: Honor grace period in background (#5043) * caddyhttp: Honor grace period in background This avoids blocking during config reloads. * Don't quit process until servers shut down * Make tests more likely to pass on fast CI (#5045) * caddyhttp: Even faster shutdowns Simultaneously shut down all HTTP servers, rather than one at a time. In practice there usually won't be more than 1 that lingers. But this code ensures that they all Shutdown() in their own goroutine and then we wait for them at the end (if exiting). We also wait for them to start up so we can be fairly confident the shutdowns have begun; i.e. old servers no longer accepting new connections. * Fix comment typo * Pull functions out of loop, for readability --- caddy.go | 11 ++++ caddytest/caddytest.go | 2 +- caddytest/integration/caddyfile_test.go | 4 ++ caddytest/integration/handler_test.go | 1 + caddytest/integration/map_test.go | 1 + caddytest/integration/reverseproxy_test.go | 6 +++ caddytest/integration/sni_test.go | 3 ++ caddytest/integration/stream_test.go | 2 + modules/caddyhttp/app.go | 58 ++++++++++++++++++---- 9 files changed, 78 insertions(+), 10 deletions(-) diff --git a/caddy.go b/caddy.go index 9595c96c..584865bd 100644 --- a/caddy.go +++ b/caddy.go @@ -31,6 +31,7 @@ import ( "strconv" "strings" "sync" + "sync/atomic" "time" "github.com/caddyserver/caddy/v2/notify" @@ -676,6 +677,10 @@ func Validate(cfg *Config) error { // Errors are logged along the way, and an appropriate exit // code is emitted. func exitProcess(ctx context.Context, logger *zap.Logger) { + // let the rest of the program know we're quitting + atomic.StoreInt32(exiting, 1) + + // give the OS or service/process manager our 2 weeks' notice: we quit if err := notify.Stopping(); err != nil { Log().Error("unable to notify service manager of stopping state", zap.Error(err)) } @@ -739,6 +744,12 @@ func exitProcess(ctx context.Context, logger *zap.Logger) { }() } +var exiting = new(int32) // accessed atomically + +// Exiting returns true if the process is exiting. +// EXPERIMENTAL API: subject to change or removal. +func Exiting() bool { return atomic.LoadInt32(exiting) == 1 } + // Duration can be an integer or a string. An integer is // interpreted as nanoseconds. If a string, it is a Go // time.Duration value such as `300ms`, `1.5h`, or `2h45m`; diff --git a/caddytest/caddytest.go b/caddytest/caddytest.go index 4fb33941..e560ab4a 100644 --- a/caddytest/caddytest.go +++ b/caddytest/caddytest.go @@ -43,7 +43,7 @@ type Defaults struct { // Default testing values var Default = Defaults{ - AdminPort: 2019, + AdminPort: 2999, // different from what a real server also running on a developer's machine might be Certifcates: []string{"/caddy.localhost.crt", "/caddy.localhost.key"}, TestRequestTimeout: 5 * time.Second, LoadRequestTimeout: 5 * time.Second, diff --git a/caddytest/integration/caddyfile_test.go b/caddytest/integration/caddyfile_test.go index 27588833..77ac54ef 100644 --- a/caddytest/integration/caddyfile_test.go +++ b/caddytest/integration/caddyfile_test.go @@ -16,6 +16,7 @@ func TestRespond(t *testing.T) { { http_port 9080 https_port 9443 + grace_period 1 } localhost:9080 { @@ -37,6 +38,7 @@ func TestRedirect(t *testing.T) { { http_port 9080 https_port 9443 + grace_period 1 } localhost:9080 { @@ -86,6 +88,7 @@ func TestReadCookie(t *testing.T) { { http_port 9080 https_port 9443 + grace_period 1 } localhost:9080 { @@ -109,6 +112,7 @@ func TestReplIndex(t *testing.T) { { http_port 9080 https_port 9443 + grace_period 1 } localhost:9080 { diff --git a/caddytest/integration/handler_test.go b/caddytest/integration/handler_test.go index fb986c7c..e295dc55 100644 --- a/caddytest/integration/handler_test.go +++ b/caddytest/integration/handler_test.go @@ -13,6 +13,7 @@ func TestBrowse(t *testing.T) { { http_port 9080 https_port 9443 + grace_period 1 } http://localhost:9080 { file_server browse diff --git a/caddytest/integration/map_test.go b/caddytest/integration/map_test.go index afed0c35..05959576 100644 --- a/caddytest/integration/map_test.go +++ b/caddytest/integration/map_test.go @@ -13,6 +13,7 @@ func TestMap(t *testing.T) { tester.InitServer(`{ http_port 9080 https_port 9443 + grace_period 1 } localhost:9080 { diff --git a/caddytest/integration/reverseproxy_test.go b/caddytest/integration/reverseproxy_test.go index ca11287a..dfc8df0d 100644 --- a/caddytest/integration/reverseproxy_test.go +++ b/caddytest/integration/reverseproxy_test.go @@ -18,6 +18,7 @@ func TestSRVReverseProxy(t *testing.T) { { "apps": { "http": { + "grace_period": 1, "servers": { "srv0": { "listen": [ @@ -50,6 +51,7 @@ func TestSRVWithDial(t *testing.T) { { "apps": { "http": { + "grace_period": 1, "servers": { "srv0": { "listen": [ @@ -115,6 +117,7 @@ func TestDialWithPlaceholderUnix(t *testing.T) { { "apps": { "http": { + "grace_period": 1, "servers": { "srv0": { "listen": [ @@ -156,6 +159,7 @@ func TestReverseProxyWithPlaceholderDialAddress(t *testing.T) { { "apps": { "http": { + "grace_period": 1, "servers": { "srv0": { "listen": [ @@ -239,6 +243,7 @@ func TestReverseProxyWithPlaceholderTCPDialAddress(t *testing.T) { { "apps": { "http": { + "grace_period": 1, "servers": { "srv0": { "listen": [ @@ -321,6 +326,7 @@ func TestSRVWithActiveHealthcheck(t *testing.T) { { "apps": { "http": { + "grace_period": 1, "servers": { "srv0": { "listen": [ diff --git a/caddytest/integration/sni_test.go b/caddytest/integration/sni_test.go index 813e9a05..aa47c75e 100644 --- a/caddytest/integration/sni_test.go +++ b/caddytest/integration/sni_test.go @@ -15,6 +15,7 @@ func TestDefaultSNI(t *testing.T) { "http": { "http_port": 9080, "https_port": 9443, + "grace_period": 1, "servers": { "srv0": { "listen": [ @@ -112,6 +113,7 @@ func TestDefaultSNIWithNamedHostAndExplicitIP(t *testing.T) { "http": { "http_port": 9080, "https_port": 9443, + "grace_period": 1, "servers": { "srv0": { "listen": [ @@ -212,6 +214,7 @@ func TestDefaultSNIWithPortMappingOnly(t *testing.T) { "http": { "http_port": 9080, "https_port": 9443, + "grace_period": 1, "servers": { "srv0": { "listen": [ diff --git a/caddytest/integration/stream_test.go b/caddytest/integration/stream_test.go index cfd9d361..0cb1db2e 100644 --- a/caddytest/integration/stream_test.go +++ b/caddytest/integration/stream_test.go @@ -27,6 +27,7 @@ func TestH2ToH2CStream(t *testing.T) { "http": { "http_port": 9080, "https_port": 9443, + "grace_period": 1, "servers": { "srv0": { "listen": [ @@ -216,6 +217,7 @@ func TestH2ToH1ChunkedResponse(t *testing.T) { "http": { "http_port": 9080, "https_port": 9443, + "grace_period": 1, "servers": { "srv0": { "listen": [ diff --git a/modules/caddyhttp/app.go b/modules/caddyhttp/app.go index 290a4083..84b0b941 100644 --- a/modules/caddyhttp/app.go +++ b/modules/caddyhttp/app.go @@ -505,22 +505,62 @@ func (app *App) Stop() error { app.logger.Debug("servers shutting down with eternal grace period") } - // shut down servers - for _, server := range app.Servers { + // goroutines aren't guaranteed to be scheduled right away, + // so we'll use one WaitGroup to wait for all the goroutines + // to start their server shutdowns, and another to wait for + // them to finish; we'll always block for them to start so + // that when we return the caller can be confident* that the + // old servers are no longer accepting new connections + // (* the scheduler might still pause them right before + // calling Shutdown(), but it's unlikely) + var startedShutdown, finishedShutdown sync.WaitGroup + + // these will run in goroutines + stopServer := func(server *Server) { + defer finishedShutdown.Done() + startedShutdown.Done() + if err := server.server.Shutdown(ctx); err != nil { app.logger.Error("server shutdown", zap.Error(err), zap.Strings("addresses", server.Listen)) } + } + stopH3Server := func(server *Server) { + defer finishedShutdown.Done() + startedShutdown.Done() - if server.h3server != nil { - // TODO: CloseGracefully, once implemented upstream (see https://github.com/lucas-clemente/quic-go/issues/2103) - if err := server.h3server.Close(); err != nil { - app.logger.Error("HTTP/3 server shutdown", - zap.Error(err), - zap.Strings("addresses", server.Listen)) - } + if server.h3server == nil { + return } + // TODO: CloseGracefully, once implemented upstream (see https://github.com/lucas-clemente/quic-go/issues/2103) + if err := server.h3server.Close(); err != nil { + app.logger.Error("HTTP/3 server shutdown", + zap.Error(err), + zap.Strings("addresses", server.Listen)) + } + } + + for _, server := range app.Servers { + startedShutdown.Add(2) + finishedShutdown.Add(2) + go stopServer(server) + go stopH3Server(server) + } + + // block until all the goroutines have been run by the scheduler; + // this means that they have likely called Shutdown() by now + startedShutdown.Wait() + + // if the process is exiting, we need to block here and wait + // for the grace periods to complete, otherwise the process will + // terminate before the servers are finished shutting down; but + // we don't really need to wait for the grace period to finish + // if the process isn't exiting (but note that frequent config + // reloads with long grace periods for a sustained length of time + // may deplete resources) + if caddy.Exiting() { + finishedShutdown.Wait() } return nil