diff --git a/caddy.go b/caddy.go index 9595c96c..584865bd 100644 --- a/caddy.go +++ b/caddy.go @@ -31,6 +31,7 @@ import ( "strconv" "strings" "sync" + "sync/atomic" "time" "github.com/caddyserver/caddy/v2/notify" @@ -676,6 +677,10 @@ func Validate(cfg *Config) error { // Errors are logged along the way, and an appropriate exit // code is emitted. func exitProcess(ctx context.Context, logger *zap.Logger) { + // let the rest of the program know we're quitting + atomic.StoreInt32(exiting, 1) + + // give the OS or service/process manager our 2 weeks' notice: we quit if err := notify.Stopping(); err != nil { Log().Error("unable to notify service manager of stopping state", zap.Error(err)) } @@ -739,6 +744,12 @@ func exitProcess(ctx context.Context, logger *zap.Logger) { }() } +var exiting = new(int32) // accessed atomically + +// Exiting returns true if the process is exiting. +// EXPERIMENTAL API: subject to change or removal. +func Exiting() bool { return atomic.LoadInt32(exiting) == 1 } + // Duration can be an integer or a string. An integer is // interpreted as nanoseconds. If a string, it is a Go // time.Duration value such as `300ms`, `1.5h`, or `2h45m`; diff --git a/caddytest/caddytest.go b/caddytest/caddytest.go index 4fb33941..e560ab4a 100644 --- a/caddytest/caddytest.go +++ b/caddytest/caddytest.go @@ -43,7 +43,7 @@ type Defaults struct { // Default testing values var Default = Defaults{ - AdminPort: 2019, + AdminPort: 2999, // different from what a real server also running on a developer's machine might be Certifcates: []string{"/caddy.localhost.crt", "/caddy.localhost.key"}, TestRequestTimeout: 5 * time.Second, LoadRequestTimeout: 5 * time.Second, diff --git a/caddytest/integration/caddyfile_test.go b/caddytest/integration/caddyfile_test.go index 27588833..77ac54ef 100644 --- a/caddytest/integration/caddyfile_test.go +++ b/caddytest/integration/caddyfile_test.go @@ -16,6 +16,7 @@ func TestRespond(t *testing.T) { { http_port 9080 https_port 9443 + grace_period 1 } localhost:9080 { @@ -37,6 +38,7 @@ func TestRedirect(t *testing.T) { { http_port 9080 https_port 9443 + grace_period 1 } localhost:9080 { @@ -86,6 +88,7 @@ func TestReadCookie(t *testing.T) { { http_port 9080 https_port 9443 + grace_period 1 } localhost:9080 { @@ -109,6 +112,7 @@ func TestReplIndex(t *testing.T) { { http_port 9080 https_port 9443 + grace_period 1 } localhost:9080 { diff --git a/caddytest/integration/handler_test.go b/caddytest/integration/handler_test.go index fb986c7c..e295dc55 100644 --- a/caddytest/integration/handler_test.go +++ b/caddytest/integration/handler_test.go @@ -13,6 +13,7 @@ func TestBrowse(t *testing.T) { { http_port 9080 https_port 9443 + grace_period 1 } http://localhost:9080 { file_server browse diff --git a/caddytest/integration/map_test.go b/caddytest/integration/map_test.go index afed0c35..05959576 100644 --- a/caddytest/integration/map_test.go +++ b/caddytest/integration/map_test.go @@ -13,6 +13,7 @@ func TestMap(t *testing.T) { tester.InitServer(`{ http_port 9080 https_port 9443 + grace_period 1 } localhost:9080 { diff --git a/caddytest/integration/reverseproxy_test.go b/caddytest/integration/reverseproxy_test.go index ca11287a..dfc8df0d 100644 --- a/caddytest/integration/reverseproxy_test.go +++ b/caddytest/integration/reverseproxy_test.go @@ -18,6 +18,7 @@ func TestSRVReverseProxy(t *testing.T) { { "apps": { "http": { + "grace_period": 1, "servers": { "srv0": { "listen": [ @@ -50,6 +51,7 @@ func TestSRVWithDial(t *testing.T) { { "apps": { "http": { + "grace_period": 1, "servers": { "srv0": { "listen": [ @@ -115,6 +117,7 @@ func TestDialWithPlaceholderUnix(t *testing.T) { { "apps": { "http": { + "grace_period": 1, "servers": { "srv0": { "listen": [ @@ -156,6 +159,7 @@ func TestReverseProxyWithPlaceholderDialAddress(t *testing.T) { { "apps": { "http": { + "grace_period": 1, "servers": { "srv0": { "listen": [ @@ -239,6 +243,7 @@ func TestReverseProxyWithPlaceholderTCPDialAddress(t *testing.T) { { "apps": { "http": { + "grace_period": 1, "servers": { "srv0": { "listen": [ @@ -321,6 +326,7 @@ func TestSRVWithActiveHealthcheck(t *testing.T) { { "apps": { "http": { + "grace_period": 1, "servers": { "srv0": { "listen": [ diff --git a/caddytest/integration/sni_test.go b/caddytest/integration/sni_test.go index 813e9a05..aa47c75e 100644 --- a/caddytest/integration/sni_test.go +++ b/caddytest/integration/sni_test.go @@ -15,6 +15,7 @@ func TestDefaultSNI(t *testing.T) { "http": { "http_port": 9080, "https_port": 9443, + "grace_period": 1, "servers": { "srv0": { "listen": [ @@ -112,6 +113,7 @@ func TestDefaultSNIWithNamedHostAndExplicitIP(t *testing.T) { "http": { "http_port": 9080, "https_port": 9443, + "grace_period": 1, "servers": { "srv0": { "listen": [ @@ -212,6 +214,7 @@ func TestDefaultSNIWithPortMappingOnly(t *testing.T) { "http": { "http_port": 9080, "https_port": 9443, + "grace_period": 1, "servers": { "srv0": { "listen": [ diff --git a/caddytest/integration/stream_test.go b/caddytest/integration/stream_test.go index cfd9d361..0cb1db2e 100644 --- a/caddytest/integration/stream_test.go +++ b/caddytest/integration/stream_test.go @@ -27,6 +27,7 @@ func TestH2ToH2CStream(t *testing.T) { "http": { "http_port": 9080, "https_port": 9443, + "grace_period": 1, "servers": { "srv0": { "listen": [ @@ -216,6 +217,7 @@ func TestH2ToH1ChunkedResponse(t *testing.T) { "http": { "http_port": 9080, "https_port": 9443, + "grace_period": 1, "servers": { "srv0": { "listen": [ diff --git a/modules/caddyhttp/app.go b/modules/caddyhttp/app.go index 290a4083..84b0b941 100644 --- a/modules/caddyhttp/app.go +++ b/modules/caddyhttp/app.go @@ -505,22 +505,62 @@ func (app *App) Stop() error { app.logger.Debug("servers shutting down with eternal grace period") } - // shut down servers - for _, server := range app.Servers { + // goroutines aren't guaranteed to be scheduled right away, + // so we'll use one WaitGroup to wait for all the goroutines + // to start their server shutdowns, and another to wait for + // them to finish; we'll always block for them to start so + // that when we return the caller can be confident* that the + // old servers are no longer accepting new connections + // (* the scheduler might still pause them right before + // calling Shutdown(), but it's unlikely) + var startedShutdown, finishedShutdown sync.WaitGroup + + // these will run in goroutines + stopServer := func(server *Server) { + defer finishedShutdown.Done() + startedShutdown.Done() + if err := server.server.Shutdown(ctx); err != nil { app.logger.Error("server shutdown", zap.Error(err), zap.Strings("addresses", server.Listen)) } + } + stopH3Server := func(server *Server) { + defer finishedShutdown.Done() + startedShutdown.Done() - if server.h3server != nil { - // TODO: CloseGracefully, once implemented upstream (see https://github.com/lucas-clemente/quic-go/issues/2103) - if err := server.h3server.Close(); err != nil { - app.logger.Error("HTTP/3 server shutdown", - zap.Error(err), - zap.Strings("addresses", server.Listen)) - } + if server.h3server == nil { + return } + // TODO: CloseGracefully, once implemented upstream (see https://github.com/lucas-clemente/quic-go/issues/2103) + if err := server.h3server.Close(); err != nil { + app.logger.Error("HTTP/3 server shutdown", + zap.Error(err), + zap.Strings("addresses", server.Listen)) + } + } + + for _, server := range app.Servers { + startedShutdown.Add(2) + finishedShutdown.Add(2) + go stopServer(server) + go stopH3Server(server) + } + + // block until all the goroutines have been run by the scheduler; + // this means that they have likely called Shutdown() by now + startedShutdown.Wait() + + // if the process is exiting, we need to block here and wait + // for the grace periods to complete, otherwise the process will + // terminate before the servers are finished shutting down; but + // we don't really need to wait for the grace period to finish + // if the process isn't exiting (but note that frequent config + // reloads with long grace periods for a sustained length of time + // may deplete resources) + if caddy.Exiting() { + finishedShutdown.Wait() } return nil