mirror of
https://github.com/caddyserver/caddy.git
synced 2025-01-06 22:40:31 -05:00
Several improvements and bug fixes related to graceful reloads
Added a -grace flag to customize graceful shutdown period, fixed bugs related to closing file descriptors (and dup'ed fds), improved healthcheck signaling to parent, fixed a race condition with the graceful listener, etc. These improvements mainly provide better support for frequent reloading or unusual use cases of Start and Stop after a Restart (POSIX systems). This forum thread was valuable help in debugging: https://forum.golangbridge.org/t/bind-address-already-in-use-even-after-listener-closed/1510?u=matt
This commit is contained in:
parent
94c746c44f
commit
b42334eb91
7 changed files with 113 additions and 76 deletions
|
@ -29,6 +29,7 @@ import (
|
|||
"path"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/mholt/caddy/caddy/letsencrypt"
|
||||
"github.com/mholt/caddy/server"
|
||||
|
@ -43,11 +44,14 @@ var (
|
|||
// If true, initialization will not show any informative output.
|
||||
Quiet bool
|
||||
|
||||
// HTTP2 indicates whether HTTP2 is enabled or not
|
||||
// HTTP2 indicates whether HTTP2 is enabled or not.
|
||||
HTTP2 bool // TODO: temporary flag until http2 is standard
|
||||
|
||||
// PidFile is the path to the pidfile to create
|
||||
// PidFile is the path to the pidfile to create.
|
||||
PidFile string
|
||||
|
||||
// GracefulTimeout is the maximum duration of a graceful shutdown.
|
||||
GracefulTimeout time.Duration
|
||||
)
|
||||
|
||||
var (
|
||||
|
@ -91,17 +95,20 @@ const (
|
|||
)
|
||||
|
||||
// Start starts Caddy with the given Caddyfile. If cdyfile
|
||||
// is nil or the process is forked from a parent as part of
|
||||
// a graceful restart, Caddy will check to see if Caddyfile
|
||||
// was piped from stdin and use that. It blocks until all the
|
||||
// servers are listening.
|
||||
// is nil, the LoadCaddyfile function will be called to get
|
||||
// one.
|
||||
//
|
||||
// If this process is a fork and no Caddyfile was piped in,
|
||||
// an error will be returned (the Restart() function does this
|
||||
// for you automatically). If this process is NOT a fork and
|
||||
// cdyfile is nil, a default configuration will be assumed.
|
||||
// In any case, an error is returned if Caddy could not be
|
||||
// started.
|
||||
// This function blocks until all the servers are listening.
|
||||
//
|
||||
// Note (POSIX): If Start is called in the child process of a
|
||||
// restart more than once within the duration of the graceful
|
||||
// cutoff (i.e. the child process called Start a first time,
|
||||
// then called Stop, then Start again within the first 5 seconds
|
||||
// or however long GracefulTimeout is) and the Caddyfiles have
|
||||
// at least one listener address in common, the second Start
|
||||
// may fail with "address already in use" as there's no
|
||||
// guarantee that the parent process has relinquished the
|
||||
// address before the grace period ends.
|
||||
func Start(cdyfile Input) (err error) {
|
||||
// If we return with no errors, we must do two things: tell the
|
||||
// parent that we succeeded and write to the pidfile.
|
||||
|
@ -148,17 +155,6 @@ func Start(cdyfile Input) (err error) {
|
|||
}
|
||||
startedBefore = true
|
||||
|
||||
// Close remaining file descriptors we may have inherited that we don't need
|
||||
if IsRestart() {
|
||||
for _, fdIndex := range loadedGob.ListenerFds {
|
||||
file := os.NewFile(fdIndex, "")
|
||||
fln, err := net.FileListener(file)
|
||||
if err == nil {
|
||||
fln.Close()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Show initialization output
|
||||
if !Quiet && !IsRestart() {
|
||||
var checkedFdLimit bool
|
||||
|
@ -192,7 +188,7 @@ func startServers(groupings bindingGroup) error {
|
|||
errChan := make(chan error, len(groupings)) // must be buffered to allow Serve functions below to return if stopped later
|
||||
|
||||
for _, group := range groupings {
|
||||
s, err := server.New(group.BindAddr.String(), group.Configs)
|
||||
s, err := server.New(group.BindAddr.String(), group.Configs, GracefulTimeout)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -215,7 +211,8 @@ func startServers(groupings bindingGroup) error {
|
|||
return errors.New("listener for " + s.Addr + " was not a ListenerFile")
|
||||
}
|
||||
|
||||
delete(loadedGob.ListenerFds, s.Addr) // mark it as used
|
||||
file.Close()
|
||||
delete(loadedGob.ListenerFds, s.Addr)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -252,6 +249,14 @@ func startServers(groupings bindingGroup) error {
|
|||
serversMu.Unlock()
|
||||
}
|
||||
|
||||
// Close the remaining (unused) file descriptors to free up resources
|
||||
if IsRestart() {
|
||||
for key, fdIndex := range loadedGob.ListenerFds {
|
||||
os.NewFile(fdIndex, "").Close()
|
||||
delete(loadedGob.ListenerFds, key)
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for all servers to finish starting
|
||||
startupWg.Wait()
|
||||
|
||||
|
@ -276,7 +281,9 @@ func Stop() error {
|
|||
|
||||
serversMu.Lock()
|
||||
for _, s := range servers {
|
||||
s.Stop() // TODO: error checking/reporting?
|
||||
if err := s.Stop(); err != nil {
|
||||
log.Printf("[ERROR] Stopping %s: %v", s.Addr, err)
|
||||
}
|
||||
}
|
||||
servers = []*server.Server{} // don't reuse servers
|
||||
serversMu.Unlock()
|
||||
|
@ -289,12 +296,13 @@ func Wait() {
|
|||
wg.Wait()
|
||||
}
|
||||
|
||||
// LoadCaddyfile loads a Caddyfile in a way that prioritizes
|
||||
// reading from stdin pipe; otherwise it calls loader to load
|
||||
// the Caddyfile. If loader does not return a Caddyfile, the
|
||||
// default one will be returned. Thus, if there are no other
|
||||
// errors, this function always returns at least the default
|
||||
// Caddyfile (not the previously-used Caddyfile).
|
||||
// LoadCaddyfile loads a Caddyfile, prioritizing a Caddyfile
|
||||
// piped from stdin as part of a restart (only happens on first call
|
||||
// to LoadCaddyfile). If it is not a restart, this function tries
|
||||
// calling the user's loader function, and if that returns nil, then
|
||||
// this function resorts to the default configuration. Thus, if there
|
||||
// are no other errors, this function always returns at least the
|
||||
// default Caddyfile.
|
||||
func LoadCaddyfile(loader func() (Input, error)) (cdyfile Input, err error) {
|
||||
// If we are a fork, finishing the restart is highest priority;
|
||||
// piped input is required in this case.
|
||||
|
|
|
@ -10,6 +10,7 @@ import (
|
|||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/mholt/caddy/caddy/letsencrypt"
|
||||
)
|
||||
|
@ -44,17 +45,28 @@ func checkFdlimit() {
|
|||
// If this process is not a restart, this function does nothing.
|
||||
// Calling this function once this process has successfully initialized
|
||||
// is vital so that the parent process can unblock and kill itself.
|
||||
// This function is idempotent; it executes at most once per process.
|
||||
func signalSuccessToParent() {
|
||||
if IsRestart() {
|
||||
ppipe := os.NewFile(3, "") // parent is reading from pipe at index 3
|
||||
_, err := ppipe.Write([]byte("success")) // we must send some bytes to the parent
|
||||
if err != nil {
|
||||
log.Printf("[ERROR] Communicating successful init to parent: %v", err)
|
||||
signalParentOnce.Do(func() {
|
||||
if IsRestart() {
|
||||
ppipe := os.NewFile(3, "") // parent is reading from pipe at index 3
|
||||
_, err := ppipe.Write([]byte("success")) // we must send some bytes to the parent
|
||||
if err != nil {
|
||||
log.Printf("[ERROR] Communicating successful init to parent: %v", err)
|
||||
}
|
||||
ppipe.Close()
|
||||
}
|
||||
ppipe.Close()
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// signalParentOnce is used to make sure that the parent is only
|
||||
// signaled once; doing so more than once breaks whatever socket is
|
||||
// at fd 4 (the reason for this is still unclear - to reproduce,
|
||||
// call Stop() and Start() in succession at least once after a
|
||||
// restart, then try loading first host of Caddyfile in the browser).
|
||||
// Do not use this directly - call signalSuccessToParent instead.
|
||||
var signalParentOnce sync.Once
|
||||
|
||||
// caddyfileGob maps bind address to index of the file descriptor
|
||||
// in the Files array passed to the child process. It also contains
|
||||
// the caddyfile contents. Used only during graceful restarts.
|
||||
|
|
|
@ -84,6 +84,11 @@ func Restart(newCaddyfile Input) error {
|
|||
return err
|
||||
}
|
||||
|
||||
// Immediately close our dup'ed fds and the write end of our signal pipe
|
||||
for _, f := range extraFiles {
|
||||
f.Close()
|
||||
}
|
||||
|
||||
// Feed Caddyfile to the child
|
||||
err = gob.NewEncoder(wpipe).Encode(cdyfileGob)
|
||||
if err != nil {
|
||||
|
@ -92,7 +97,6 @@ func Restart(newCaddyfile Input) error {
|
|||
wpipe.Close()
|
||||
|
||||
// Determine whether child startup succeeded
|
||||
sigwpipe.Close() // close our copy of the write end of the pipe -- TODO: why?
|
||||
answer, readErr := ioutil.ReadAll(sigrpipe)
|
||||
if answer == nil || len(answer) == 0 {
|
||||
cmdErr := cmd.Wait() // get exit status
|
||||
|
@ -103,6 +107,6 @@ func Restart(newCaddyfile Input) error {
|
|||
return errIncompleteRestart
|
||||
}
|
||||
|
||||
// Looks like child was successful; we can exit gracefully.
|
||||
// Looks like child is successful; we can exit gracefully.
|
||||
return Stop()
|
||||
}
|
||||
|
|
3
dist/CHANGES.txt
vendored
3
dist/CHANGES.txt
vendored
|
@ -10,8 +10,11 @@ CHANGES
|
|||
- New -ca flag to customize ACME CA server URL
|
||||
- New -revoke flag to revoke a certificate
|
||||
- New -log flag to enable process log
|
||||
- New -pidfile flag to enable writing pidfile
|
||||
- New -grace flag to customize the graceful shutdown timeout
|
||||
- browse: Render filenames with multiple whitespace properly
|
||||
- markdown: Include Last-Modified header in response
|
||||
- markdown: Render tables, strikethrough, and fenced code blocks
|
||||
- startup, shutdown: Better Windows support
|
||||
- templates: Bug fix for .Host when port is absent
|
||||
- templates: Include Last-Modified header in response
|
||||
|
|
2
main.go
2
main.go
|
@ -10,6 +10,7 @@ import (
|
|||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/mholt/caddy/caddy"
|
||||
"github.com/mholt/caddy/caddy/letsencrypt"
|
||||
|
@ -34,6 +35,7 @@ func init() {
|
|||
flag.StringVar(&conf, "conf", "", "Configuration file to use (default="+caddy.DefaultConfigFile+")")
|
||||
flag.StringVar(&cpu, "cpu", "100%", "CPU cap")
|
||||
flag.StringVar(&letsencrypt.DefaultEmail, "email", "", "Default Let's Encrypt account email address")
|
||||
flag.DurationVar(&caddy.GracefulTimeout, "grace", 5*time.Second, "Maximum duration of graceful shutdown")
|
||||
flag.StringVar(&caddy.Host, "host", caddy.DefaultHost, "Default host")
|
||||
flag.BoolVar(&caddy.HTTP2, "http2", true, "HTTP/2 support") // TODO: temporary flag until http2 merged into std lib
|
||||
flag.StringVar(&logfile, "log", "", "Process log file")
|
||||
|
|
|
@ -2,7 +2,6 @@ package server
|
|||
|
||||
import (
|
||||
"net"
|
||||
"os"
|
||||
"sync"
|
||||
"syscall"
|
||||
)
|
||||
|
@ -13,7 +12,9 @@ func newGracefulListener(l ListenerFile, wg *sync.WaitGroup) *gracefulListener {
|
|||
gl := &gracefulListener{ListenerFile: l, stop: make(chan error), httpWg: wg}
|
||||
go func() {
|
||||
<-gl.stop
|
||||
gl.Lock()
|
||||
gl.stopped = true
|
||||
gl.Unlock()
|
||||
gl.stop <- gl.ListenerFile.Close()
|
||||
}()
|
||||
return gl
|
||||
|
@ -24,12 +25,13 @@ func newGracefulListener(l ListenerFile, wg *sync.WaitGroup) *gracefulListener {
|
|||
// methods mainly wrap net.Listener to be graceful.
|
||||
type gracefulListener struct {
|
||||
ListenerFile
|
||||
stop chan error
|
||||
stopped bool
|
||||
httpWg *sync.WaitGroup // pointer to the host's wg used for counting connections
|
||||
stop chan error
|
||||
stopped bool
|
||||
sync.Mutex // protects the stopped flag
|
||||
httpWg *sync.WaitGroup // pointer to the host's wg used for counting connections
|
||||
}
|
||||
|
||||
// Accept accepts a connection. This type wraps
|
||||
// Accept accepts a connection.
|
||||
func (gl *gracefulListener) Accept() (c net.Conn, err error) {
|
||||
c, err = gl.ListenerFile.Accept()
|
||||
if err != nil {
|
||||
|
@ -42,18 +44,16 @@ func (gl *gracefulListener) Accept() (c net.Conn, err error) {
|
|||
|
||||
// Close immediately closes the listener.
|
||||
func (gl *gracefulListener) Close() error {
|
||||
gl.Lock()
|
||||
if gl.stopped {
|
||||
gl.Unlock()
|
||||
return syscall.EINVAL
|
||||
}
|
||||
gl.Unlock()
|
||||
gl.stop <- nil
|
||||
return <-gl.stop
|
||||
}
|
||||
|
||||
// File implements ListenerFile; it gets the file of the listening socket.
|
||||
func (gl *gracefulListener) File() (*os.File, error) {
|
||||
return gl.ListenerFile.File()
|
||||
}
|
||||
|
||||
// gracefulConn represents a connection on a
|
||||
// gracefulListener so that we can keep track
|
||||
// of the number of connections, thus facilitating
|
||||
|
|
|
@ -8,7 +8,6 @@ import (
|
|||
"crypto/x509"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"net"
|
||||
"net/http"
|
||||
"os"
|
||||
|
@ -26,13 +25,14 @@ import (
|
|||
// graceful termination (POSIX only).
|
||||
type Server struct {
|
||||
*http.Server
|
||||
HTTP2 bool // temporary while http2 is not in std lib (TODO: remove flag when part of std lib)
|
||||
tls bool // whether this server is serving all HTTPS hosts or not
|
||||
vhosts map[string]virtualHost // virtual hosts keyed by their address
|
||||
listener ListenerFile // the listener which is bound to the socket
|
||||
listenerMu sync.Mutex // protects listener
|
||||
httpWg sync.WaitGroup // used to wait on outstanding connections
|
||||
startChan chan struct{} // used to block until server is finished starting
|
||||
HTTP2 bool // temporary while http2 is not in std lib (TODO: remove flag when part of std lib)
|
||||
tls bool // whether this server is serving all HTTPS hosts or not
|
||||
vhosts map[string]virtualHost // virtual hosts keyed by their address
|
||||
listener ListenerFile // the listener which is bound to the socket
|
||||
listenerMu sync.Mutex // protects listener
|
||||
httpWg sync.WaitGroup // used to wait on outstanding connections
|
||||
startChan chan struct{} // used to block until server is finished starting
|
||||
connTimeout time.Duration // the maximum duration of a graceful shutdown
|
||||
}
|
||||
|
||||
// ListenerFile represents a listener.
|
||||
|
@ -42,14 +42,17 @@ type ListenerFile interface {
|
|||
}
|
||||
|
||||
// New creates a new Server which will bind to addr and serve
|
||||
// the sites/hosts configured in configs. This function does
|
||||
// not start serving.
|
||||
// the sites/hosts configured in configs. Its listener will
|
||||
// gracefully close when the server is stopped which will take
|
||||
// no longer than gracefulTimeout.
|
||||
//
|
||||
// This function does not start serving.
|
||||
//
|
||||
// Do not re-use a server (start, stop, then start again). We
|
||||
// could probably add more locking to make this possible, but
|
||||
// as it stands, you should dispose of a server after stopping it.
|
||||
// The behavior of serving with a spent server is undefined.
|
||||
func New(addr string, configs []Config) (*Server, error) {
|
||||
func New(addr string, configs []Config, gracefulTimeout time.Duration) (*Server, error) {
|
||||
var tls bool
|
||||
if len(configs) > 0 {
|
||||
tls = configs[0].TLS.Enabled
|
||||
|
@ -63,9 +66,10 @@ func New(addr string, configs []Config) (*Server, error) {
|
|||
// WriteTimeout: 2 * time.Minute,
|
||||
// MaxHeaderBytes: 1 << 16,
|
||||
},
|
||||
tls: tls,
|
||||
vhosts: make(map[string]virtualHost),
|
||||
startChan: make(chan struct{}),
|
||||
tls: tls,
|
||||
vhosts: make(map[string]virtualHost),
|
||||
startChan: make(chan struct{}),
|
||||
connTimeout: gracefulTimeout,
|
||||
}
|
||||
s.Handler = s // this is weird, but whatever
|
||||
|
||||
|
@ -241,7 +245,7 @@ func serveTLSWithSNI(s *Server, ln net.Listener, tlsConfigs []TLSConfig) error {
|
|||
// connections to close (up to a max timeout of a few
|
||||
// seconds); on Windows it will close the listener
|
||||
// immediately.
|
||||
func (s *Server) Stop() error {
|
||||
func (s *Server) Stop() (err error) {
|
||||
s.Server.SetKeepAlivesEnabled(false)
|
||||
|
||||
if runtime.GOOS != "windows" {
|
||||
|
@ -256,20 +260,19 @@ func (s *Server) Stop() error {
|
|||
// Wait for remaining connections to finish or
|
||||
// force them all to close after timeout
|
||||
select {
|
||||
case <-time.After(5 * time.Second): // TODO: make configurable
|
||||
case <-time.After(s.connTimeout):
|
||||
case <-done:
|
||||
}
|
||||
}
|
||||
|
||||
// Close the listener now; this stops the server without delay
|
||||
s.listenerMu.Lock()
|
||||
err := s.listener.Close()
|
||||
s.listenerMu.Unlock()
|
||||
if err != nil {
|
||||
log.Printf("[ERROR] Closing listener for %s: %v", s.Addr, err)
|
||||
if s.listener != nil {
|
||||
err = s.listener.Close()
|
||||
}
|
||||
s.listenerMu.Unlock()
|
||||
|
||||
return err
|
||||
return
|
||||
}
|
||||
|
||||
// WaitUntilStarted blocks until the server s is started, meaning
|
||||
|
@ -279,12 +282,17 @@ func (s *Server) WaitUntilStarted() {
|
|||
<-s.startChan
|
||||
}
|
||||
|
||||
// ListenerFd gets the file descriptor of the listener.
|
||||
// ListenerFd gets a dup'ed file of the listener. If there
|
||||
// is no underlying file, the return value will be nil. It
|
||||
// is the caller's responsibility to close the file.
|
||||
func (s *Server) ListenerFd() *os.File {
|
||||
s.listenerMu.Lock()
|
||||
defer s.listenerMu.Unlock()
|
||||
file, _ := s.listener.File()
|
||||
return file
|
||||
if s.listener != nil {
|
||||
file, _ := s.listener.File()
|
||||
return file
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ServeHTTP is the entry point for every request to the address that s
|
||||
|
@ -370,7 +378,7 @@ func setupClientAuth(tlsConfigs []TLSConfig, config *tls.Config) error {
|
|||
|
||||
// RunFirstStartupFuncs runs all of the server's FirstStartup
|
||||
// callback functions unless one of them returns an error first.
|
||||
// It is up the caller's responsibility to call this only once and
|
||||
// It is the caller's responsibility to call this only once and
|
||||
// at the correct time. The functions here should not be executed
|
||||
// at restarts or where the user does not explicitly start a new
|
||||
// instance of the server.
|
||||
|
|
Loading…
Reference in a new issue