From 3067074d9cf94dba8b27c31408b71c9f75defb40 Mon Sep 17 00:00:00 2001 From: Matthew Holt Date: Wed, 17 Apr 2024 19:12:03 -0600 Subject: [PATCH] encode: Improve Etag handling (fix #5849) We also improve Last-Modified handling in the file server. Both changes should be more compliant with RFC 9110. --- modules/caddyhttp/encode/encode.go | 32 ++++++++++++++++++ modules/caddyhttp/fileserver/browse.go | 14 ++++++++ .../caddyhttp/fileserver/browsetplcontext.go | 12 ++++++- modules/caddyhttp/fileserver/staticfiles.go | 33 +++++++++++++------ 4 files changed, 80 insertions(+), 11 deletions(-) diff --git a/modules/caddyhttp/encode/encode.go b/modules/caddyhttp/encode/encode.go index 7d5f556f..f4bd7ae4 100644 --- a/modules/caddyhttp/encode/encode.go +++ b/modules/caddyhttp/encode/encode.go @@ -156,6 +156,18 @@ func (enc *Encode) ServeHTTP(w http.ResponseWriter, r *http.Request, next caddyh } w = enc.openResponseWriter(encName, w) defer w.(*responseWriter).Close() + + // to comply with RFC 9110 section 8.8.3(.3), we modify the Etag when encoding + // by appending a hyphen and the encoder name; the problem is, the client will + // send back that Etag in a If-None-Match header, but upstream handlers that set + // the Etag in the first place don't know that we appended to their Etag! so here + // we have to strip our addition so the upstream handlers can still honor client + // caches without knowing about our changes... + if etag := r.Header.Get("If-None-Match"); etag != "" && !strings.HasPrefix(etag, "W/") { + etag = strings.TrimSuffix(etag, "-"+encName+`"`) + `"` + r.Header.Set("If-None-Match", etag) + } + break } } @@ -220,6 +232,14 @@ type responseWriter struct { func (rw *responseWriter) WriteHeader(status int) { rw.statusCode = status + // See #5849 and RFC 9110 section 15.4.5 (https://www.rfc-editor.org/rfc/rfc9110.html#section-15.4.5) - 304 + // Not Modified must have certain headers set as if it was a 200 response, and according to the issue + // we would miss the Vary header in this case when compression was also enabled; note that we set this + // header in the responseWriter.init() method but that is only called if we are writing a response body + if status == http.StatusNotModified { + rw.Header().Add("Vary", "Accept-Encoding") + } + // write status immediately when status code is informational // see: https://caddy.community/t/disappear-103-early-hints-response-with-encode-enable-caddy-v2-7-6/23081/5 if 100 <= status && status <= 199 { @@ -334,6 +354,18 @@ func (rw *responseWriter) init() { rw.Header().Set("Content-Encoding", rw.encodingName) rw.Header().Add("Vary", "Accept-Encoding") rw.Header().Del("Accept-Ranges") // we don't know ranges for dynamically-encoded content + + // strong ETags need to be distinct depending on the encoding ("selected representation") + // see RFC 9110 section 8.8.3.3: + // https://www.rfc-editor.org/rfc/rfc9110.html#name-example-entity-tags-varying + // I don't know a great way to do this... how about appending? That's a neat trick! + // (We have to strip the value we append from If-None-Match headers before + // sending subsequent requests back upstream, however, since upstream handlers + // don't know about our appending to their Etag since they've already done their work) + if etag := rw.Header().Get("Etag"); etag != "" && !strings.HasPrefix(etag, "W/") { + etag = fmt.Sprintf(`%s-%s"`, strings.TrimSuffix(etag, `"`), rw.encodingName) + rw.Header().Set("Etag", etag) + } } } diff --git a/modules/caddyhttp/fileserver/browse.go b/modules/caddyhttp/fileserver/browse.go index e0ea171f..5db84931 100644 --- a/modules/caddyhttp/fileserver/browse.go +++ b/modules/caddyhttp/fileserver/browse.go @@ -30,6 +30,7 @@ import ( "sync" "text/tabwriter" "text/template" + "time" "go.uber.org/zap" @@ -104,6 +105,18 @@ func (fsrv *FileServer) serveBrowse(fileSystem fs.FS, root, dirPath string, w ht return caddyhttp.Error(http.StatusInternalServerError, err) } + w.Header().Add("Vary", "Accept") + + // speed up browser/client experience and caching by supporting If-Modified-Since + if ifModSinceStr := r.Header.Get("If-Modified-Since"); ifModSinceStr != "" { + ifModSince, err := time.ParseInLocation(http.TimeFormat, ifModSinceStr, time.Local) + lastModTrunc := listing.lastModified.Truncate(time.Second) + if err == nil && (lastModTrunc.Equal(ifModSince) || lastModTrunc.Before(ifModSince)) { + w.WriteHeader(http.StatusNotModified) + return nil + } + } + fsrv.browseApplyQueryParams(w, r, listing) buf := bufPool.Get().(*bytes.Buffer) @@ -111,6 +124,7 @@ func (fsrv *FileServer) serveBrowse(fileSystem fs.FS, root, dirPath string, w ht defer bufPool.Put(buf) acceptHeader := strings.ToLower(strings.Join(r.Header["Accept"], ",")) + w.Header().Set("Last-Modified", listing.lastModified.Format(http.TimeFormat)) switch { case strings.Contains(acceptHeader, "application/json"): diff --git a/modules/caddyhttp/fileserver/browsetplcontext.go b/modules/caddyhttp/fileserver/browsetplcontext.go index 5456f059..307cb9a4 100644 --- a/modules/caddyhttp/fileserver/browsetplcontext.go +++ b/modules/caddyhttp/fileserver/browsetplcontext.go @@ -63,6 +63,12 @@ func (fsrv *FileServer) directoryListing(ctx context.Context, fileSystem fs.FS, continue } + // keep track of the most recently modified item in the listing + modTime := info.ModTime() + if tplCtx.lastModified.IsZero() || modTime.After(tplCtx.lastModified) { + tplCtx.lastModified = modTime + } + isDir := entry.IsDir() || fsrv.isSymlinkTargetDir(fileSystem, info, root, urlPath) // add the slash after the escape of path to avoid escaping the slash as well @@ -108,7 +114,7 @@ func (fsrv *FileServer) directoryListing(ctx context.Context, fileSystem fs.FS, Name: name, Size: size, URL: u.String(), - ModTime: info.ModTime().UTC(), + ModTime: modTime.UTC(), Mode: info.Mode(), Tpl: tplCtx, // a reference up to the template context is useful SymlinkPath: symlinkPath, @@ -155,6 +161,10 @@ type browseTemplateContext struct { // Display format (list or grid) Layout string `json:"layout,omitempty"` + + // The most recent file modification date in the listing. + // Used for HTTP header purposes. + lastModified time.Time } // Breadcrumbs returns l.Path where every element maps diff --git a/modules/caddyhttp/fileserver/staticfiles.go b/modules/caddyhttp/fileserver/staticfiles.go index 8fba82a8..433e121d 100644 --- a/modules/caddyhttp/fileserver/staticfiles.go +++ b/modules/caddyhttp/fileserver/staticfiles.go @@ -644,19 +644,32 @@ func (fsrv *FileServer) notFound(w http.ResponseWriter, r *http.Request, next ca return caddyhttp.Error(http.StatusNotFound, nil) } -// calculateEtag produces a strong etag by default, although, for -// efficiency reasons, it does not actually consume the contents -// of the file to make a hash of all the bytes. ¯\_(ツ)_/¯ -// Prefix the etag with "W/" to convert it into a weak etag. -// See: https://tools.ietf.org/html/rfc7232#section-2.3 +// calculateEtag computes an entity tag using a strong validator +// without consuming the contents of the file. It requires the +// file info contain the correct size and modification time. +// It strives to implement the semantics regarding ETags as defined +// by RFC 9110 section 8.8.3 and 8.8.1. See +// https://www.rfc-editor.org/rfc/rfc9110.html#section-8.8.3. +// +// As our implementation uses file modification timestamp and size, +// note the following from RFC 9110 section 8.8.1: "A representation's +// modification time, if defined with only one-second resolution, +// might be a weak validator if it is possible for the representation to +// be modified twice during a single second and retrieved between those +// modifications." The ext4 file system, which underpins the vast majority +// of Caddy deployments, stores mod times with millisecond precision, +// which we consider precise enough to qualify as a strong validator. func calculateEtag(d os.FileInfo) string { - mtime := d.ModTime().Unix() - if mtime == 0 || mtime == 1 { + mtime := d.ModTime() + if mtimeUnix := mtime.Unix(); mtimeUnix == 0 || mtimeUnix == 1 { return "" // not useful anyway; see issue #5548 } - t := strconv.FormatInt(mtime, 36) - s := strconv.FormatInt(d.Size(), 36) - return `"` + t + s + `"` + var sb strings.Builder + sb.WriteRune('"') + sb.WriteString(strconv.FormatInt(mtime.UnixNano(), 36)) + sb.WriteString(strconv.FormatInt(d.Size(), 36)) + sb.WriteRune('"') + return sb.String() } // Finds the first corresponding etag file for a given file in the file system and return its content