1
Fork 0
mirror of https://github.com/caddyserver/caddy.git synced 2024-12-16 21:56:40 -05:00
caddy/modules/caddyhttp/rewrite/rewrite.go
Matthew Holt e43b6d8178 core: Variadic Context.Logger(); soft deprecation
Ideally I'd just remove the parameter to caddy.Context.Logger(), but
this would break most Caddy plugins.

Instead, I'm making it variadic and marking it as partially deprecated.
In the future, I might completely remove the parameter once most
plugins have updated.
2022-09-16 16:55:36 -06:00

470 lines
14 KiB
Go

// Copyright 2015 Matthew Holt and The Caddy Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package rewrite
import (
"fmt"
"net/http"
"net/url"
"regexp"
"strconv"
"strings"
"github.com/caddyserver/caddy/v2"
"github.com/caddyserver/caddy/v2/modules/caddyhttp"
"go.uber.org/zap"
)
func init() {
caddy.RegisterModule(Rewrite{})
}
// Rewrite is a middleware which can rewrite/mutate HTTP requests.
//
// The Method and URI properties are "setters" (the request URI
// will be overwritten with the given values). Other properties are
// "modifiers" (they modify existing values in a differentiable
// way). It is atypical to combine the use of setters and
// modifiers in a single rewrite.
//
// To ensure consistent behavior, prefix and suffix stripping is
// performed in the URL-decoded (unescaped, normalized) space by
// default except for the specific bytes where an escape sequence
// is used in the prefix or suffix pattern.
//
// For all modifiers, paths are cleaned before being modified so that
// multiple, consecutive slashes are collapsed into a single slash,
// and dot elements are resolved and removed. In the special case
// of a prefix, suffix, or substring containing "//" (repeated slashes),
// slashes will not be merged while cleaning the path so that
// the rewrite can be interpreted literally.
type Rewrite struct {
// Changes the request's HTTP verb.
Method string `json:"method,omitempty"`
// Changes the request's URI, which consists of path and query string.
// Only components of the URI that are specified will be changed.
// For example, a value of "/foo.html" or "foo.html" will only change
// the path and will preserve any existing query string. Similarly, a
// value of "?a=b" will only change the query string and will not affect
// the path. Both can also be changed: "/foo?a=b" - this sets both the
// path and query string at the same time.
//
// You can also use placeholders. For example, to preserve the existing
// query string, you might use: "?{http.request.uri.query}&a=b". Any
// key-value pairs you add to the query string will not overwrite
// existing values (individual pairs are append-only).
//
// To clear the query string, explicitly set an empty one: "?"
URI string `json:"uri,omitempty"`
// Strips the given prefix from the beginning of the URI path.
// The prefix should be written in normalized (unescaped) form,
// but if an escaping (`%xx`) is used, the path will be required
// to have that same escape at that position in order to match.
StripPathPrefix string `json:"strip_path_prefix,omitempty"`
// Strips the given suffix from the end of the URI path.
// The suffix should be written in normalized (unescaped) form,
// but if an escaping (`%xx`) is used, the path will be required
// to have that same escape at that position in order to match.
StripPathSuffix string `json:"strip_path_suffix,omitempty"`
// Performs substring replacements on the URI.
URISubstring []substrReplacer `json:"uri_substring,omitempty"`
// Performs regular expression replacements on the URI path.
PathRegexp []*regexReplacer `json:"path_regexp,omitempty"`
logger *zap.Logger
}
// CaddyModule returns the Caddy module information.
func (Rewrite) CaddyModule() caddy.ModuleInfo {
return caddy.ModuleInfo{
ID: "http.handlers.rewrite",
New: func() caddy.Module { return new(Rewrite) },
}
}
// Provision sets up rewr.
func (rewr *Rewrite) Provision(ctx caddy.Context) error {
rewr.logger = ctx.Logger()
for i, rep := range rewr.PathRegexp {
if rep.Find == "" {
return fmt.Errorf("path_regexp find cannot be empty")
}
re, err := regexp.Compile(rep.Find)
if err != nil {
return fmt.Errorf("compiling regular expression %d: %v", i, err)
}
rep.re = re
}
return nil
}
func (rewr Rewrite) ServeHTTP(w http.ResponseWriter, r *http.Request, next caddyhttp.Handler) error {
repl := r.Context().Value(caddy.ReplacerCtxKey).(*caddy.Replacer)
logger := rewr.logger.With(
zap.Object("request", caddyhttp.LoggableHTTPRequest{Request: r}),
)
changed := rewr.Rewrite(r, repl)
if changed {
logger.Debug("rewrote request",
zap.String("method", r.Method),
zap.String("uri", r.RequestURI),
)
}
return next.ServeHTTP(w, r)
}
// rewrite performs the rewrites on r using repl, which should
// have been obtained from r, but is passed in for efficiency.
// It returns true if any changes were made to r.
func (rewr Rewrite) Rewrite(r *http.Request, repl *caddy.Replacer) bool {
oldMethod := r.Method
oldURI := r.RequestURI
// method
if rewr.Method != "" {
r.Method = strings.ToUpper(repl.ReplaceAll(rewr.Method, ""))
}
// uri (path, query string and... fragment, because why not)
if uri := rewr.URI; uri != "" {
// find the bounds of each part of the URI that exist
pathStart, qsStart, fragStart := -1, -1, -1
pathEnd, qsEnd := -1, -1
loop:
for i, ch := range uri {
switch {
case ch == '?' && qsStart < 0:
pathEnd, qsStart = i, i+1
case ch == '#' && fragStart < 0: // everything after fragment is fragment (very clear in RFC 3986 section 4.2)
if qsStart < 0 {
pathEnd = i
} else {
qsEnd = i
}
fragStart = i + 1
break loop
case pathStart < 0 && qsStart < 0:
pathStart = i
}
}
if pathStart >= 0 && pathEnd < 0 {
pathEnd = len(uri)
}
if qsStart >= 0 && qsEnd < 0 {
qsEnd = len(uri)
}
// isolate the three main components of the URI
var path, query, frag string
if pathStart > -1 {
path = uri[pathStart:pathEnd]
}
if qsStart > -1 {
query = uri[qsStart:qsEnd]
}
if fragStart > -1 {
frag = uri[fragStart:]
}
// build components which are specified, and store them
// in a temporary variable so that they all read the
// same version of the URI
var newPath, newQuery, newFrag string
if path != "" {
// Since the 'uri' placeholder performs a URL-encode,
// we need to intercept it so that it doesn't, because
// otherwise we risk a double-encode of the path.
uriPlaceholder := "{http.request.uri}"
if strings.Contains(path, uriPlaceholder) {
tmpUri := r.URL.Path
if r.URL.RawQuery != "" {
tmpUri += "?" + r.URL.RawQuery
}
path = strings.ReplaceAll(path, uriPlaceholder, tmpUri)
}
newPath = repl.ReplaceAll(path, "")
}
// before continuing, we need to check if a query string
// snuck into the path component during replacements
if before, after, found := strings.Cut(newPath, "?"); found {
// recompute; new path contains a query string
var injectedQuery string
newPath, injectedQuery = before, after
// don't overwrite explicitly-configured query string
if query == "" {
query = injectedQuery
}
}
if query != "" {
newQuery = buildQueryString(query, repl)
}
if frag != "" {
newFrag = repl.ReplaceAll(frag, "")
}
// update the URI with the new components
// only after building them
if pathStart >= 0 {
r.URL.Path = newPath
}
if qsStart >= 0 {
r.URL.RawQuery = newQuery
}
if fragStart >= 0 {
r.URL.Fragment = newFrag
}
}
// strip path prefix or suffix
if rewr.StripPathPrefix != "" {
prefix := repl.ReplaceAll(rewr.StripPathPrefix, "")
mergeSlashes := !strings.Contains(prefix, "//")
changePath(r, func(escapedPath string) string {
escapedPath = caddyhttp.CleanPath(escapedPath, mergeSlashes)
return trimPathPrefix(escapedPath, prefix)
})
}
if rewr.StripPathSuffix != "" {
suffix := repl.ReplaceAll(rewr.StripPathSuffix, "")
mergeSlashes := !strings.Contains(suffix, "//")
changePath(r, func(escapedPath string) string {
escapedPath = caddyhttp.CleanPath(escapedPath, mergeSlashes)
return reverse(trimPathPrefix(reverse(escapedPath), reverse(suffix)))
})
}
// substring replacements in URI
for _, rep := range rewr.URISubstring {
rep.do(r, repl)
}
// regular expression replacements on the path
for _, rep := range rewr.PathRegexp {
rep.do(r, repl)
}
// update the encoded copy of the URI
r.RequestURI = r.URL.RequestURI()
// return true if anything changed
return r.Method != oldMethod || r.RequestURI != oldURI
}
// buildQueryString takes an input query string and
// performs replacements on each component, returning
// the resulting query string. This function appends
// duplicate keys rather than replaces.
func buildQueryString(qs string, repl *caddy.Replacer) string {
var sb strings.Builder
// first component must be key, which is the same
// as if we just wrote a value in previous iteration
wroteVal := true
for len(qs) > 0 {
// determine the end of this component, which will be at
// the next equal sign or ampersand, whichever comes first
nextEq, nextAmp := strings.Index(qs, "="), strings.Index(qs, "&")
ampIsNext := nextAmp >= 0 && (nextAmp < nextEq || nextEq < 0)
end := len(qs) // assume no delimiter remains...
if ampIsNext {
end = nextAmp // ...unless ampersand is first...
} else if nextEq >= 0 && (nextEq < nextAmp || nextAmp < 0) {
end = nextEq // ...or unless equal is first.
}
// consume the component and write the result
comp := qs[:end]
comp, _ = repl.ReplaceFunc(comp, func(name string, val any) (any, error) {
if name == "http.request.uri.query" && wroteVal {
return val, nil // already escaped
}
var valStr string
switch v := val.(type) {
case string:
valStr = v
case fmt.Stringer:
valStr = v.String()
case int:
valStr = strconv.Itoa(v)
default:
valStr = fmt.Sprintf("%+v", v)
}
return url.QueryEscape(valStr), nil
})
if end < len(qs) {
end++ // consume delimiter
}
qs = qs[end:]
// if previous iteration wrote a value,
// that means we are writing a key
if wroteVal {
if sb.Len() > 0 && len(comp) > 0 {
sb.WriteRune('&')
}
} else {
sb.WriteRune('=')
}
sb.WriteString(comp)
// remember for the next iteration that we just wrote a value,
// which means the next iteration MUST write a key
wroteVal = ampIsNext
}
return sb.String()
}
// trimPathPrefix is like strings.TrimPrefix, but customized for advanced URI
// path prefix matching. The string prefix will be trimmed from the beginning
// of escapedPath if escapedPath starts with prefix. Rather than a naive 1:1
// comparison of each byte to determine if escapedPath starts with prefix,
// both strings are iterated in lock-step, and if prefix has a '%' encoding
// at a particular position, escapedPath must also have the same encoding
// representation for that character. In other words, if the prefix string
// uses the escaped form for a character, escapedPath must literally use the
// same escape at that position. Otherwise, all character comparisons are
// performed in normalized/unescaped space.
func trimPathPrefix(escapedPath, prefix string) string {
var iPath, iPrefix int
for {
if iPath >= len(escapedPath) || iPrefix >= len(prefix) {
break
}
prefixCh := prefix[iPrefix]
ch := string(escapedPath[iPath])
if ch == "%" && prefixCh != '%' && len(escapedPath) >= iPath+3 {
var err error
ch, err = url.PathUnescape(escapedPath[iPath : iPath+3])
if err != nil {
// should be impossible unless EscapedPath() is returning invalid values!
return escapedPath
}
iPath += 2
}
// prefix comparisons are case-insensitive to consistency with
// path matcher, which is case-insensitive for good reasons
if !strings.EqualFold(ch, string(prefixCh)) {
return escapedPath
}
iPath++
iPrefix++
}
// found matching prefix, trim it
return escapedPath[iPath:]
}
func reverse(s string) string {
r := []rune(s)
for i, j := 0, len(r)-1; i < len(r)/2; i, j = i+1, j-1 {
r[i], r[j] = r[j], r[i]
}
return string(r)
}
// substrReplacer describes either a simple and fast substring replacement.
type substrReplacer struct {
// A substring to find. Supports placeholders.
Find string `json:"find,omitempty"`
// The substring to replace with. Supports placeholders.
Replace string `json:"replace,omitempty"`
// Maximum number of replacements per string.
// Set to <= 0 for no limit (default).
Limit int `json:"limit,omitempty"`
}
// do performs the substring replacement on r.
func (rep substrReplacer) do(r *http.Request, repl *caddy.Replacer) {
if rep.Find == "" {
return
}
lim := rep.Limit
if lim == 0 {
lim = -1
}
find := repl.ReplaceAll(rep.Find, "")
replace := repl.ReplaceAll(rep.Replace, "")
mergeSlashes := !strings.Contains(rep.Find, "//")
changePath(r, func(pathOrRawPath string) string {
return strings.Replace(caddyhttp.CleanPath(pathOrRawPath, mergeSlashes), find, replace, lim)
})
r.URL.RawQuery = strings.Replace(r.URL.RawQuery, find, replace, lim)
}
// regexReplacer describes a replacement using a regular expression.
type regexReplacer struct {
// The regular expression to find.
Find string `json:"find,omitempty"`
// The substring to replace with. Supports placeholders and
// regular expression capture groups.
Replace string `json:"replace,omitempty"`
re *regexp.Regexp
}
func (rep regexReplacer) do(r *http.Request, repl *caddy.Replacer) {
if rep.Find == "" || rep.re == nil {
return
}
replace := repl.ReplaceAll(rep.Replace, "")
changePath(r, func(pathOrRawPath string) string {
return rep.re.ReplaceAllString(pathOrRawPath, replace)
})
}
func changePath(req *http.Request, newVal func(pathOrRawPath string) string) {
req.URL.RawPath = newVal(req.URL.EscapedPath())
if p, err := url.PathUnescape(req.URL.RawPath); err == nil && p != "" {
req.URL.Path = p
} else {
req.URL.Path = newVal(req.URL.Path)
}
// RawPath is only set if it's different from the normalized Path (std lib)
if req.URL.RawPath == req.URL.Path {
req.URL.RawPath = ""
}
}
// Interface guard
var _ caddyhttp.MiddlewareHandler = (*Rewrite)(nil)