caddy/modules/caddyhttp/matchers.go

// Copyright 2015 Matthew Holt and The Caddy Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package caddyhttp

import (
	"encoding/json"
	"errors"
	"fmt"
	"net"
	"net/http"
	"net/textproto"
	"net/url"
	"path"
	"reflect"
	"regexp"
	"runtime"
	"sort"
	"strconv"
	"strings"

	"github.com/google/cel-go/cel"
	"github.com/google/cel-go/common/types"
	"github.com/google/cel-go/common/types/ref"

	"github.com/caddyserver/caddy/v2"
	"github.com/caddyserver/caddy/v2/caddyconfig/caddyfile"
)

type (
	// MatchHost matches requests by the Host value (case-insensitive).
	//
	// When used in a top-level HTTP route,
	// [qualifying domain names](/docs/automatic-https#hostname-requirements)
	// may trigger [automatic HTTPS](/docs/automatic-https), which automatically
	// provisions and renews certificates for you. Before doing this, you
	// should ensure that DNS records for these domains are properly configured,
	// especially A/AAAA pointed at your server.
	//
	// Automatic HTTPS can be
	// [customized or disabled](/docs/modules/http#servers/automatic_https).
	//
	// Wildcards (`*`) may be used to represent exactly one label of the
	// hostname, in accordance with RFC 1034 (because host matchers are also
	// used for automatic HTTPS which influences TLS certificates). Thus,
	// a host of `*` matches hosts like `localhost` or `internal` but not
	// `example.com`. To catch all hosts, omit the host matcher entirely.
	//
	// The wildcard can be useful for matching all subdomains, for example:
	// `*.example.com` matches `foo.example.com` but not `foo.bar.example.com`.
	//
	// Duplicate entries will return an error.
	MatchHost []string

	// MatchPath case-insensitively matches requests by the URI's path. Path
	// matching is exact, not prefix-based, giving you more control and clarity
	// over matching. Wildcards (`*`) may be used:
	//
	// - At the end only, for a prefix match (`/prefix/*`)
	// - At the beginning only, for a suffix match (`*.suffix`)
	// - On both sides only, for a substring match (`*/contains/*`)
	// - In the middle, for a globular match (`/accounts/*/info`)
	//
	// Slashes are significant; i.e. `/foo*` matches `/foo`, `/foo/`, `/foo/bar`,
	// and `/foobar`; but `/foo/*` does not match `/foo` or `/foobar`. Valid
	// paths start with a slash `/`.
	//
	// Because there are, in general, multiple possible escaped forms of any
	// path, path matchers operate in unescaped space; that is, path matchers
	// should be written in their unescaped form to prevent ambiguities and
	// possible security issues, as all request paths will be normalized to
	// their unescaped forms before matcher evaluation.
	//
	// However, escape sequences in a match pattern are supported; they are
	// compared with the request's raw/escaped path for those bytes only.
	// In other words, a matcher of `/foo%2Fbar` will match a request path
	// of precisely `/foo%2Fbar`, but not `/foo/bar`. It follows that matching
	// the literal percent sign (%) in normalized space can be done using the
	// escaped form, `%25`.
	//
	// Even though wildcards (`*`) operate in the normalized space, the special
	// escaped wildcard (`%*`), which is not a valid escape sequence, may be
	// used in place of a span that should NOT be decoded; that is, `/bands/%*`
	// will match `/bands/AC%2fDC` whereas `/bands/*` will not.
	//
	// Even though path matching is done in normalized space, the special
	// wildcard `%*` may be used in place of a span that should NOT be decoded;
	// that is, `/bands/%*/` will match `/bands/AC%2fDC/` whereas `/bands/*/`
	// will not.
	//
	// This matcher is fast, so it does not support regular expressions or
	// capture groups. For slower but more powerful matching, use the
	// path_regexp matcher. (Note that due to the special treatment of
	// escape sequences in matcher patterns, they may perform slightly slower
	// in high-traffic environments.)
	MatchPath []string

	// MatchPathRE matches requests by a regular expression on the URI's path.
	// Path matching is performed in the unescaped (decoded) form of the path.
	//
	// Upon a match, it adds placeholders to the request: `{http.regexp.name.capture_group}`
	// where `name` is the regular expression's name, and `capture_group` is either
	// the named or positional capture group from the expression itself. If no name
	// is given, then the placeholder omits the name: `{http.regexp.capture_group}`
	// (potentially leading to collisions).
	MatchPathRE struct{ MatchRegexp }

	// MatchMethod matches requests by the method.
	MatchMethod []string

	// MatchQuery matches requests by the URI's query string. It takes a JSON object
	// keyed by the query keys, with an array of string values to match for that key.
	// Query key matches are exact, but wildcards may be used for value matches. Both
	// keys and values may be placeholders.
	//
	// An example of the structure to match `?key=value&topic=api&query=something` is:
	//
	// ```json
	// {
	// 	"key": ["value"],
	//	"topic": ["api"],
	//	"query": ["*"]
	// }
	// ```
	//
	// Invalid query strings, including those with bad escapings or illegal characters
	// like semicolons, will fail to parse and thus fail to match.
	//
	// **NOTE:** Notice that query string values are arrays, not singular values. This is
	// because repeated keys are valid in query strings, and each one may have a
	// different value. This matcher will match for a key if any one of its configured
	// values is assigned in the query string. Backend applications relying on query
	// strings MUST take into consideration that query string values are arrays and can
	// have multiple values.
	MatchQuery url.Values

	// MatchHeader matches requests by header fields. The key is the field
	// name and the array is the list of field values. It performs fast,
	// exact string comparisons of the field values. Fast prefix, suffix,
	// and substring matches can also be done by suffixing, prefixing, or
	// surrounding the value with the wildcard `*` character, respectively.
	// If a list is null, the header must not exist. If the list is empty,
	// the field must simply exist, regardless of its value.
	//
	// **NOTE:** Notice that header values are arrays, not singular values. This is
	// because repeated fields are valid in headers, and each one may have a
	// different value. This matcher will match for a field if any one of its configured
	// values matches in the header. Backend applications relying on headers MUST take
	// into consideration that header field values are arrays and can have multiple
	// values.
	MatchHeader http.Header

	// MatchHeaderRE matches requests by a regular expression on header fields.
	//
	// Upon a match, it adds placeholders to the request: `{http.regexp.name.capture_group}`
	// where `name` is the regular expression's name, and `capture_group` is either
	// the named or positional capture group from the expression itself. If no name
	// is given, then the placeholder omits the name: `{http.regexp.capture_group}`
	// (potentially leading to collisions).
	MatchHeaderRE map[string]*MatchRegexp

	// MatchProtocol matches requests by protocol. Recognized values are
	// "http", "https", and "grpc" for broad protocol matches, or specific
	// HTTP versions can be specified like so: "http/1", "http/1.1",
	// "http/2", "http/3", or minimum versions: "http/2+", etc.
	MatchProtocol string

	// MatchNot matches requests by negating the results of its matcher
	// sets. A single "not" matcher takes one or more matcher sets. Each
	// matcher set is OR'ed; in other words, if any matcher set returns
	// true, the final result of the "not" matcher is false. Individual
	// matchers within a set work the same (i.e. different matchers in
	// the same set are AND'ed).
	//
	// NOTE: The generated docs which describe the structure of this
	// module are wrong because of how this type unmarshals JSON in a
	// custom way. The correct structure is:
	//
	// ```json
	// [
	// 	{},
	// 	{}
	// ]
	// ```
	//
	// where each of the array elements is a matcher set, i.e. an
	// object keyed by matcher name.
	MatchNot struct {
		MatcherSetsRaw []caddy.ModuleMap `json:"-" caddy:"namespace=http.matchers"`
		MatcherSets    []MatcherSet      `json:"-"`
	}
)

func init() {
	caddy.RegisterModule(MatchHost{})
	caddy.RegisterModule(MatchPath{})
	caddy.RegisterModule(MatchPathRE{})
	caddy.RegisterModule(MatchMethod{})
	caddy.RegisterModule(MatchQuery{})
	caddy.RegisterModule(MatchHeader{})
	caddy.RegisterModule(MatchHeaderRE{})
	caddy.RegisterModule(new(MatchProtocol))
	caddy.RegisterModule(MatchNot{})
}

// CaddyModule returns the Caddy module information.
func (MatchHost) CaddyModule() caddy.ModuleInfo {
	return caddy.ModuleInfo{
		ID:  "http.matchers.host",
		New: func() caddy.Module { return new(MatchHost) },
	}
}

// UnmarshalCaddyfile implements caddyfile.Unmarshaler.
func (m *MatchHost) UnmarshalCaddyfile(d *caddyfile.Dispenser) error {
	for d.Next() {
		*m = append(*m, d.RemainingArgs()...)
		if d.NextBlock(0) {
			return d.Err("malformed host matcher: blocks are not supported")
		}
	}
	return nil
}

// Provision sets up and validates m, including making it more efficient for large lists.
func (m MatchHost) Provision(_ caddy.Context) error {
	// check for duplicates; they are nonsensical and reduce efficiency
	// (we could just remove them, but the user should know their config is erroneous)
	seen := make(map[string]int)
	for i, h := range m {
		h = strings.ToLower(h)
		if firstI, ok := seen[h]; ok {
			return fmt.Errorf("host at index %d is repeated at index %d: %s", firstI, i, h)
		}
		seen[h] = i
	}

	if m.large() {
		// sort the slice lexicographically, grouping "fuzzy" entries (wildcards and placeholders)
		// at the front of the list; this allows us to use binary search for exact matches, which
		// we have seen from experience is the most common kind of value in large lists; and any
		// other kinds of values (wildcards and placeholders) are grouped in front so the linear
		// search should find a match fairly quickly
		sort.Slice(m, func(i, j int) bool {
			iInexact, jInexact := m.fuzzy(m[i]), m.fuzzy(m[j])
			if iInexact && !jInexact {
				return true
			}
			if !iInexact && jInexact {
				return false
			}
			return m[i] < m[j]
		})
	}

	return nil
}

// Match returns true if r matches m.
func (m MatchHost) Match(r *http.Request) bool {
	reqHost, _, err := net.SplitHostPort(r.Host)
	if err != nil {
		// OK; probably didn't have a port
		reqHost = r.Host

		// make sure we strip the brackets from IPv6 addresses
		reqHost = strings.TrimPrefix(reqHost, "[")
		reqHost = strings.TrimSuffix(reqHost, "]")
	}

	if m.large() {
		// fast path: locate exact match using binary search (about 100-1000x faster for large lists)
		pos := sort.Search(len(m), func(i int) bool {
			if m.fuzzy(m[i]) {
				return false
			}
			return m[i] >= reqHost
		})
		if pos < len(m) && m[pos] == reqHost {
			return true
		}
	}

	repl := r.Context().Value(caddy.ReplacerCtxKey).(*caddy.Replacer)

outer:
	for _, host := range m {
		// fast path: if matcher is large, we already know we don't have an exact
		// match, so we're only looking for fuzzy match now, which should be at the
		// front of the list; if we have reached a value that is not fuzzy, there
		// will be no match and we can short-circuit for efficiency
		if m.large() && !m.fuzzy(host) {
			break
		}

		host = repl.ReplaceAll(host, "")
		if strings.Contains(host, "*") {
			patternParts := strings.Split(host, ".")
			incomingParts := strings.Split(reqHost, ".")
			if len(patternParts) != len(incomingParts) {
				continue
			}
			for i := range patternParts {
				if patternParts[i] == "*" {
					continue
				}
				if !strings.EqualFold(patternParts[i], incomingParts[i]) {
					continue outer
				}
			}
			return true
		} else if strings.EqualFold(reqHost, host) {
			return true
		}
	}

	return false
}

// CELLibrary produces options that expose this matcher for use in CEL
// expression matchers.
//
// Example:
//
//	expression host('localhost')
func (MatchHost) CELLibrary(ctx caddy.Context) (cel.Library, error) {
	return CELMatcherImpl(
		"host",
		"host_match_request_list",
		[]*cel.Type{cel.ListType(cel.StringType)},
		func(data ref.Val) (RequestMatcher, error) {
			refStringList := reflect.TypeOf([]string{})
			strList, err := data.ConvertToNative(refStringList)
			if err != nil {
				return nil, err
			}
			matcher := MatchHost(strList.([]string))
			err = matcher.Provision(ctx)
			return matcher, err
		},
	)
}

// fuzzy returns true if the given hostname h is not a specific
// hostname, e.g. has placeholders or wildcards.
func (MatchHost) fuzzy(h string) bool { return strings.ContainsAny(h, "{*") }

// large returns true if m is considered to be large. Optimizing
// the matcher for smaller lists has diminishing returns.
// See related benchmark function in test file to conduct experiments.
func (m MatchHost) large() bool { return len(m) > 100 }

// CaddyModule returns the Caddy module information.
func (MatchPath) CaddyModule() caddy.ModuleInfo {
	return caddy.ModuleInfo{
		ID:  "http.matchers.path",
		New: func() caddy.Module { return new(MatchPath) },
	}
}

// Provision lower-cases the paths in m to ensure case-insensitive matching.
func (m MatchPath) Provision(_ caddy.Context) error {
	for i := range m {
		if m[i] == "*" && i > 0 {
			// will always match, so just put it first
			m[0] = m[i]
			break
		}
		m[i] = strings.ToLower(m[i])
	}
	return nil
}

// Match returns true if r matches m.
func (m MatchPath) Match(r *http.Request) bool {
	// Even though RFC 9110 says that path matching is case-sensitive
	// (https://www.rfc-editor.org/rfc/rfc9110.html#section-4.2.3),
	// we do case-insensitive matching to mitigate security issues
	// related to differences between operating systems, applications,
	// etc; if case-sensitive matching is needed, the regex matcher
	// can be used instead.
	reqPath := strings.ToLower(r.URL.Path)

	// See #2917; Windows ignores trailing dots and spaces
	// when accessing files (sigh), potentially causing a
	// security risk (cry) if PHP files end up being served
	// as static files, exposing the source code, instead of
	// being matched by *.php to be treated as PHP scripts.
	if runtime.GOOS == "windows" { // issue #5613
		reqPath = strings.TrimRight(reqPath, ". ")
	}

	repl := r.Context().Value(caddy.ReplacerCtxKey).(*caddy.Replacer)

	for _, matchPattern := range m {
		matchPattern = repl.ReplaceAll(matchPattern, "")

		// special case: whole path is wildcard; this is unnecessary
		// as it matches all requests, which is the same as no matcher
		if matchPattern == "*" {
			return true
		}

		// Clean the path, merge doubled slashes, etc.
		// This ensures maliciously crafted requests can't bypass
		// the path matcher. See #4407. Good security posture
		// requires that we should do all we can to reduce any
		// funny-looking paths into "normalized" forms such that
		// weird variants can't sneak by.
		//
		// How we clean the path depends on the kind of pattern:
		// we either merge slashes or we don't. If the pattern
		// has double slashes, we preserve them in the path.
		//
		// TODO: Despite the fact that the *vast* majority of path
		// matchers have only 1 pattern, a possible optimization is
		// to remember the cleaned form of the path for future
		// iterations; it's just that the way we clean depends on
		// the kind of pattern.

		mergeSlashes := !strings.Contains(matchPattern, "//")

		// if '%' appears in the match pattern, we interpret that to mean
		// the intent is to compare that part of the path in raw/escaped
		// space; i.e. "%40"=="%40", not "@", and "%2F"=="%2F", not "/"
		if strings.Contains(matchPattern, "%") {
			reqPathForPattern := CleanPath(r.URL.EscapedPath(), mergeSlashes)
			if m.matchPatternWithEscapeSequence(reqPathForPattern, matchPattern) {
				return true
			}

			// doing prefix/suffix/substring matches doesn't make sense
			continue
		}

		reqPathForPattern := CleanPath(reqPath, mergeSlashes)

		// for substring, prefix, and suffix matching, only perform those
		// special, fast matches if they are the only wildcards in the pattern;
		// otherwise we assume a globular match if any * appears in the middle

		// special case: first and last characters are wildcard,
		// treat it as a fast substring match
		if strings.Count(matchPattern, "*") == 2 &&
			strings.HasPrefix(matchPattern, "*") &&
			strings.HasSuffix(matchPattern, "*") &&
			strings.Count(matchPattern, "*") == 2 {
			if strings.Contains(reqPathForPattern, matchPattern[1:len(matchPattern)-1]) {
				return true
			}
			continue
		}

		// only perform prefix/suffix match if it is the only wildcard...
		// I think that is more correct most of the time
		if strings.Count(matchPattern, "*") == 1 {
			// special case: first character is a wildcard,
			// treat it as a fast suffix match
			if strings.HasPrefix(matchPattern, "*") {
				if strings.HasSuffix(reqPathForPattern, matchPattern[1:]) {
					return true
				}
				continue
			}

			// special case: last character is a wildcard,
			// treat it as a fast prefix match
			if strings.HasSuffix(matchPattern, "*") {
				if strings.HasPrefix(reqPathForPattern, matchPattern[:len(matchPattern)-1]) {
					return true
				}
				continue
			}
		}

		// at last, use globular matching, which also is exact matching
		// if there are no glob/wildcard chars; we ignore the error here
		// because we can't handle it anyway
		matches, _ := path.Match(matchPattern, reqPathForPattern)
		if matches {
			return true
		}
	}
	return false
}

func (MatchPath) matchPatternWithEscapeSequence(escapedPath, matchPath string) bool {
	// We would just compare the pattern against r.URL.Path,
	// but the pattern contains %, indicating that we should
	// compare at least some part of the path in raw/escaped
	// space, not normalized space; so we build the string we
	// will compare against by adding the normalized parts
	// of the path, then switching to the escaped parts where
	// the pattern hints to us wherever % is present.
	var sb strings.Builder

	// iterate the pattern and escaped path in lock-step;
	// increment iPattern every time we consume a char from the pattern,
	// increment iPath every time we consume a char from the path;
	// iPattern and iPath are our cursors/iterator positions for each string
	var iPattern, iPath int
	for {
		if iPattern >= len(matchPath) || iPath >= len(escapedPath) {
			break
		}

		// get the next character from the request path

		pathCh := string(escapedPath[iPath])
		var escapedPathCh string

		// normalize (decode) escape sequences
		if pathCh == "%" && len(escapedPath) >= iPath+3 {
			// hold onto this in case we find out the intent is to match in escaped space here;
			// we lowercase it even though technically the spec says: "For consistency, URI
			// producers and normalizers should use uppercase hexadecimal digits for all percent-
			// encodings" (RFC 3986 section 2.1) - we lowercased the matcher pattern earlier in
			// provisioning so we do the same here to gain case-insensitivity in equivalence;
			// besides, this string is never shown visibly
			escapedPathCh = strings.ToLower(escapedPath[iPath : iPath+3])

			var err error
			pathCh, err = url.PathUnescape(escapedPathCh)
			if err != nil {
				// should be impossible unless EscapedPath() is giving us an invalid sequence!
				return false
			}
			iPath += 2 // escape sequence is 2 bytes longer than normal char
		}

		// now get the next character from the pattern

		normalize := true
		switch matchPath[iPattern] {
		case '%':
			// escape sequence

			// if not a wildcard ("%*"), compare literally; consume next two bytes of pattern
			if len(matchPath) >= iPattern+3 && matchPath[iPattern+1] != '*' {
				sb.WriteString(escapedPathCh)
				iPath++
				iPattern += 2
				break
			}

			// escaped wildcard sequence; consume next byte only ('*')
			iPattern++
			normalize = false

			fallthrough
		case '*':
			// wildcard, so consume until next matching character
			remaining := escapedPath[iPath:]
			until := len(escapedPath) - iPath // go until end of string...
			if iPattern < len(matchPath)-1 {  // ...unless the * is not at the end
				nextCh := matchPath[iPattern+1]
				until = strings.IndexByte(remaining, nextCh)
				if until == -1 {
					// terminating char of wildcard span not found, so definitely no match
					return false
				}
			}
			if until == 0 {
				// empty span; nothing to add on this iteration
				break
			}
			next := remaining[:until]
			if normalize {
				var err error
				next, err = url.PathUnescape(next)
				if err != nil {
					return false // should be impossible anyway
				}
			}
			sb.WriteString(next)
			iPath += until
		default:
			sb.WriteString(pathCh)
			iPath++
		}

		iPattern++
	}

	// we can now treat rawpath globs (%*) as regular globs (*)
	matchPath = strings.ReplaceAll(matchPath, "%*", "*")

	// ignore error here because we can't handle it anyway=
	matches, _ := path.Match(matchPath, sb.String())
	return matches
}

// CELLibrary produces options that expose this matcher for use in CEL
// expression matchers.
//
// Example:
//
//	expression path('*substring*', '*suffix')
func (MatchPath) CELLibrary(ctx caddy.Context) (cel.Library, error) {
	return CELMatcherImpl(
		// name of the macro, this is the function name that users see when writing expressions.
		"path",
		// name of the function that the macro will be rewritten to call.
		"path_match_request_list",
		// internal data type of the MatchPath value.
		[]*cel.Type{cel.ListType(cel.StringType)},
		// function to convert a constant list of strings to a MatchPath instance.
		func(data ref.Val) (RequestMatcher, error) {
			refStringList := reflect.TypeOf([]string{})
			strList, err := data.ConvertToNative(refStringList)
			if err != nil {
				return nil, err
			}
			matcher := MatchPath(strList.([]string))
			err = matcher.Provision(ctx)
			return matcher, err
		},
	)
}

// UnmarshalCaddyfile implements caddyfile.Unmarshaler.
func (m *MatchPath) UnmarshalCaddyfile(d *caddyfile.Dispenser) error {
	for d.Next() {
		*m = append(*m, d.RemainingArgs()...)
		if d.NextBlock(0) {
			return d.Err("malformed path matcher: blocks are not supported")
		}
	}
	return nil
}

// CaddyModule returns the Caddy module information.
func (MatchPathRE) CaddyModule() caddy.ModuleInfo {
	return caddy.ModuleInfo{
		ID:  "http.matchers.path_regexp",
		New: func() caddy.Module { return new(MatchPathRE) },
	}
}

// Match returns true if r matches m.
func (m MatchPathRE) Match(r *http.Request) bool {
	repl := r.Context().Value(caddy.ReplacerCtxKey).(*caddy.Replacer)

	// Clean the path, merges doubled slashes, etc.
	// This ensures maliciously crafted requests can't bypass
	// the path matcher. See #4407
	cleanedPath := cleanPath(r.URL.Path)

	return m.MatchRegexp.Match(cleanedPath, repl)
}

// CELLibrary produces options that expose this matcher for use in CEL
// expression matchers.
//
// Example:
//
//	expression path_regexp('^/bar')
func (MatchPathRE) CELLibrary(ctx caddy.Context) (cel.Library, error) {
	unnamedPattern, err := CELMatcherImpl(
		"path_regexp",
		"path_regexp_request_string",
		[]*cel.Type{cel.StringType},
		func(data ref.Val) (RequestMatcher, error) {
			pattern := data.(types.String)
			matcher := MatchPathRE{MatchRegexp{Pattern: string(pattern)}}
			err := matcher.Provision(ctx)
			return matcher, err
		},
	)
	if err != nil {
		return nil, err
	}
	namedPattern, err := CELMatcherImpl(
		"path_regexp",
		"path_regexp_request_string_string",
		[]*cel.Type{cel.StringType, cel.StringType},
		func(data ref.Val) (RequestMatcher, error) {
			refStringList := reflect.TypeOf([]string{})
			params, err := data.ConvertToNative(refStringList)
			if err != nil {
				return nil, err
			}
			strParams := params.([]string)
			matcher := MatchPathRE{MatchRegexp{Name: strParams[0], Pattern: strParams[1]}}
			err = matcher.Provision(ctx)
			return matcher, err
		},
	)
	if err != nil {
		return nil, err
	}
	envOpts := append(unnamedPattern.CompileOptions(), namedPattern.CompileOptions()...)
	prgOpts := append(unnamedPattern.ProgramOptions(), namedPattern.ProgramOptions()...)
	return NewMatcherCELLibrary(envOpts, prgOpts), nil
}

// CaddyModule returns the Caddy module information.
func (MatchMethod) CaddyModule() caddy.ModuleInfo {
	return caddy.ModuleInfo{
		ID:  "http.matchers.method",
		New: func() caddy.Module { return new(MatchMethod) },
	}
}

// UnmarshalCaddyfile implements caddyfile.Unmarshaler.
func (m *MatchMethod) UnmarshalCaddyfile(d *caddyfile.Dispenser) error {
	for d.Next() {
		*m = append(*m, d.RemainingArgs()...)
		if d.NextBlock(0) {
			return d.Err("malformed method matcher: blocks are not supported")
		}
	}
	return nil
}

// Match returns true if r matches m.
func (m MatchMethod) Match(r *http.Request) bool {
	for _, method := range m {
		if r.Method == method {
			return true
		}
	}
	return false
}

// CELLibrary produces options that expose this matcher for use in CEL
// expression matchers.
//
// Example:
//
//	expression method('PUT', 'POST')
func (MatchMethod) CELLibrary(_ caddy.Context) (cel.Library, error) {
	return CELMatcherImpl(
		"method",
		"method_request_list",
		[]*cel.Type{cel.ListType(cel.StringType)},
		func(data ref.Val) (RequestMatcher, error) {
			refStringList := reflect.TypeOf([]string{})
			strList, err := data.ConvertToNative(refStringList)
			if err != nil {
				return nil, err
			}
			return MatchMethod(strList.([]string)), nil
		},
	)
}

// CaddyModule returns the Caddy module information.
func (MatchQuery) CaddyModule() caddy.ModuleInfo {
	return caddy.ModuleInfo{
		ID:  "http.matchers.query",
		New: func() caddy.Module { return new(MatchQuery) },
	}
}

// UnmarshalCaddyfile implements caddyfile.Unmarshaler.
func (m *MatchQuery) UnmarshalCaddyfile(d *caddyfile.Dispenser) error {
	if *m == nil {
		*m = make(map[string][]string)
	}
	for d.Next() {
		for _, query := range d.RemainingArgs() {
			if query == "" {
				continue
			}
			before, after, found := strings.Cut(query, "=")
			if !found {
				return d.Errf("malformed query matcher token: %s; must be in param=val format", d.Val())
			}
			url.Values(*m).Add(before, after)
		}
		if d.NextBlock(0) {
			return d.Err("malformed query matcher: blocks are not supported")
		}
	}
	return nil
}

// Match returns true if r matches m. An empty m matches an empty query string.
func (m MatchQuery) Match(r *http.Request) bool {
	repl := r.Context().Value(caddy.ReplacerCtxKey).(*caddy.Replacer)

	// parse query string just once, for efficiency
	parsed, err := url.ParseQuery(r.URL.RawQuery)
	if err != nil {
		// Illegal query string. Likely bad escape sequence or unescaped literals.
		// Note that semicolons in query string have a controversial history. Summaries:
		// - https://github.com/golang/go/issues/50034
		// - https://github.com/golang/go/issues/25192
		// Despite the URL WHATWG spec mandating the use of & separators for query strings,
		// every URL parser implementation is different, and Filippo Valsorda rightly wrote:
		// "Relying on parser alignment for security is doomed." Overall conclusion is that
		// splitting on & and rejecting ; in key=value pairs is safer than accepting raw ;.
		// We regard the Go team's decision as sound and thus reject malformed query strings.
		return false
	}

	for param, vals := range m {
		param = repl.ReplaceAll(param, "")
		paramVal, found := parsed[param]
		if found {
			for _, v := range vals {
				v = repl.ReplaceAll(v, "")
				if paramVal[0] == v || v == "*" {
					return true
				}
			}
		}
	}
	return len(m) == 0 && len(r.URL.Query()) == 0
}

// CELLibrary produces options that expose this matcher for use in CEL
// expression matchers.
//
// Example:
//
//	expression query({'sort': 'asc'}) || query({'foo': ['*bar*', 'baz']})
func (MatchQuery) CELLibrary(_ caddy.Context) (cel.Library, error) {
	return CELMatcherImpl(
		"query",
		"query_matcher_request_map",
		[]*cel.Type{CELTypeJSON},
		func(data ref.Val) (RequestMatcher, error) {
			mapStrListStr, err := CELValueToMapStrList(data)
			if err != nil {
				return nil, err
			}
			return MatchQuery(url.Values(mapStrListStr)), nil
		},
	)
}

// CaddyModule returns the Caddy module information.
func (MatchHeader) CaddyModule() caddy.ModuleInfo {
	return caddy.ModuleInfo{
		ID:  "http.matchers.header",
		New: func() caddy.Module { return new(MatchHeader) },
	}
}

// UnmarshalCaddyfile implements caddyfile.Unmarshaler.
func (m *MatchHeader) UnmarshalCaddyfile(d *caddyfile.Dispenser) error {
	if *m == nil {
		*m = make(map[string][]string)
	}
	for d.Next() {
		var field, val string
		if !d.Args(&field) {
			return d.Errf("malformed header matcher: expected field")
		}

		if strings.HasPrefix(field, "!") {
			if len(field) == 1 {
				return d.Errf("malformed header matcher: must have field name following ! character")
			}

			field = field[1:]
			headers := *m
			headers[field] = nil
			m = &headers
			if d.NextArg() {
				return d.Errf("malformed header matcher: null matching headers cannot have a field value")
			}
		} else {
			if !d.NextArg() {
				return d.Errf("malformed header matcher: expected both field and value")
			}

			// If multiple header matchers with the same header field are defined,
			// we want to add the existing to the list of headers (will be OR'ed)
			val = d.Val()
			http.Header(*m).Add(field, val)
		}

		if d.NextBlock(0) {
			return d.Err("malformed header matcher: blocks are not supported")
		}
	}
	return nil
}

// Match returns true if r matches m.
func (m MatchHeader) Match(r *http.Request) bool {
	repl := r.Context().Value(caddy.ReplacerCtxKey).(*caddy.Replacer)
	return matchHeaders(r.Header, http.Header(m), r.Host, repl)
}

// CELLibrary produces options that expose this matcher for use in CEL
// expression matchers.
//
// Example:
//
//	expression header({'content-type': 'image/png'})
//	expression header({'foo': ['bar', 'baz']}) // match bar or baz
func (MatchHeader) CELLibrary(_ caddy.Context) (cel.Library, error) {
	return CELMatcherImpl(
		"header",
		"header_matcher_request_map",
		[]*cel.Type{CELTypeJSON},
		func(data ref.Val) (RequestMatcher, error) {
			mapStrListStr, err := CELValueToMapStrList(data)
			if err != nil {
				return nil, err
			}
			return MatchHeader(http.Header(mapStrListStr)), nil
		},
	)
}

// getHeaderFieldVals returns the field values for the given fieldName from input.
// The host parameter should be obtained from the http.Request.Host field since
// net/http removes it from the header map.
func getHeaderFieldVals(input http.Header, fieldName, host string) []string {
	fieldName = textproto.CanonicalMIMEHeaderKey(fieldName)
	if fieldName == "Host" && host != "" {
		return []string{host}
	}
	return input[fieldName]
}

// matchHeaders returns true if input matches the criteria in against without regex.
// The host parameter should be obtained from the http.Request.Host field since
// net/http removes it from the header map.
func matchHeaders(input, against http.Header, host string, repl *caddy.Replacer) bool {
	for field, allowedFieldVals := range against {
		actualFieldVals := getHeaderFieldVals(input, field, host)
		if allowedFieldVals != nil && len(allowedFieldVals) == 0 && actualFieldVals != nil {
			// a non-nil but empty list of allowed values means
			// match if the header field exists at all
			continue
		}
		if allowedFieldVals == nil && actualFieldVals == nil {
			// a nil list means match if the header does not exist at all
			continue
		}
		var match bool
	fieldVals:
		for _, actualFieldVal := range actualFieldVals {
			for _, allowedFieldVal := range allowedFieldVals {
				if repl != nil {
					allowedFieldVal = repl.ReplaceAll(allowedFieldVal, "")
				}
				switch {
				case allowedFieldVal == "*":
					match = true
				case strings.HasPrefix(allowedFieldVal, "*") && strings.HasSuffix(allowedFieldVal, "*"):
					match = strings.Contains(actualFieldVal, allowedFieldVal[1:len(allowedFieldVal)-1])
				case strings.HasPrefix(allowedFieldVal, "*"):
					match = strings.HasSuffix(actualFieldVal, allowedFieldVal[1:])
				case strings.HasSuffix(allowedFieldVal, "*"):
					match = strings.HasPrefix(actualFieldVal, allowedFieldVal[:len(allowedFieldVal)-1])
				default:
					match = actualFieldVal == allowedFieldVal
				}
				if match {
					break fieldVals
				}
			}
		}
		if !match {
			return false
		}
	}
	return true
}

// CaddyModule returns the Caddy module information.
func (MatchHeaderRE) CaddyModule() caddy.ModuleInfo {
	return caddy.ModuleInfo{
		ID:  "http.matchers.header_regexp",
		New: func() caddy.Module { return new(MatchHeaderRE) },
	}
}

// UnmarshalCaddyfile implements caddyfile.Unmarshaler.
func (m *MatchHeaderRE) UnmarshalCaddyfile(d *caddyfile.Dispenser) error {
	if *m == nil {
		*m = make(map[string]*MatchRegexp)
	}
	for d.Next() {
		var first, second, third string
		if !d.Args(&first, &second) {
			return d.ArgErr()
		}

		var name, field, val string
		if d.Args(&third) {
			name = first
			field = second
			val = third
		} else {
			field = first
			val = second
		}

		// If there's already a pattern for this field
		// then we would end up overwriting the old one
		if (*m)[field] != nil {
			return d.Errf("header_regexp matcher can only be used once per named matcher, per header field: %s", field)
		}

		(*m)[field] = &MatchRegexp{Pattern: val, Name: name}

		if d.NextBlock(0) {
			return d.Err("malformed header_regexp matcher: blocks are not supported")
		}
	}
	return nil
}

// Match returns true if r matches m.
func (m MatchHeaderRE) Match(r *http.Request) bool {
	for field, rm := range m {
		actualFieldVals := getHeaderFieldVals(r.Header, field, r.Host)
		match := false
	fieldVal:
		for _, actualFieldVal := range actualFieldVals {
			repl := r.Context().Value(caddy.ReplacerCtxKey).(*caddy.Replacer)
			if rm.Match(actualFieldVal, repl) {
				match = true
				break fieldVal
			}
		}
		if !match {
			return false
		}
	}
	return true
}

// Provision compiles m's regular expressions.
func (m MatchHeaderRE) Provision(ctx caddy.Context) error {
	for _, rm := range m {
		err := rm.Provision(ctx)
		if err != nil {
			return err
		}
	}
	return nil
}

// Validate validates m's regular expressions.
func (m MatchHeaderRE) Validate() error {
	for _, rm := range m {
		err := rm.Validate()
		if err != nil {
			return err
		}
	}
	return nil
}

// CELLibrary produces options that expose this matcher for use in CEL
// expression matchers.
//
// Example:
//
//	expression header_regexp('foo', 'Field', 'fo+')
func (MatchHeaderRE) CELLibrary(ctx caddy.Context) (cel.Library, error) {
	unnamedPattern, err := CELMatcherImpl(
		"header_regexp",
		"header_regexp_request_string_string",
		[]*cel.Type{cel.StringType, cel.StringType},
		func(data ref.Val) (RequestMatcher, error) {
			refStringList := reflect.TypeOf([]string{})
			params, err := data.ConvertToNative(refStringList)
			if err != nil {
				return nil, err
			}
			strParams := params.([]string)
			matcher := MatchHeaderRE{}
			matcher[strParams[0]] = &MatchRegexp{Pattern: strParams[1], Name: ""}
			err = matcher.Provision(ctx)
			return matcher, err
		},
	)
	if err != nil {
		return nil, err
	}
	namedPattern, err := CELMatcherImpl(
		"header_regexp",
		"header_regexp_request_string_string_string",
		[]*cel.Type{cel.StringType, cel.StringType, cel.StringType},
		func(data ref.Val) (RequestMatcher, error) {
			refStringList := reflect.TypeOf([]string{})
			params, err := data.ConvertToNative(refStringList)
			if err != nil {
				return nil, err
			}
			strParams := params.([]string)
			matcher := MatchHeaderRE{}
			matcher[strParams[1]] = &MatchRegexp{Pattern: strParams[2], Name: strParams[0]}
			err = matcher.Provision(ctx)
			return matcher, err
		},
	)
	if err != nil {
		return nil, err
	}
	envOpts := append(unnamedPattern.CompileOptions(), namedPattern.CompileOptions()...)
	prgOpts := append(unnamedPattern.ProgramOptions(), namedPattern.ProgramOptions()...)
	return NewMatcherCELLibrary(envOpts, prgOpts), nil
}

// CaddyModule returns the Caddy module information.
func (MatchProtocol) CaddyModule() caddy.ModuleInfo {
	return caddy.ModuleInfo{
		ID:  "http.matchers.protocol",
		New: func() caddy.Module { return new(MatchProtocol) },
	}
}

// Match returns true if r matches m.
func (m MatchProtocol) Match(r *http.Request) bool {
	switch string(m) {
	case "grpc":
		return strings.HasPrefix(r.Header.Get("content-type"), "application/grpc")
	case "https":
		return r.TLS != nil
	case "http":
		return r.TLS == nil
	case "http/1.0":
		return r.ProtoMajor == 1 && r.ProtoMinor == 0
	case "http/1.0+":
		return r.ProtoAtLeast(1, 0)
	case "http/1.1":
		return r.ProtoMajor == 1 && r.ProtoMinor == 1
	case "http/1.1+":
		return r.ProtoAtLeast(1, 1)
	case "http/2":
		return r.ProtoMajor == 2
	case "http/2+":
		return r.ProtoAtLeast(2, 0)
	case "http/3":
		return r.ProtoMajor == 3
	case "http/3+":
		return r.ProtoAtLeast(3, 0)
	}
	return false
}

// UnmarshalCaddyfile implements caddyfile.Unmarshaler.
func (m *MatchProtocol) UnmarshalCaddyfile(d *caddyfile.Dispenser) error {
	for d.Next() {
		var proto string
		if !d.Args(&proto) {
			return d.Err("expected exactly one protocol")
		}
		*m = MatchProtocol(proto)
	}
	return nil
}

// CELLibrary produces options that expose this matcher for use in CEL
// expression matchers.
//
// Example:
//
//	expression protocol('https')
func (MatchProtocol) CELLibrary(_ caddy.Context) (cel.Library, error) {
	return CELMatcherImpl(
		"protocol",
		"protocol_request_string",
		[]*cel.Type{cel.StringType},
		func(data ref.Val) (RequestMatcher, error) {
			protocolStr, ok := data.(types.String)
			if !ok {
				return nil, errors.New("protocol argument was not a string")
			}
			return MatchProtocol(strings.ToLower(string(protocolStr))), nil
		},
	)
}

// CaddyModule returns the Caddy module information.
func (MatchNot) CaddyModule() caddy.ModuleInfo {
	return caddy.ModuleInfo{
		ID:  "http.matchers.not",
		New: func() caddy.Module { return new(MatchNot) },
	}
}

// UnmarshalCaddyfile implements caddyfile.Unmarshaler.
func (m *MatchNot) UnmarshalCaddyfile(d *caddyfile.Dispenser) error {
	for d.Next() {
		matcherSet, err := ParseCaddyfileNestedMatcherSet(d)
		if err != nil {
			return err
		}
		m.MatcherSetsRaw = append(m.MatcherSetsRaw, matcherSet)
	}
	return nil
}

// UnmarshalJSON satisfies json.Unmarshaler. It puts the JSON
// bytes directly into m's MatcherSetsRaw field.
func (m *MatchNot) UnmarshalJSON(data []byte) error {
	return json.Unmarshal(data, &m.MatcherSetsRaw)
}

// MarshalJSON satisfies json.Marshaler by marshaling
// m's raw matcher sets.
func (m MatchNot) MarshalJSON() ([]byte, error) {
	return json.Marshal(m.MatcherSetsRaw)
}

// Provision loads the matcher modules to be negated.
func (m *MatchNot) Provision(ctx caddy.Context) error {
	matcherSets, err := ctx.LoadModule(m, "MatcherSetsRaw")
	if err != nil {
		return fmt.Errorf("loading matcher sets: %v", err)
	}
	for _, modMap := range matcherSets.([]map[string]any) {
		var ms MatcherSet
		for _, modIface := range modMap {
			ms = append(ms, modIface.(RequestMatcher))
		}
		m.MatcherSets = append(m.MatcherSets, ms)
	}
	return nil
}

// Match returns true if r matches m. Since this matcher negates
// the embedded matchers, false is returned if any of its matcher
// sets return true.
func (m MatchNot) Match(r *http.Request) bool {
	for _, ms := range m.MatcherSets {
		if ms.Match(r) {
			return false
		}
	}
	return true
}

// MatchRegexp is an embedable type for matching
// using regular expressions. It adds placeholders
// to the request's replacer.
type MatchRegexp struct {
	// A unique name for this regular expression. Optional,
	// but useful to prevent overwriting captures from other
	// regexp matchers.
	Name string `json:"name,omitempty"`

	// The regular expression to evaluate, in RE2 syntax,
	// which is the same general syntax used by Go, Perl,
	// and Python. For details, see
	// [Go's regexp package](https://golang.org/pkg/regexp/).
	// Captures are accessible via placeholders. Unnamed
	// capture groups are exposed as their numeric, 1-based
	// index, while named capture groups are available by
	// the capture group name.
	Pattern string `json:"pattern"`

	compiled *regexp.Regexp
	phPrefix string
}

// Provision compiles the regular expression.
func (mre *MatchRegexp) Provision(caddy.Context) error {
	re, err := regexp.Compile(mre.Pattern)
	if err != nil {
		return fmt.Errorf("compiling matcher regexp %s: %v", mre.Pattern, err)
	}
	mre.compiled = re
	mre.phPrefix = regexpPlaceholderPrefix
	if mre.Name != "" {
		mre.phPrefix += "." + mre.Name
	}
	return nil
}

// Validate ensures mre is set up correctly.
func (mre *MatchRegexp) Validate() error {
	if mre.Name != "" && !wordRE.MatchString(mre.Name) {
		return fmt.Errorf("invalid regexp name (must contain only word characters): %s", mre.Name)
	}
	return nil
}

// Match returns true if input matches the compiled regular
// expression in mre. It sets values on the replacer repl
// associated with capture groups, using the given scope
// (namespace).
func (mre *MatchRegexp) Match(input string, repl *caddy.Replacer) bool {
	matches := mre.compiled.FindStringSubmatch(input)
	if matches == nil {
		return false
	}

	// save all capture groups, first by index
	for i, match := range matches {
		key := mre.phPrefix + "." + strconv.Itoa(i)
		repl.Set(key, match)
	}

	// then by name
	for i, name := range mre.compiled.SubexpNames() {
		if i != 0 && name != "" {
			key := mre.phPrefix + "." + name
			repl.Set(key, matches[i])
		}
	}

	return true
}

// UnmarshalCaddyfile implements caddyfile.Unmarshaler.
func (mre *MatchRegexp) UnmarshalCaddyfile(d *caddyfile.Dispenser) error {
	for d.Next() {
		// If this is the second iteration of the loop
		// then there's more than one path_regexp matcher
		// and we would end up overwriting the old one
		if mre.Pattern != "" {
			return d.Err("regular expression can only be used once per named matcher")
		}

		args := d.RemainingArgs()
		switch len(args) {
		case 1:
			mre.Pattern = args[0]
		case 2:
			mre.Name = args[0]
			mre.Pattern = args[1]
		default:
			return d.ArgErr()
		}
		if d.NextBlock(0) {
			return d.Err("malformed path_regexp matcher: blocks are not supported")
		}
	}
	return nil
}

// ParseCaddyfileNestedMatcher parses the Caddyfile tokens for a nested
// matcher set, and returns its raw module map value.
func ParseCaddyfileNestedMatcherSet(d *caddyfile.Dispenser) (caddy.ModuleMap, error) {
	matcherMap := make(map[string]RequestMatcher)

	// in case there are multiple instances of the same matcher, concatenate
	// their tokens (we expect that UnmarshalCaddyfile should be able to
	// handle more than one segment); otherwise, we'd overwrite other
	// instances of the matcher in this set
	tokensByMatcherName := make(map[string][]caddyfile.Token)
	for nesting := d.Nesting(); d.NextArg() || d.NextBlock(nesting); {
		matcherName := d.Val()
		tokensByMatcherName[matcherName] = append(tokensByMatcherName[matcherName], d.NextSegment()...)
	}

	for matcherName, tokens := range tokensByMatcherName {
		mod, err := caddy.GetModule("http.matchers." + matcherName)
		if err != nil {
			return nil, d.Errf("getting matcher module '%s': %v", matcherName, err)
		}
		unm, ok := mod.New().(caddyfile.Unmarshaler)
		if !ok {
			return nil, d.Errf("matcher module '%s' is not a Caddyfile unmarshaler", matcherName)
		}
		err = unm.UnmarshalCaddyfile(caddyfile.NewDispenser(tokens))
		if err != nil {
			return nil, err
		}
		rm, ok := unm.(RequestMatcher)
		if !ok {
			return nil, fmt.Errorf("matcher module '%s' is not a request matcher", matcherName)
		}
		matcherMap[matcherName] = rm
	}

	// we should now have a functional matcher, but we also
	// need to be able to marshal as JSON, otherwise config
	// adaptation will be missing the matchers!
	matcherSet := make(caddy.ModuleMap)
	for name, matcher := range matcherMap {
		jsonBytes, err := json.Marshal(matcher)
		if err != nil {
			return nil, fmt.Errorf("marshaling %T matcher: %v", matcher, err)
		}
		matcherSet[name] = jsonBytes
	}

	return matcherSet, nil
}

var wordRE = regexp.MustCompile(`\w+`)

const regexpPlaceholderPrefix = "http.regexp"

// MatcherErrorVarKey is the key used for the variable that
// holds an optional error emitted from a request matcher,
// to short-circuit the handler chain, since matchers cannot
// return errors via the RequestMatcher interface.
const MatcherErrorVarKey = "matchers.error"

// Interface guards
var (
	_ RequestMatcher    = (*MatchHost)(nil)
	_ caddy.Provisioner = (*MatchHost)(nil)
	_ RequestMatcher    = (*MatchPath)(nil)
	_ RequestMatcher    = (*MatchPathRE)(nil)
	_ caddy.Provisioner = (*MatchPathRE)(nil)
	_ RequestMatcher    = (*MatchMethod)(nil)
	_ RequestMatcher    = (*MatchQuery)(nil)
	_ RequestMatcher    = (*MatchHeader)(nil)
	_ RequestMatcher    = (*MatchHeaderRE)(nil)
	_ caddy.Provisioner = (*MatchHeaderRE)(nil)
	_ RequestMatcher    = (*MatchProtocol)(nil)
	_ RequestMatcher    = (*MatchNot)(nil)
	_ caddy.Provisioner = (*MatchNot)(nil)
	_ caddy.Provisioner = (*MatchRegexp)(nil)

	_ caddyfile.Unmarshaler = (*MatchHost)(nil)
	_ caddyfile.Unmarshaler = (*MatchPath)(nil)
	_ caddyfile.Unmarshaler = (*MatchPathRE)(nil)
	_ caddyfile.Unmarshaler = (*MatchMethod)(nil)
	_ caddyfile.Unmarshaler = (*MatchQuery)(nil)
	_ caddyfile.Unmarshaler = (*MatchHeader)(nil)
	_ caddyfile.Unmarshaler = (*MatchHeaderRE)(nil)
	_ caddyfile.Unmarshaler = (*MatchProtocol)(nil)
	_ caddyfile.Unmarshaler = (*VarsMatcher)(nil)
	_ caddyfile.Unmarshaler = (*MatchVarsRE)(nil)

	_ CELLibraryProducer = (*MatchHost)(nil)
	_ CELLibraryProducer = (*MatchPath)(nil)
	_ CELLibraryProducer = (*MatchPathRE)(nil)
	_ CELLibraryProducer = (*MatchMethod)(nil)
	_ CELLibraryProducer = (*MatchQuery)(nil)
	_ CELLibraryProducer = (*MatchHeader)(nil)
	_ CELLibraryProducer = (*MatchHeaderRE)(nil)
	_ CELLibraryProducer = (*MatchProtocol)(nil)
	// _ CELLibraryProducer = (*VarsMatcher)(nil)
	// _ CELLibraryProducer = (*MatchVarsRE)(nil)

	_ json.Marshaler   = (*MatchNot)(nil)
	_ json.Unmarshaler = (*MatchNot)(nil)
)