2019-06-30 17:07:58 -05:00
// Copyright 2015 Matthew Holt and The Caddy Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
2019-05-04 11:49:50 -05:00
package reverseproxy
import (
"context"
2019-09-02 23:01:02 -05:00
"encoding/json"
2019-05-04 11:49:50 -05:00
"fmt"
"net"
"net/http"
"net/url"
2019-09-03 13:10:11 -05:00
"regexp"
2019-05-04 11:49:50 -05:00
"strings"
"time"
2019-09-02 23:01:02 -05:00
"github.com/caddyserver/caddy/v2"
"github.com/caddyserver/caddy/v2/modules/caddyhttp"
2019-05-04 11:49:50 -05:00
"golang.org/x/net/http/httpguts"
)
2019-09-02 23:01:02 -05:00
func init ( ) {
caddy . RegisterModule ( Handler { } )
2019-05-04 11:49:50 -05:00
}
2019-09-03 17:56:09 -05:00
// Handler implements a highly configurable and production-ready reverse proxy.
2019-09-02 23:01:02 -05:00
type Handler struct {
TransportRaw json . RawMessage ` json:"transport,omitempty" `
2019-09-03 20:06:54 -05:00
CBRaw json . RawMessage ` json:"circuit_breaker,omitempty" `
2019-09-02 23:01:02 -05:00
LoadBalancing * LoadBalancing ` json:"load_balancing,omitempty" `
HealthChecks * HealthChecks ` json:"health_checks,omitempty" `
2019-09-03 17:56:09 -05:00
Upstreams UpstreamPool ` json:"upstreams,omitempty" `
FlushInterval caddy . Duration ` json:"flush_interval,omitempty" `
2019-09-02 23:01:02 -05:00
Transport http . RoundTripper ` json:"-" `
2019-09-03 20:06:54 -05:00
CB CircuitBreaker ` json:"-" `
2019-09-02 23:01:02 -05:00
}
// CaddyModule returns the Caddy module information.
func ( Handler ) CaddyModule ( ) caddy . ModuleInfo {
return caddy . ModuleInfo {
Name : "http.handlers.reverse_proxy" ,
New : func ( ) caddy . Module { return new ( Handler ) } ,
2019-05-04 11:49:50 -05:00
}
}
2019-09-03 17:56:09 -05:00
// Provision ensures that h is set up properly before use.
2019-09-02 23:01:02 -05:00
func ( h * Handler ) Provision ( ctx caddy . Context ) error {
2019-09-03 20:06:54 -05:00
// start by loading modules
2019-09-02 23:01:02 -05:00
if h . TransportRaw != nil {
val , err := ctx . LoadModuleInline ( "protocol" , "http.handlers.reverse_proxy.transport" , h . TransportRaw )
if err != nil {
return fmt . Errorf ( "loading transport module: %s" , err )
2019-05-04 11:49:50 -05:00
}
2019-09-02 23:01:02 -05:00
h . Transport = val . ( http . RoundTripper )
h . TransportRaw = nil // allow GC to deallocate - TODO: Does this help?
}
if h . LoadBalancing != nil && h . LoadBalancing . SelectionPolicyRaw != nil {
val , err := ctx . LoadModuleInline ( "policy" ,
"http.handlers.reverse_proxy.selection_policies" ,
h . LoadBalancing . SelectionPolicyRaw )
if err != nil {
return fmt . Errorf ( "loading load balancing selection module: %s" , err )
2019-05-04 11:49:50 -05:00
}
2019-09-02 23:01:02 -05:00
h . LoadBalancing . SelectionPolicy = val . ( Selector )
h . LoadBalancing . SelectionPolicyRaw = nil // allow GC to deallocate - TODO: Does this help?
2019-05-04 11:49:50 -05:00
}
2019-09-03 20:06:54 -05:00
if h . CBRaw != nil {
val , err := ctx . LoadModuleInline ( "type" , "http.handlers.reverse_proxy.circuit_breakers" , h . CBRaw )
if err != nil {
return fmt . Errorf ( "loading circuit breaker module: %s" , err )
}
h . CB = val . ( CircuitBreaker )
h . CBRaw = nil // allow GC to deallocate - TODO: Does this help?
}
2019-05-04 11:49:50 -05:00
2019-09-02 23:01:02 -05:00
if h . Transport == nil {
h . Transport = defaultTransport
2019-05-04 11:49:50 -05:00
}
2019-09-02 23:01:02 -05:00
if h . LoadBalancing == nil {
h . LoadBalancing = new ( LoadBalancing )
}
if h . LoadBalancing . SelectionPolicy == nil {
h . LoadBalancing . SelectionPolicy = RandomSelection { }
}
if h . LoadBalancing . TryDuration > 0 && h . LoadBalancing . TryInterval == 0 {
// a non-zero try_duration with a zero try_interval
// will always spin the CPU for try_duration if the
2019-09-03 13:10:11 -05:00
// upstream is local or low-latency; avoid that by
// defaulting to a sane wait period between attempts
2019-09-02 23:01:02 -05:00
h . LoadBalancing . TryInterval = caddy . Duration ( 250 * time . Millisecond )
2019-05-04 11:49:50 -05:00
}
2019-09-03 13:10:11 -05:00
// if active health checks are enabled, configure them and start a worker
if h . HealthChecks != nil &&
h . HealthChecks . Active != nil &&
( h . HealthChecks . Active . Path != "" || h . HealthChecks . Active . Port != 0 ) {
timeout := time . Duration ( h . HealthChecks . Active . Timeout )
if timeout == 0 {
timeout = 10 * time . Second
}
h . HealthChecks . Active . stopChan = make ( chan struct { } )
h . HealthChecks . Active . httpClient = & http . Client {
Timeout : timeout ,
Transport : h . Transport ,
}
if h . HealthChecks . Active . Interval == 0 {
h . HealthChecks . Active . Interval = caddy . Duration ( 30 * time . Second )
}
if h . HealthChecks . Active . ExpectBody != "" {
var err error
h . HealthChecks . Active . bodyRegexp , err = regexp . Compile ( h . HealthChecks . Active . ExpectBody )
if err != nil {
return fmt . Errorf ( "expect_body: compiling regular expression: %v" , err )
}
}
go h . activeHealthChecker ( )
}
2019-09-02 23:01:02 -05:00
for _ , upstream := range h . Upstreams {
2019-09-03 20:06:54 -05:00
upstream . cb = h . CB
2019-09-02 23:01:02 -05:00
// url parser requires a scheme
if ! strings . Contains ( upstream . Address , "://" ) {
upstream . Address = "http://" + upstream . Address
}
u , err := url . Parse ( upstream . Address )
if err != nil {
return fmt . Errorf ( "invalid upstream address %s: %v" , upstream . Address , err )
}
upstream . hostURL = u
// if host already exists from a current config,
// use that instead; otherwise, add it
// TODO: make hosts modular, so that their state can be distributed in enterprise for example
// TODO: If distributed, the pool should be stored in storage...
var host Host = new ( upstreamHost )
activeHost , loaded := hosts . LoadOrStore ( u . String ( ) , host )
if loaded {
host = activeHost . ( Host )
}
upstream . Host = host
// if the passive health checker has a non-zero "unhealthy
// request count" but the upstream has no MaxRequests set
// (they are the same thing, but one is a default value for
// for upstreams with a zero MaxRequests), copy the default
// value into this upstream, since the value in the upstream
// is what is used during availability checks
if h . HealthChecks != nil &&
h . HealthChecks . Passive != nil &&
h . HealthChecks . Passive . UnhealthyRequestCount > 0 &&
upstream . MaxRequests == 0 {
upstream . MaxRequests = h . HealthChecks . Passive . UnhealthyRequestCount
}
2019-05-04 11:49:50 -05:00
2019-09-02 23:01:02 -05:00
if h . HealthChecks != nil {
// upstreams need independent access to the passive
// health check policy so they can, you know, passively
// do health checks
upstream . healthCheckPolicy = h . HealthChecks . Passive
}
2019-05-04 11:49:50 -05:00
}
2019-09-02 23:01:02 -05:00
return nil
2019-05-04 11:49:50 -05:00
}
2019-09-03 13:10:11 -05:00
// Cleanup cleans up the resources made by h during provisioning.
2019-09-02 23:01:02 -05:00
func ( h * Handler ) Cleanup ( ) error {
2019-09-03 13:10:11 -05:00
// stop the active health checker
if h . HealthChecks != nil &&
h . HealthChecks . Active != nil &&
h . HealthChecks . Active . stopChan != nil {
close ( h . HealthChecks . Active . stopChan )
}
// remove hosts from our config from the pool
2019-09-02 23:01:02 -05:00
for _ , upstream := range h . Upstreams {
hosts . Delete ( upstream . hostURL . String ( ) )
2019-05-04 11:49:50 -05:00
}
2019-09-03 13:10:11 -05:00
2019-09-02 23:01:02 -05:00
return nil
2019-05-04 11:49:50 -05:00
}
2019-09-02 23:01:02 -05:00
func ( h * Handler ) ServeHTTP ( w http . ResponseWriter , r * http . Request , next caddyhttp . Handler ) error {
// prepare the request for proxying; this is needed only once
err := h . prepareRequest ( r )
if err != nil {
return caddyhttp . Error ( http . StatusInternalServerError ,
fmt . Errorf ( "preparing request for upstream round-trip: %v" , err ) )
2019-05-04 11:49:50 -05:00
}
2019-09-02 23:01:02 -05:00
start := time . Now ( )
var proxyErr error
for {
// choose an available upstream
upstream := h . LoadBalancing . SelectionPolicy . Select ( h . Upstreams , r )
if upstream == nil {
if proxyErr == nil {
proxyErr = fmt . Errorf ( "no available upstreams" )
}
if ! h . tryAgain ( start , proxyErr ) {
break
2019-05-04 11:49:50 -05:00
}
2019-09-02 23:01:02 -05:00
continue
}
// proxy the request to that upstream
proxyErr = h . reverseProxy ( w , r , upstream )
if proxyErr == nil {
return nil
}
// remember this failure (if enabled)
h . countFailure ( upstream )
// if we've tried long enough, break
if ! h . tryAgain ( start , proxyErr ) {
break
}
2019-05-04 11:49:50 -05:00
}
2019-09-02 23:01:02 -05:00
return caddyhttp . Error ( http . StatusBadGateway , proxyErr )
}
// prepareRequest modifies req so that it is ready to be proxied,
// except for directing to a specific upstream. This method mutates
// headers and other necessary properties of the request and should
// be done just once (before proxying) regardless of proxy retries.
// This assumes that no mutations of the request are performed
// by h during or after proxying.
func ( h Handler ) prepareRequest ( req * http . Request ) error {
2019-05-04 11:49:50 -05:00
if req . ContentLength == 0 {
2019-09-02 23:01:02 -05:00
req . Body = nil // Issue golang/go#16036: nil Body for http.Transport retries
2019-05-04 11:49:50 -05:00
}
2019-09-02 23:01:02 -05:00
req . Close = false
2019-05-04 11:49:50 -05:00
2019-09-02 23:01:02 -05:00
// if User-Agent is not set by client, then explicitly
// disable it so it's not set to default value by std lib
if _ , ok := req . Header [ "User-Agent" ] ; ! ok {
req . Header . Set ( "User-Agent" , "" )
}
reqUpType := upgradeType ( req . Header )
removeConnectionHeaders ( req . Header )
2019-05-04 11:49:50 -05:00
// Remove hop-by-hop headers to the backend. Especially
// important is "Connection" because we want a persistent
// connection, regardless of what the client sent to us.
for _ , h := range hopHeaders {
2019-09-02 23:01:02 -05:00
hv := req . Header . Get ( h )
2019-05-04 11:49:50 -05:00
if hv == "" {
continue
}
if h == "Te" && hv == "trailers" {
2019-09-02 23:01:02 -05:00
// Issue golang/go#21096: tell backend applications that
2019-05-04 11:49:50 -05:00
// care about trailer support that we support
// trailers. (We do, but we don't go out of
// our way to advertise that unless the
// incoming client request thought it was
// worth mentioning)
continue
}
2019-09-02 23:01:02 -05:00
req . Header . Del ( h )
2019-05-04 11:49:50 -05:00
}
// After stripping all the hop-by-hop connection headers above, add back any
// necessary for protocol upgrades, such as for websockets.
if reqUpType != "" {
2019-09-02 23:01:02 -05:00
req . Header . Set ( "Connection" , "Upgrade" )
req . Header . Set ( "Upgrade" , reqUpType )
2019-05-04 11:49:50 -05:00
}
if clientIP , _ , err := net . SplitHostPort ( req . RemoteAddr ) ; err == nil {
// If we aren't the first proxy retain prior
// X-Forwarded-For information as a comma+space
// separated list and fold multiple headers into one.
2019-09-02 23:01:02 -05:00
if prior , ok := req . Header [ "X-Forwarded-For" ] ; ok {
2019-05-04 11:49:50 -05:00
clientIP = strings . Join ( prior , ", " ) + ", " + clientIP
}
2019-09-02 23:01:02 -05:00
req . Header . Set ( "X-Forwarded-For" , clientIP )
2019-05-04 11:49:50 -05:00
}
2019-09-02 23:01:02 -05:00
return nil
}
2019-09-03 17:56:09 -05:00
// reverseProxy performs a round-trip to the given backend and processes the response with the client.
// (This method is mostly the beginning of what was borrowed from the net/http/httputil package in the
// Go standard library which was used as the foundation.)
2019-09-02 23:01:02 -05:00
func ( h * Handler ) reverseProxy ( rw http . ResponseWriter , req * http . Request , upstream * Upstream ) error {
2019-09-03 17:56:09 -05:00
upstream . Host . CountRequest ( 1 )
defer upstream . Host . CountRequest ( - 1 )
2019-09-02 23:01:02 -05:00
// point the request to this upstream
h . directRequest ( req , upstream )
// do the round-trip
start := time . Now ( )
res , err := h . Transport . RoundTrip ( req )
latency := time . Since ( start )
2019-05-04 11:49:50 -05:00
if err != nil {
2019-09-02 23:01:02 -05:00
return err
2019-05-04 11:49:50 -05:00
}
2019-09-03 20:06:54 -05:00
// update circuit breaker on current conditions
if upstream . cb != nil {
upstream . cb . RecordMetric ( res . StatusCode , latency )
}
2019-09-02 23:01:02 -05:00
// perform passive health checks (if enabled)
if h . HealthChecks != nil && h . HealthChecks . Passive != nil {
// strike if the status code matches one that is "bad"
for _ , badStatus := range h . HealthChecks . Passive . UnhealthyStatus {
if caddyhttp . StatusCodeMatches ( res . StatusCode , badStatus ) {
h . countFailure ( upstream )
}
2019-05-04 11:49:50 -05:00
}
2019-09-02 23:01:02 -05:00
// strike if the roundtrip took too long
if h . HealthChecks . Passive . UnhealthyLatency > 0 &&
latency >= time . Duration ( h . HealthChecks . Passive . UnhealthyLatency ) {
h . countFailure ( upstream )
}
}
// Deal with 101 Switching Protocols responses: (WebSocket, h2c, etc)
if res . StatusCode == http . StatusSwitchingProtocols {
h . handleUpgradeResponse ( rw , req , res )
return nil
2019-05-04 11:49:50 -05:00
}
removeConnectionHeaders ( res . Header )
for _ , h := range hopHeaders {
res . Header . Del ( h )
}
copyHeader ( rw . Header ( ) , res . Header )
// The "Trailer" header isn't included in the Transport's response,
// at least for *http.Transport. Build it up from Trailer.
announcedTrailers := len ( res . Trailer )
if announcedTrailers > 0 {
trailerKeys := make ( [ ] string , 0 , len ( res . Trailer ) )
for k := range res . Trailer {
trailerKeys = append ( trailerKeys , k )
}
rw . Header ( ) . Add ( "Trailer" , strings . Join ( trailerKeys , ", " ) )
}
rw . WriteHeader ( res . StatusCode )
2019-09-02 23:01:02 -05:00
err = h . copyResponse ( rw , res . Body , h . flushInterval ( req , res ) )
2019-05-04 11:49:50 -05:00
if err != nil {
defer res . Body . Close ( )
// Since we're streaming the response, if we run into an error all we can do
2019-09-02 23:01:02 -05:00
// is abort the request. Issue golang/go#23643: ReverseProxy should use ErrAbortHandler
2019-05-04 11:49:50 -05:00
// on read error while copying body.
2019-09-02 23:01:02 -05:00
// TODO: Look into whether we want to panic at all in our case...
2019-05-04 11:49:50 -05:00
if ! shouldPanicOnCopyError ( req ) {
2019-09-02 23:01:02 -05:00
// p.logf("suppressing panic for copyResponse error in test; copy error: %v", err)
return err
2019-05-04 11:49:50 -05:00
}
panic ( http . ErrAbortHandler )
}
res . Body . Close ( ) // close now, instead of defer, to populate res.Trailer
if len ( res . Trailer ) > 0 {
// Force chunking if we saw a response trailer.
// This prevents net/http from calculating the length for short
// bodies and adding a Content-Length.
if fl , ok := rw . ( http . Flusher ) ; ok {
fl . Flush ( )
}
}
if len ( res . Trailer ) == announcedTrailers {
copyHeader ( rw . Header ( ) , res . Trailer )
2019-09-02 23:01:02 -05:00
return nil
2019-05-04 11:49:50 -05:00
}
for k , vv := range res . Trailer {
k = http . TrailerPrefix + k
for _ , v := range vv {
rw . Header ( ) . Add ( k , v )
}
}
2019-09-02 23:01:02 -05:00
return nil
2019-05-04 11:49:50 -05:00
}
2019-09-02 23:01:02 -05:00
// tryAgain takes the time that the handler was initially invoked
// as well as any error currently obtained and returns true if
// another attempt should be made at proxying the request. If
// true is returned, it has already blocked long enough before
// the next retry (i.e. no more sleeping is needed). If false is
// returned, the handler should stop trying to proxy the request.
func ( h Handler ) tryAgain ( start time . Time , proxyErr error ) bool {
// if downstream has canceled the request, break
if proxyErr == context . Canceled {
return false
2019-05-04 11:49:50 -05:00
}
2019-09-02 23:01:02 -05:00
// if we've tried long enough, break
if time . Since ( start ) >= time . Duration ( h . LoadBalancing . TryDuration ) {
return false
2019-05-04 11:49:50 -05:00
}
2019-09-02 23:01:02 -05:00
// otherwise, wait and try the next available host
time . Sleep ( time . Duration ( h . LoadBalancing . TryInterval ) )
return true
2019-05-04 11:49:50 -05:00
}
2019-09-02 23:01:02 -05:00
// directRequest modifies only req.URL so that it points to the
// given upstream host. It must modify ONLY the request URL.
func ( h Handler ) directRequest ( req * http . Request , upstream * Upstream ) {
target := upstream . hostURL
req . URL . Scheme = target . Scheme
req . URL . Host = target . Host
req . URL . Path = singleJoiningSlash ( target . Path , req . URL . Path ) // TODO: This might be a bug (if any part of the path was augmented from a previously-tried upstream; need to start from clean original path of request, same for query string!)
if target . RawQuery == "" || req . URL . RawQuery == "" {
req . URL . RawQuery = target . RawQuery + req . URL . RawQuery
} else {
req . URL . RawQuery = target . RawQuery + "&" + req . URL . RawQuery
2019-05-04 11:49:50 -05:00
}
}
2019-09-02 23:01:02 -05:00
// shouldPanicOnCopyError reports whether the reverse proxy should
// panic with http.ErrAbortHandler when copying the response fails.
// It returns true only when the request context carries a value for
// http.ServerContextKey, i.e. we appear to be running under an HTTP
// server, which will recover the panic. Without that value it acts
// like Go 1.10 and earlier (no panic) to not break existing tests.
// TODO: I don't know if we want this at all...
func shouldPanicOnCopyError(req *http.Request) bool {
	// if inOurTests {
	// 	// Our tests know to handle this panic.
	// 	return true
	// }
	underServer := req.Context().Value(http.ServerContextKey) != nil
	return underServer
}
// copyHeader appends every value of every field in src to dst,
// keeping whatever values dst already holds for the same field.
func copyHeader(dst, src http.Header) {
	for name, values := range src {
		for i := range values {
			dst.Add(name, values[i])
		}
	}
}
// cloneHeader returns a deep copy of h: a new map whose value
// slices are freshly allocated, so that changing the copy never
// affects the original.
func cloneHeader(h http.Header) http.Header {
	clone := make(http.Header, len(h))
	for name, values := range h {
		clone[name] = append(make([]string, 0, len(values)), values...)
	}
	return clone
}
2019-05-04 11:49:50 -05:00
// upgradeType returns the lower-cased value of the Upgrade header
// if the Connection header contains the "Upgrade" token, and the
// empty string otherwise.
func upgradeType(h http.Header) string {
	if httpguts.HeaderValuesContainsToken(h["Connection"], "Upgrade") {
		return strings.ToLower(h.Get("Upgrade"))
	}
	return ""
}
2019-09-02 23:01:02 -05:00
// singleJoiningSlash concatenates a and b so that exactly one slash
// separates them at the seam: a duplicate slash is dropped and a
// missing one is inserted.
func singleJoiningSlash(a, b string) string {
	trailing := strings.HasSuffix(a, "/")
	leading := strings.HasPrefix(b, "/")

	if trailing && leading {
		// both sides bring a slash; keep only one
		return a + b[1:]
	}
	if trailing || leading {
		// exactly one side brings the slash
		return a + b
	}
	return a + "/" + b
}
2019-05-04 11:49:50 -05:00
2019-09-02 23:01:02 -05:00
// removeConnectionHeaders removes hop-by-hop headers listed in the "Connection" header of h.
// See RFC 7230, section 6.1
//
// Every Connection field line is examined, not just the first one,
// since a message may carry the header several times. The Connection
// header itself is left in place.
func removeConnectionHeaders(h http.Header) {
	for _, line := range h["Connection"] {
		for _, name := range strings.Split(line, ",") {
			if name = strings.TrimSpace(name); name != "" {
				h.Del(name)
			}
		}
	}
}
2019-09-03 17:56:09 -05:00
// LoadBalancing has parameters related to load balancing.
2019-09-02 23:01:02 -05:00
type LoadBalancing struct {
SelectionPolicyRaw json . RawMessage ` json:"selection_policy,omitempty" `
TryDuration caddy . Duration ` json:"try_duration,omitempty" `
TryInterval caddy . Duration ` json:"try_interval,omitempty" `
SelectionPolicy Selector ` json:"-" `
}
2019-09-03 17:56:09 -05:00
// Selector selects an available upstream from the pool.
2019-09-02 23:01:02 -05:00
type Selector interface {
2019-09-03 17:56:09 -05:00
Select ( UpstreamPool , * http . Request ) * Upstream
2019-09-02 23:01:02 -05:00
}
// hopHeaders lists the hop-by-hop headers, which are removed before
// a message is passed along. As of RFC 7230, hop-by-hop headers are
// required to appear in the Connection header field; the names below
// are the ones defined by the obsoleted RFC 2616 (section 13.5.1)
// and are stripped unconditionally for backward compatibility.
var hopHeaders = []string{
	"Connection",
	"Proxy-Connection", // non-standard but still sent by libcurl and rejected by e.g. google
	"Keep-Alive",
	"Proxy-Authenticate",
	"Proxy-Authorization",
	"Te",      // canonicalized version of "TE"
	"Trailer", // not Trailers; see https://www.rfc-editor.org/errata_search.php?eid=4522
	"Transfer-Encoding",
	"Upgrade",
}
2019-09-03 13:10:11 -05:00
// TODO: see if we can use this
// var bufPool = sync.Pool{
// New: func() interface{} {
// return new(bytes.Buffer)
// },
// }
2019-09-02 23:01:02 -05:00
// Interface guards
var (
_ caddy . Provisioner = ( * Handler ) ( nil )
_ caddy . CleanerUpper = ( * Handler ) ( nil )
2019-09-03 17:56:09 -05:00
_ caddyhttp . MiddlewareHandler = ( * Handler ) ( nil )
2019-09-02 23:01:02 -05:00
)