0
Fork 0
mirror of https://github.com/caddyserver/caddy.git synced 2024-12-30 22:34:15 -05:00
caddy/caddyhttp/proxy/reverseproxy.go
Matt Holt d5371aff22 httpserver/all: Clean up and standardize request URL handling (#1633)
* httpserver/all: Clean up and standardize request URL handling

The HTTP server now always creates a context value on the request which
is a copy of the request's URL struct. It should not be modified by
middlewares, but it is safe to get the value out of the request and make
changes to it locally-scoped. Thus, the value in the context always
stores the original request URL information as it was received. Any
rewrites that happen will be to the request's URL field directly.

The HTTP server no longer cleans /sanitizes the request URL. It made too
many strong assumptions and ended up making a lot of middleware more
complicated, including upstream proxying (and fastcgi). To alleviate
this complexity, we no longer change the request URL. Middlewares are
responsible to access the disk safely by using http.Dir or, if not
actually opening files, they can use httpserver.SafePath().

I'm hoping this will address issues with #1624, #1584, #1582, and others.

* staticfiles: Fix test on Windows

@abiosoft: I still can't figure out exactly what this is for. 😅

* Use (potentially) changed URL for browse redirects, as before

* Use filepath.ToSlash, clean up a couple proxy test cases

* Oops, fix variable name
2017-05-01 23:11:10 -06:00

605 lines
17 KiB
Go

// This file is adapted from code in the net/http/httputil
// package of the Go standard library, which is by the
// Go Authors, and bears this copyright and license info:
//
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//
// This file has been modified from the standard lib to
// meet the needs of the application.
package proxy
import (
"crypto/tls"
"io"
"net"
"net/http"
"net/url"
"strings"
"sync"
"time"
"golang.org/x/net/http2"
"github.com/mholt/caddy/caddyhttp/httpserver"
)
var (
defaultDialer = &net.Dialer{
Timeout: 30 * time.Second,
KeepAlive: 30 * time.Second,
}
bufferPool = sync.Pool{New: createBuffer}
)
func createBuffer() interface{} {
return make([]byte, 0, 32*1024)
}
func pooledIoCopy(dst io.Writer, src io.Reader) {
buf := bufferPool.Get().([]byte)
defer bufferPool.Put(buf)
// CopyBuffer only uses buf up to its length and panics if it's 0.
// Due to that we extend buf's length to its capacity here and
// ensure it's always non-zero.
bufCap := cap(buf)
io.CopyBuffer(dst, src, buf[0:bufCap:bufCap])
}
// onExitFlushLoop is a callback set by tests to detect the state of the
// flushLoop() goroutine.
var onExitFlushLoop func()
// ReverseProxy is an HTTP Handler that takes an incoming request and
// sends it to another server, proxying the response back to the
// client.
type ReverseProxy struct {
// Director must be a function which modifies
// the request into a new request to be sent
// using Transport. Its response is then copied
// back to the original client unmodified.
Director func(*http.Request)
// The transport used to perform proxy requests.
// If nil, http.DefaultTransport is used.
Transport http.RoundTripper
// FlushInterval specifies the flush interval
// to flush to the client while copying the
// response body.
// If zero, no periodic flushing is done.
FlushInterval time.Duration
}
// Though the relevant directive prefix is just "unix:", url.Parse
// will - assuming the regular URL scheme - add additional slashes
// as if "unix" was a request protocol.
// What we need is just the path, so if "unix:/var/run/www.socket"
// was the proxy directive, the parsed hostName would be
// "unix:///var/run/www.socket", hence the ambiguous trimming.
func socketDial(hostName string) func(network, addr string) (conn net.Conn, err error) {
return func(network, addr string) (conn net.Conn, err error) {
return net.Dial("unix", hostName[len("unix://"):])
}
}
func singleJoiningSlash(a, b string) string {
aslash := strings.HasSuffix(a, "/")
bslash := strings.HasPrefix(b, "/")
switch {
case aslash && bslash:
return a + b[1:]
case !aslash && !bslash && b != "":
return a + "/" + b
}
return a + b
}
// NewSingleHostReverseProxy returns a new ReverseProxy that rewrites
// URLs to the scheme, host, and base path provided in target. If the
// target's path is "/base" and the incoming request was for "/dir",
// the target request will be for /base/dir.
// Without logic: target's path is "/", incoming is "/api/messages",
// without is "/api", then the target request will be for /messages.
func NewSingleHostReverseProxy(target *url.URL, without string, keepalive int) *ReverseProxy {
targetQuery := target.RawQuery
director := func(req *http.Request) {
if target.Scheme == "unix" {
// to make Dial work with unix URL,
// scheme and host have to be faked
req.URL.Scheme = "http"
req.URL.Host = "socket"
} else {
req.URL.Scheme = target.Scheme
req.URL.Host = target.Host
}
// remove the `without` prefix
if without != "" {
req.URL.Path = strings.TrimPrefix(req.URL.Path, without)
if req.URL.Opaque != "" {
req.URL.Opaque = strings.TrimPrefix(req.URL.Opaque, without)
}
if req.URL.RawPath != "" {
req.URL.RawPath = strings.TrimPrefix(req.URL.RawPath, without)
}
}
// prefer returns val if it isn't empty, otherwise def
prefer := func(val, def string) string {
if val != "" {
return val
}
return def
}
// Make up the final URL by concatenating the request and target URL.
//
// If there is encoded part in request or target URL,
// the final URL should also be in encoded format.
// Here, we concatenate their encoded parts which are stored
// in URL.Opaque and URL.RawPath, if it is empty use
// URL.Path instead.
if req.URL.Opaque != "" || target.Opaque != "" {
req.URL.Opaque = singleJoiningSlash(
prefer(target.Opaque, target.Path),
prefer(req.URL.Opaque, req.URL.Path))
}
if req.URL.RawPath != "" || target.RawPath != "" {
req.URL.RawPath = singleJoiningSlash(
prefer(target.RawPath, target.Path),
prefer(req.URL.RawPath, req.URL.Path))
}
req.URL.Path = singleJoiningSlash(target.Path, req.URL.Path)
// Trims the path of the socket from the URL path.
// This is done because req.URL passed to your proxied service
// will have the full path of the socket file prefixed to it.
// Calling /test on a server that proxies requests to
// unix:/var/run/www.socket will thus set the requested path
// to /var/run/www.socket/test, rendering paths useless.
if target.Scheme == "unix" {
// See comment on socketDial for the trim
socketPrefix := target.String()[len("unix://"):]
req.URL.Path = strings.TrimPrefix(req.URL.Path, socketPrefix)
if req.URL.Opaque != "" {
req.URL.Opaque = strings.TrimPrefix(req.URL.Opaque, socketPrefix)
}
if req.URL.RawPath != "" {
req.URL.RawPath = strings.TrimPrefix(req.URL.RawPath, socketPrefix)
}
}
if targetQuery == "" || req.URL.RawQuery == "" {
req.URL.RawQuery = targetQuery + req.URL.RawQuery
} else {
req.URL.RawQuery = targetQuery + "&" + req.URL.RawQuery
}
}
rp := &ReverseProxy{Director: director, FlushInterval: 250 * time.Millisecond} // flushing good for streaming & server-sent events
if target.Scheme == "unix" {
rp.Transport = &http.Transport{
Dial: socketDial(target.String()),
}
} else if keepalive != http.DefaultMaxIdleConnsPerHost {
// if keepalive is equal to the default,
// just use default transport, to avoid creating
// a brand new transport
transport := &http.Transport{
Proxy: http.ProxyFromEnvironment,
Dial: defaultDialer.Dial,
TLSHandshakeTimeout: 10 * time.Second,
ExpectContinueTimeout: 1 * time.Second,
}
if keepalive == 0 {
transport.DisableKeepAlives = true
} else {
transport.MaxIdleConnsPerHost = keepalive
}
if httpserver.HTTP2 {
http2.ConfigureTransport(transport)
}
rp.Transport = transport
}
return rp
}
// UseInsecureTransport is used to facilitate HTTPS proxying
// when it is OK for upstream to be using a bad certificate,
// since this transport skips verification.
func (rp *ReverseProxy) UseInsecureTransport() {
if rp.Transport == nil {
transport := &http.Transport{
Proxy: http.ProxyFromEnvironment,
Dial: defaultDialer.Dial,
TLSHandshakeTimeout: 10 * time.Second,
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
}
if httpserver.HTTP2 {
http2.ConfigureTransport(transport)
}
rp.Transport = transport
} else if transport, ok := rp.Transport.(*http.Transport); ok {
if transport.TLSClientConfig == nil {
transport.TLSClientConfig = &tls.Config{}
}
transport.TLSClientConfig.InsecureSkipVerify = true
// No http2.ConfigureTransport() here.
// For now this is only added in places where
// an http.Transport is actually created.
}
}
// ServeHTTP serves the proxied request to the upstream by performing a roundtrip.
// It is designed to handle websocket connection upgrades as well.
func (rp *ReverseProxy) ServeHTTP(rw http.ResponseWriter, outreq *http.Request, respUpdateFn respUpdateFn) error {
transport := rp.Transport
if requestIsWebsocket(outreq) {
transport = newConnHijackerTransport(transport)
} else if transport == nil {
transport = http.DefaultTransport
}
rp.Director(outreq)
res, err := transport.RoundTrip(outreq)
if err != nil {
return err
}
isWebsocket := res.StatusCode == http.StatusSwitchingProtocols && strings.ToLower(res.Header.Get("Upgrade")) == "websocket"
// Remove hop-by-hop headers listed in the
// "Connection" header of the response.
if c := res.Header.Get("Connection"); c != "" {
for _, f := range strings.Split(c, ",") {
if f = strings.TrimSpace(f); f != "" {
res.Header.Del(f)
}
}
}
for _, h := range hopHeaders {
res.Header.Del(h)
}
if respUpdateFn != nil {
respUpdateFn(res)
}
if isWebsocket {
res.Body.Close()
hj, ok := rw.(http.Hijacker)
if !ok {
panic(httpserver.NonHijackerError{Underlying: rw})
}
conn, brw, err := hj.Hijack()
if err != nil {
return err
}
defer conn.Close()
var backendConn net.Conn
if hj, ok := transport.(*connHijackerTransport); ok {
backendConn = hj.Conn
if _, err := conn.Write(hj.Replay); err != nil {
return err
}
bufferPool.Put(hj.Replay)
} else {
backendConn, err = net.Dial("tcp", outreq.URL.Host)
if err != nil {
return err
}
outreq.Write(backendConn)
}
defer backendConn.Close()
// Proxy backend -> frontend.
go pooledIoCopy(conn, backendConn)
// Proxy frontend -> backend.
//
// NOTE: Hijack() sometimes returns buffered up bytes in brw which
// would be lost if we didn't read them out manually below.
if brw != nil {
if n := brw.Reader.Buffered(); n > 0 {
rbuf, err := brw.Reader.Peek(n)
if err != nil {
return err
}
backendConn.Write(rbuf)
}
}
pooledIoCopy(backendConn, conn)
} else {
copyHeader(rw.Header(), res.Header)
// The "Trailer" header isn't included in the Transport's response,
// at least for *http.Transport. Build it up from Trailer.
if len(res.Trailer) > 0 {
trailerKeys := make([]string, 0, len(res.Trailer))
for k := range res.Trailer {
trailerKeys = append(trailerKeys, k)
}
rw.Header().Add("Trailer", strings.Join(trailerKeys, ", "))
}
rw.WriteHeader(res.StatusCode)
if len(res.Trailer) > 0 {
// Force chunking if we saw a response trailer.
// This prevents net/http from calculating the length for short
// bodies and adding a Content-Length.
if fl, ok := rw.(http.Flusher); ok {
fl.Flush()
}
}
rp.copyResponse(rw, res.Body)
res.Body.Close() // close now, instead of defer, to populate res.Trailer
copyHeader(rw.Header(), res.Trailer)
}
return nil
}
func (rp *ReverseProxy) copyResponse(dst io.Writer, src io.Reader) {
if rp.FlushInterval != 0 {
if wf, ok := dst.(writeFlusher); ok {
mlw := &maxLatencyWriter{
dst: wf,
latency: rp.FlushInterval,
done: make(chan bool),
}
go mlw.flushLoop()
defer mlw.stop()
dst = mlw
}
}
pooledIoCopy(dst, src)
}
// skip these headers if they already exist.
// see https://github.com/mholt/caddy/pull/1112#discussion_r80092582
var skipHeaders = map[string]struct{}{
"Content-Type": {},
"Content-Disposition": {},
"Accept-Ranges": {},
"Set-Cookie": {},
"Cache-Control": {},
"Expires": {},
}
func copyHeader(dst, src http.Header) {
for k, vv := range src {
if _, ok := dst[k]; ok {
// skip some predefined headers
// see https://github.com/mholt/caddy/issues/1086
if _, shouldSkip := skipHeaders[k]; shouldSkip {
continue
}
// otherwise, overwrite
dst.Del(k)
}
for _, v := range vv {
dst.Add(k, v)
}
}
}
// Hop-by-hop headers. These are removed when sent to the backend.
// http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html
var hopHeaders = []string{
"Alt-Svc",
"Alternate-Protocol",
"Connection",
"Keep-Alive",
"Proxy-Authenticate",
"Proxy-Authorization",
"Proxy-Connection", // non-standard but still sent by libcurl and rejected by e.g. google
"Te", // canonicalized version of "TE"
"Trailer", // not Trailers per URL above; http://www.rfc-editor.org/errata_search.php?eid=4522
"Transfer-Encoding",
"Upgrade",
}
type respUpdateFn func(resp *http.Response)
type hijackedConn struct {
net.Conn
hj *connHijackerTransport
}
func (c *hijackedConn) Read(b []byte) (n int, err error) {
n, err = c.Conn.Read(b)
c.hj.Replay = append(c.hj.Replay, b[:n]...)
return
}
func (c *hijackedConn) Close() error {
return nil
}
type connHijackerTransport struct {
*http.Transport
Conn net.Conn
Replay []byte
}
func newConnHijackerTransport(base http.RoundTripper) *connHijackerTransport {
t := &http.Transport{
MaxIdleConnsPerHost: -1,
}
if b, _ := base.(*http.Transport); b != nil {
tlsClientConfig := b.TLSClientConfig
if tlsClientConfig != nil && tlsClientConfig.NextProtos != nil {
tlsClientConfig = tlsClientConfig.Clone()
tlsClientConfig.NextProtos = nil
}
t.Proxy = b.Proxy
t.TLSClientConfig = tlsClientConfig
t.TLSHandshakeTimeout = b.TLSHandshakeTimeout
t.Dial = b.Dial
t.DialTLS = b.DialTLS
} else {
t.Proxy = http.ProxyFromEnvironment
t.TLSHandshakeTimeout = 10 * time.Second
}
hj := &connHijackerTransport{t, nil, bufferPool.Get().([]byte)[:0]}
dial := getTransportDial(t)
dialTLS := getTransportDialTLS(t)
t.Dial = func(network, addr string) (net.Conn, error) {
c, err := dial(network, addr)
hj.Conn = c
return &hijackedConn{c, hj}, err
}
t.DialTLS = func(network, addr string) (net.Conn, error) {
c, err := dialTLS(network, addr)
hj.Conn = c
return &hijackedConn{c, hj}, err
}
return hj
}
// getTransportDial always returns a plain Dialer
// and defaults to the existing t.Dial.
func getTransportDial(t *http.Transport) func(network, addr string) (net.Conn, error) {
if t.Dial != nil {
return t.Dial
}
return defaultDialer.Dial
}
// getTransportDial always returns a TLS Dialer
// and defaults to the existing t.DialTLS.
func getTransportDialTLS(t *http.Transport) func(network, addr string) (net.Conn, error) {
if t.DialTLS != nil {
return t.DialTLS
}
// newConnHijackerTransport will modify t.Dial after calling this method
// => Create a backup reference.
plainDial := getTransportDial(t)
// The following DialTLS implementation stems from the Go stdlib and
// is identical to what happens if DialTLS is not provided.
// Source: https://github.com/golang/go/blob/230a376b5a67f0e9341e1fa47e670ff762213c83/src/net/http/transport.go#L1018-L1051
return func(network, addr string) (net.Conn, error) {
plainConn, err := plainDial(network, addr)
if err != nil {
return nil, err
}
tlsClientConfig := t.TLSClientConfig
if tlsClientConfig == nil {
tlsClientConfig = &tls.Config{}
}
if !tlsClientConfig.InsecureSkipVerify && tlsClientConfig.ServerName == "" {
tlsClientConfig.ServerName = stripPort(addr)
}
tlsConn := tls.Client(plainConn, tlsClientConfig)
errc := make(chan error, 2)
var timer *time.Timer
if d := t.TLSHandshakeTimeout; d != 0 {
timer = time.AfterFunc(d, func() {
errc <- tlsHandshakeTimeoutError{}
})
}
go func() {
err := tlsConn.Handshake()
if timer != nil {
timer.Stop()
}
errc <- err
}()
if err := <-errc; err != nil {
plainConn.Close()
return nil, err
}
if !tlsClientConfig.InsecureSkipVerify {
hostname := tlsClientConfig.ServerName
if hostname == "" {
hostname = stripPort(addr)
}
if err := tlsConn.VerifyHostname(hostname); err != nil {
plainConn.Close()
return nil, err
}
}
return tlsConn, nil
}
}
// stripPort returns address without its port if it has one and
// works with IP addresses as well as hostnames formatted as host:port.
//
// IPv6 addresses (excluding the port) must be enclosed in
// square brackets similar to the requirements of Go's stdlib.
func stripPort(address string) string {
// Keep in mind that the address might be a IPv6 address
// and thus contain a colon, but not have a port.
portIdx := strings.LastIndex(address, ":")
ipv6Idx := strings.LastIndex(address, "]")
if portIdx > ipv6Idx {
address = address[:portIdx]
}
return address
}
type tlsHandshakeTimeoutError struct{}
func (tlsHandshakeTimeoutError) Timeout() bool { return true }
func (tlsHandshakeTimeoutError) Temporary() bool { return true }
func (tlsHandshakeTimeoutError) Error() string { return "net/http: TLS handshake timeout" }
func requestIsWebsocket(req *http.Request) bool {
return strings.ToLower(req.Header.Get("Upgrade")) == "websocket" && strings.Contains(strings.ToLower(req.Header.Get("Connection")), "upgrade")
}
type writeFlusher interface {
io.Writer
http.Flusher
}
type maxLatencyWriter struct {
dst writeFlusher
latency time.Duration
lk sync.Mutex // protects Write + Flush
done chan bool
}
func (m *maxLatencyWriter) Write(p []byte) (int, error) {
m.lk.Lock()
defer m.lk.Unlock()
return m.dst.Write(p)
}
func (m *maxLatencyWriter) flushLoop() {
t := time.NewTicker(m.latency)
defer t.Stop()
for {
select {
case <-m.done:
if onExitFlushLoop != nil {
onExitFlushLoop()
}
return
case <-t.C:
m.lk.Lock()
m.dst.Flush()
m.lk.Unlock()
}
}
}
func (m *maxLatencyWriter) stop() { m.done <- true }