diff --git a/data.go b/data.go index 6be3c15..954f575 100644 --- a/data.go +++ b/data.go @@ -323,7 +323,6 @@ func (r Request) String() string { func NewRequest(r *http.Request, baseURL *url.URL) (*Request, error) { var err error req := &Request{Original: r} - path := r.URL.EscapedPath()[1:] // strip leading slash // first segment should be options parts := strings.SplitN(path, "/", 2) @@ -331,13 +330,15 @@ func NewRequest(r *http.Request, baseURL *url.URL) (*Request, error) { return nil, URLError{"too few path segments", r.URL} } - req.URL, err = parseURL(parts[1], (baseURL == nil)) + req.URL, err = ParseURL(parts[1], (baseURL == nil)) // don't url decode if baseURL is set if err != nil { return nil, URLError{fmt.Sprintf("unable to parse remote URL: %v", err), r.URL} } req.Options = ParseOptions(parts[0]) + + if baseURL != nil { req.URL = baseURL.ResolveReference(req.URL) } @@ -365,9 +366,9 @@ func NewRequest(r *http.Request, baseURL *url.URL) (*Request, error) { var reCleanedURL = regexp.MustCompile(`^(https?):/+([^/])`) -// parseURL parses s as a URL, handling URLs that have been munged by +// ParseURL parses s as a URL, handling URLs that have been munged by // path.Clean or a webserver that collapses multiple slashes. -func parseURL(s string, urlDecode bool) (*url.URL, error) { +func ParseURL(s string, urlDecode bool) (*url.URL, error) { var err error // convert http:/example.com to http://example.com @@ -400,7 +401,7 @@ func decodeURL(s string) (string, error) { for i := 0; !reDecodedURL.MatchString(decodedURL) && i < maxDecodeAttempts; i++ { decodedURL, err = url.QueryUnescape(decodedURL) if err != nil { - return "", URLError{"remote URL could not be decoded", nil} + return "", URLError{fmt.Sprintf("remote URL could not be decoded: %v", err), nil} } } if reDecodedURL.MatchString(decodedURL) { diff --git a/data_test.go b/data_test.go index 42fe864..0df60d1 100644 --- a/data_test.go +++ b/data_test.go @@ -116,6 +116,9 @@ func TestNewRequest(t *testing.T) { // invalid URL because options now required {"http://localhost/http://example.com/foo", "", emptyOptions, true}, + // invalid URL because remote url has invalid url encoding (the %u) + {"http://localhost/x/https%253A%252F%252Fexample.com%252F%253FbadParam%253D%25u0414", "", emptyOptions, true}, + // invalid options. These won't return errors, but will not fully parse the options { "http://localhost/s/http://example.com/", @@ -126,7 +129,9 @@ func TestNewRequest(t *testing.T) { "http://example.com/", Options{Width: 1}, false, }, + // valid URLs + { "http://localhost//http://example.com/foo", "http://example.com/foo", emptyOptions, false, @@ -173,6 +178,16 @@ func TestNewRequest(t *testing.T) { "http://localhost/x/http%3A%2F%2Fexample.com%2Ffoo%2Fbar%3Fhello%3Dworld", "http://example.com/foo/bar?hello=world", emptyOptions, false, }, + // escaped remote including encoded querystring and unencoded querystring (not retained) + { + "http://localhost/x/http%3A%2F%2Fexample.com%2Ffoo%2Fbar%3Fhello%3Dworld?a=b", + "http://example.com/foo/bar?hello=world", emptyOptions, false, + }, + // escaped remote WITHOUT encoded querystring and unencoded querystring (should be retained) + { + "http://localhost/x/http%3A%2F%2Fexample.com%2Ffoo%2Fbar?a=b", + "http://example.com/foo/bar?a=b", emptyOptions, false, + }, { "http://localhost/x/https%3A%2F%2Fexample.com%2Ffoo%2Fbar%3Fhello%3Dworld", "https://example.com/foo/bar?hello=world", emptyOptions, false, @@ -287,3 +302,55 @@ func TestNewRequest_BaseURL(t *testing.T) { } } } + + + +func TestParseURL(t *testing.T) { + tests := []struct { + URL string // input URL to parse as an imageproxy request + URLDecode bool // expected options parsed from input + URLExpected string // expected URL of remote image parsed from input + ExpectError bool // whether an error is expected from NewRequest + }{ + // should fix missing slashes + {"http:/example.com/", true, + "http://example.com/", false}, + {"https:/example.com/", true, + "https://example.com/", false}, + + // should decode when told + {"https%253A%252F%252Fexample.com%252Fimg.jpg%253Ffoo%253Dbar%2526bar%253Ddo%25252Fnot%25252Fdecode%25252Fme", true, + "https://example.com/img.jpg?foo=bar&bar=do%2Fnot%2Fdecode%2Fme", false}, + + // should NOT decode unless told (e.g. baseURL set) + {"https%3A%2F%2Fexample.com%2Fimg.jpg%3Ffoo%3Dbar%26bar%3Ddo%252Fnot%252Fdecode%252Fme", false, + "https%3A%2F%2Fexample.com%2Fimg.jpg%3Ffoo%3Dbar%26bar%3Ddo%252Fnot%252Fdecode%252Fme", false}, + + // should return original url if asked to decode but can't find anything that + // looks like absolute url (starts with https?://) before giving up + {"https%3Aexample.com%2Fimg.jpg", true, + "https%3Aexample.com%2Fimg.jpg", false}, + + // should error when asked to decode a url with invalid encoding + {"https%253A%252F%252Fexample.com%252F%253FbadParam%253D%25u0414", true, + "", true}, + } + + for _, tt := range tests { + + URLOut, err := ParseURL(tt.URL, tt.URLDecode) + if tt.ExpectError { + if err == nil { + t.Errorf("ParseURL(%q, decode=%v) did not return expected error", tt.URL, tt.URLDecode) + } + continue + } else if err != nil { + t.Errorf("ParseURL(%v, decode=%v) return unexpected error: %v", tt.URL, tt.URLDecode, err) + continue + } + + if got, want := URLOut.String(), tt.URLExpected; got != want { + t.Errorf("ParseURL(%q, decode=%v) returned = %v, wanted %v", tt.URL, tt.URLDecode, got, want) + } + } +}