
caddyfile: Export Tokenize function for lexing (#3549)

Authored by Francis Lavoie on 2020-07-20 15:55:51 -04:00; committed by GitHub
parent 6cea1f239d
commit fb9d874fa9
3 changed files with 57 additions and 54 deletions

caddyconfig/caddyfile/lexer.go

@@ -16,6 +16,7 @@ package caddyfile
 
 import (
 	"bufio"
+	"bytes"
 	"io"
 	"unicode"
 )

@@ -168,3 +169,21 @@ func (l *lexer) next() bool {
 		val = append(val, ch)
 	}
 }
+
+// Tokenize takes bytes as input and lexes it into
+// a list of tokens that can be parsed as a Caddyfile.
+// Also takes a filename to fill the token's File as
+// the source of the tokens, which is important to
+// determine relative paths for `import` directives.
+func Tokenize(input []byte, filename string) ([]Token, error) {
+	l := lexer{}
+	if err := l.load(bytes.NewReader(input)); err != nil {
+		return nil, err
+	}
+	var tokens []Token
+	for l.next() {
+		l.token.File = filename
+		tokens = append(tokens, l.token)
+	}
+	return tokens, nil
+}
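For context, a minimal sketch of how downstream code could call the newly exported Tokenize (illustrative only, not part of this commit; the sample program and input are made up):

package main

import (
	"fmt"
	"log"

	"github.com/caddyserver/caddy/v2/caddyconfig/caddyfile"
)

func main() {
	// Any Caddyfile bytes work here; this input is just an example.
	input := []byte("example.com {\n\trespond \"hello\"\n}")

	// The filename is recorded on each token so that `import`
	// directives can later resolve paths relative to the source file.
	tokens, err := caddyfile.Tokenize(input, "Caddyfile")
	if err != nil {
		log.Fatal(err)
	}
	for _, tok := range tokens {
		fmt.Printf("line %d: %q\n", tok.Line, tok.Text)
	}
}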

caddyconfig/caddyfile/lexer_test.go

@@ -15,37 +15,35 @@
 package caddyfile
 
 import (
-	"log"
-	"strings"
 	"testing"
 )
 
 type lexerTestCase struct {
-	input    string
+	input    []byte
 	expected []Token
 }
 
 func TestLexer(t *testing.T) {
 	testCases := []lexerTestCase{
 		{
-			input: `host:123`,
+			input: []byte(`host:123`),
 			expected: []Token{
 				{Line: 1, Text: "host:123"},
 			},
 		},
 		{
-			input: `host:123
+			input: []byte(`host:123
 
-		directive`,
+		directive`),
 			expected: []Token{
 				{Line: 1, Text: "host:123"},
 				{Line: 3, Text: "directive"},
 			},
 		},
 		{
-			input: `host:123 {
+			input: []byte(`host:123 {
 			directive
-		}`,
+		}`),
 			expected: []Token{
 				{Line: 1, Text: "host:123"},
 				{Line: 1, Text: "{"},

@@ -54,7 +52,7 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: `host:123 { directive }`,
+			input: []byte(`host:123 { directive }`),
 			expected: []Token{
 				{Line: 1, Text: "host:123"},
 				{Line: 1, Text: "{"},

@@ -63,12 +61,12 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: `host:123 {
+			input: []byte(`host:123 {
 			#comment
 			directive
 			# comment
 			foobar # another comment
-		}`,
+		}`),
 			expected: []Token{
 				{Line: 1, Text: "host:123"},
 				{Line: 1, Text: "{"},

@@ -78,10 +76,10 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: `host:123 {
+			input: []byte(`host:123 {
 			# hash inside string is not a comment
 			redir / /some/#/path
-		}`,
+		}`),
 			expected: []Token{
 				{Line: 1, Text: "host:123"},
 				{Line: 1, Text: "{"},

@@ -92,14 +90,14 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: "# comment at beginning of file\n# comment at beginning of line\nhost:123",
+			input: []byte("# comment at beginning of file\n# comment at beginning of line\nhost:123"),
 			expected: []Token{
 				{Line: 3, Text: "host:123"},
 			},
 		},
 		{
-			input: `a "quoted value" b
-			foobar`,
+			input: []byte(`a "quoted value" b
+			foobar`),
 			expected: []Token{
 				{Line: 1, Text: "a"},
 				{Line: 1, Text: "quoted value"},

@@ -108,7 +106,7 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: `A "quoted \"value\" inside" B`,
+			input: []byte(`A "quoted \"value\" inside" B`),
 			expected: []Token{
 				{Line: 1, Text: "A"},
 				{Line: 1, Text: `quoted "value" inside`},

@@ -116,7 +114,7 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: "An escaped \"newline\\\ninside\" quotes",
+			input: []byte("An escaped \"newline\\\ninside\" quotes"),
 			expected: []Token{
 				{Line: 1, Text: "An"},
 				{Line: 1, Text: "escaped"},

@@ -125,7 +123,7 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: "An escaped newline\\\noutside quotes",
+			input: []byte("An escaped newline\\\noutside quotes"),
 			expected: []Token{
 				{Line: 1, Text: "An"},
 				{Line: 1, Text: "escaped"},

@@ -135,7 +133,7 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: "line1\\\nescaped\nline2\nline3",
+			input: []byte("line1\\\nescaped\nline2\nline3"),
 			expected: []Token{
 				{Line: 1, Text: "line1"},
 				{Line: 1, Text: "escaped"},

@@ -144,7 +142,7 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: "line1\\\nescaped1\\\nescaped2\nline4\nline5",
+			input: []byte("line1\\\nescaped1\\\nescaped2\nline4\nline5"),
 			expected: []Token{
 				{Line: 1, Text: "line1"},
 				{Line: 1, Text: "escaped1"},

@@ -154,34 +152,34 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: `"unescapable\ in quotes"`,
+			input: []byte(`"unescapable\ in quotes"`),
 			expected: []Token{
 				{Line: 1, Text: `unescapable\ in quotes`},
 			},
 		},
 		{
-			input: `"don't\escape"`,
+			input: []byte(`"don't\escape"`),
 			expected: []Token{
 				{Line: 1, Text: `don't\escape`},
 			},
 		},
 		{
-			input: `"don't\\escape"`,
+			input: []byte(`"don't\\escape"`),
 			expected: []Token{
 				{Line: 1, Text: `don't\\escape`},
 			},
 		},
 		{
-			input: `un\escapable`,
+			input: []byte(`un\escapable`),
 			expected: []Token{
 				{Line: 1, Text: `un\escapable`},
 			},
 		},
 		{
-			input: `A "quoted value with line
+			input: []byte(`A "quoted value with line
 					break inside" {
 				foobar
-			}`,
+			}`),
 			expected: []Token{
 				{Line: 1, Text: "A"},
 				{Line: 1, Text: "quoted value with line\n\t\t\t\t\tbreak inside"},

@@ -191,13 +189,13 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: `"C:\php\php-cgi.exe"`,
+			input: []byte(`"C:\php\php-cgi.exe"`),
 			expected: []Token{
 				{Line: 1, Text: `C:\php\php-cgi.exe`},
 			},
 		},
 		{
-			input: `empty "" string`,
+			input: []byte(`empty "" string`),
 			expected: []Token{
 				{Line: 1, Text: `empty`},
 				{Line: 1, Text: ``},

@@ -205,7 +203,7 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: "skip those\r\nCR characters",
+			input: []byte("skip those\r\nCR characters"),
 			expected: []Token{
 				{Line: 1, Text: "skip"},
 				{Line: 1, Text: "those"},

@@ -214,13 +212,13 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: "\xEF\xBB\xBF:8080", // test with leading byte order mark
+			input: []byte("\xEF\xBB\xBF:8080"), // test with leading byte order mark
 			expected: []Token{
 				{Line: 1, Text: ":8080"},
 			},
 		},
 		{
-			input: "simple `backtick quoted` string",
+			input: []byte("simple `backtick quoted` string"),
 			expected: []Token{
 				{Line: 1, Text: `simple`},
 				{Line: 1, Text: `backtick quoted`},

@@ -228,7 +226,7 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: "multiline `backtick\nquoted\n` string",
+			input: []byte("multiline `backtick\nquoted\n` string"),
 			expected: []Token{
 				{Line: 1, Text: `multiline`},
 				{Line: 1, Text: "backtick\nquoted\n"},

@@ -236,7 +234,7 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: "nested `\"quotes inside\" backticks` string",
+			input: []byte("nested `\"quotes inside\" backticks` string"),
 			expected: []Token{
 				{Line: 1, Text: `nested`},
 				{Line: 1, Text: `"quotes inside" backticks`},

@@ -244,7 +242,7 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: "reverse-nested \"`backticks` inside\" quotes",
+			input: []byte("reverse-nested \"`backticks` inside\" quotes"),
 			expected: []Token{
 				{Line: 1, Text: `reverse-nested`},
 				{Line: 1, Text: "`backticks` inside"},

@@ -254,22 +252,14 @@ func TestLexer(t *testing.T) {
 	}
 
 	for i, testCase := range testCases {
-		actual := tokenize(testCase.input)
+		actual, err := Tokenize(testCase.input, "")
+		if err != nil {
+			t.Errorf("%v", err)
+		}
 		lexerCompare(t, i, testCase.expected, actual)
 	}
 }
 
-func tokenize(input string) (tokens []Token) {
-	l := lexer{}
-	if err := l.load(strings.NewReader(input)); err != nil {
-		log.Printf("[ERROR] load failed: %v", err)
-	}
-	for l.next() {
-		tokens = append(tokens, l.token)
-	}
-	return
-}
-
 func lexerCompare(t *testing.T, n int, expected, actual []Token) {
 	if len(expected) != len(actual) {
 		t.Errorf("Test case %d: expected %d token(s) but got %d", n, len(expected), len(actual))

caddyconfig/caddyfile/parse.go

@@ -87,16 +87,10 @@ func allTokens(filename string, input []byte) ([]Token, error) {
 	if err != nil {
 		return nil, err
 	}
-	l := new(lexer)
-	err = l.load(bytes.NewReader(input))
+	tokens, err := Tokenize(input, filename)
 	if err != nil {
 		return nil, err
 	}
-	var tokens []Token
-	for l.next() {
-		l.token.File = filename
-		tokens = append(tokens, l.token)
-	}
 	return tokens, nil
 }
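A quick way to sanity-check the filename threading that allTokens now relies on would be a test like the following (hypothetical sketch, not part of this commit):

func TestTokenizeSetsFile(t *testing.T) {
	tokens, err := Tokenize([]byte("host:123"), "Caddyfile")
	if err != nil {
		t.Fatal(err)
	}
	for _, tok := range tokens {
		// Tokenize should stamp every token with the source filename.
		if tok.File != "Caddyfile" {
			t.Errorf("token %q: expected File \"Caddyfile\", got %q", tok.Text, tok.File)
		}
	}
}

Such a test would live alongside the existing cases in package caddyfile.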