caddyfile: Export Tokenize function for lexing (#3549)

Repository: https://github.com/caddyserver/caddy.git
Commit: fb9d874fa9
Parent: 6cea1f239d
3 changed files with 57 additions and 54 deletions
lexer.go

@@ -16,6 +16,7 @@ package caddyfile
 
 import (
 	"bufio"
+	"bytes"
 	"io"
 	"unicode"
 )

@@ -168,3 +169,21 @@ func (l *lexer) next() bool {
 		val = append(val, ch)
 	}
 }
+
+// Tokenize takes bytes as input and lexes it into
+// a list of tokens that can be parsed as a Caddyfile.
+// Also takes a filename to fill the token's File as
+// the source of the tokens, which is important to
+// determine relative paths for `import` directives.
+func Tokenize(input []byte, filename string) ([]Token, error) {
+	l := lexer{}
+	if err := l.load(bytes.NewReader(input)); err != nil {
+		return nil, err
+	}
+	var tokens []Token
+	for l.next() {
+		l.token.File = filename
+		tokens = append(tokens, l.token)
+	}
+	return tokens, nil
+}
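
With Tokenize exported, code outside the caddyfile package can lex Caddyfile source without touching the unexported lexer type. A minimal usage sketch, not part of this commit; the import path assumes Caddy's v2 module layout, and the sample input is invented:

	package main

	import (
		"fmt"
		"log"

		"github.com/caddyserver/caddy/v2/caddyconfig/caddyfile"
	)

	func main() {
		input := []byte("example.com {\n\troot * /var/www\n}")
		// The filename argument is stamped onto each token's File field,
		// which `import` directives later use to resolve relative paths.
		tokens, err := caddyfile.Tokenize(input, "Caddyfile")
		if err != nil {
			log.Fatal(err)
		}
		for _, tok := range tokens {
			fmt.Printf("%s:%d %q\n", tok.File, tok.Line, tok.Text)
		}
	}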
lexer_test.go

@@ -15,37 +15,35 @@
 package caddyfile
 
 import (
-	"log"
-	"strings"
 	"testing"
 )
 
 type lexerTestCase struct {
-	input    string
+	input    []byte
 	expected []Token
 }
 
 func TestLexer(t *testing.T) {
 	testCases := []lexerTestCase{
 		{
-			input: `host:123`,
+			input: []byte(`host:123`),
 			expected: []Token{
 				{Line: 1, Text: "host:123"},
 			},
 		},
 		{
-			input: `host:123
+			input: []byte(`host:123
 
-					directive`,
+					directive`),
 			expected: []Token{
 				{Line: 1, Text: "host:123"},
 				{Line: 3, Text: "directive"},
 			},
 		},
 		{
-			input: `host:123 {
+			input: []byte(`host:123 {
 				directive
-			}`,
+			}`),
 			expected: []Token{
 				{Line: 1, Text: "host:123"},
 				{Line: 1, Text: "{"},

@@ -54,7 +52,7 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: `host:123 { directive }`,
+			input: []byte(`host:123 { directive }`),
 			expected: []Token{
 				{Line: 1, Text: "host:123"},
 				{Line: 1, Text: "{"},

@@ -63,12 +61,12 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: `host:123 {
+			input: []byte(`host:123 {
 				#comment
 				directive
 				# comment
 				foobar # another comment
-			}`,
+			}`),
 			expected: []Token{
 				{Line: 1, Text: "host:123"},
 				{Line: 1, Text: "{"},

@@ -78,10 +76,10 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: `host:123 {
+			input: []byte(`host:123 {
 				# hash inside string is not a comment
 				redir / /some/#/path
-			}`,
+			}`),
 			expected: []Token{
 				{Line: 1, Text: "host:123"},
 				{Line: 1, Text: "{"},

@@ -92,14 +90,14 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: "# comment at beginning of file\n# comment at beginning of line\nhost:123",
+			input: []byte("# comment at beginning of file\n# comment at beginning of line\nhost:123"),
 			expected: []Token{
 				{Line: 3, Text: "host:123"},
 			},
 		},
 		{
-			input: `a "quoted value" b
-					foobar`,
+			input: []byte(`a "quoted value" b
+					foobar`),
 			expected: []Token{
 				{Line: 1, Text: "a"},
 				{Line: 1, Text: "quoted value"},

@@ -108,7 +106,7 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: `A "quoted \"value\" inside" B`,
+			input: []byte(`A "quoted \"value\" inside" B`),
 			expected: []Token{
 				{Line: 1, Text: "A"},
 				{Line: 1, Text: `quoted "value" inside`},

@@ -116,7 +114,7 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: "An escaped \"newline\\\ninside\" quotes",
+			input: []byte("An escaped \"newline\\\ninside\" quotes"),
 			expected: []Token{
 				{Line: 1, Text: "An"},
 				{Line: 1, Text: "escaped"},

@@ -125,7 +123,7 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: "An escaped newline\\\noutside quotes",
+			input: []byte("An escaped newline\\\noutside quotes"),
 			expected: []Token{
 				{Line: 1, Text: "An"},
 				{Line: 1, Text: "escaped"},

@@ -135,7 +133,7 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: "line1\\\nescaped\nline2\nline3",
+			input: []byte("line1\\\nescaped\nline2\nline3"),
 			expected: []Token{
 				{Line: 1, Text: "line1"},
 				{Line: 1, Text: "escaped"},

@@ -144,7 +142,7 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: "line1\\\nescaped1\\\nescaped2\nline4\nline5",
+			input: []byte("line1\\\nescaped1\\\nescaped2\nline4\nline5"),
 			expected: []Token{
 				{Line: 1, Text: "line1"},
 				{Line: 1, Text: "escaped1"},

@@ -154,34 +152,34 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: `"unescapable\ in quotes"`,
+			input: []byte(`"unescapable\ in quotes"`),
 			expected: []Token{
 				{Line: 1, Text: `unescapable\ in quotes`},
 			},
 		},
 		{
-			input: `"don't\escape"`,
+			input: []byte(`"don't\escape"`),
 			expected: []Token{
 				{Line: 1, Text: `don't\escape`},
 			},
 		},
 		{
-			input: `"don't\\escape"`,
+			input: []byte(`"don't\\escape"`),
 			expected: []Token{
 				{Line: 1, Text: `don't\\escape`},
 			},
 		},
 		{
-			input: `un\escapable`,
+			input: []byte(`un\escapable`),
 			expected: []Token{
 				{Line: 1, Text: `un\escapable`},
 			},
 		},
 		{
-			input: `A "quoted value with line
+			input: []byte(`A "quoted value with line
 					break inside" {
 				foobar
-			}`,
+			}`),
 			expected: []Token{
 				{Line: 1, Text: "A"},
 				{Line: 1, Text: "quoted value with line\n\t\t\t\t\tbreak inside"},

@@ -191,13 +189,13 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: `"C:\php\php-cgi.exe"`,
+			input: []byte(`"C:\php\php-cgi.exe"`),
 			expected: []Token{
 				{Line: 1, Text: `C:\php\php-cgi.exe`},
 			},
 		},
 		{
-			input: `empty "" string`,
+			input: []byte(`empty "" string`),
 			expected: []Token{
 				{Line: 1, Text: `empty`},
 				{Line: 1, Text: ``},

@@ -205,7 +203,7 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: "skip those\r\nCR characters",
+			input: []byte("skip those\r\nCR characters"),
 			expected: []Token{
 				{Line: 1, Text: "skip"},
 				{Line: 1, Text: "those"},

@@ -214,13 +212,13 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: "\xEF\xBB\xBF:8080", // test with leading byte order mark
+			input: []byte("\xEF\xBB\xBF:8080"), // test with leading byte order mark
 			expected: []Token{
 				{Line: 1, Text: ":8080"},
 			},
 		},
 		{
-			input: "simple `backtick quoted` string",
+			input: []byte("simple `backtick quoted` string"),
 			expected: []Token{
 				{Line: 1, Text: `simple`},
 				{Line: 1, Text: `backtick quoted`},

@@ -228,7 +226,7 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: "multiline `backtick\nquoted\n` string",
+			input: []byte("multiline `backtick\nquoted\n` string"),
 			expected: []Token{
 				{Line: 1, Text: `multiline`},
 				{Line: 1, Text: "backtick\nquoted\n"},

@@ -236,7 +234,7 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: "nested `\"quotes inside\" backticks` string",
+			input: []byte("nested `\"quotes inside\" backticks` string"),
 			expected: []Token{
 				{Line: 1, Text: `nested`},
 				{Line: 1, Text: `"quotes inside" backticks`},

@@ -244,7 +242,7 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: "reverse-nested \"`backticks` inside\" quotes",
+			input: []byte("reverse-nested \"`backticks` inside\" quotes"),
 			expected: []Token{
 				{Line: 1, Text: `reverse-nested`},
 				{Line: 1, Text: "`backticks` inside"},

@@ -254,22 +252,14 @@ func TestLexer(t *testing.T) {
 	}
 
 	for i, testCase := range testCases {
-		actual := tokenize(testCase.input)
+		actual, err := Tokenize(testCase.input, "")
+		if err != nil {
+			t.Errorf("%v", err)
+		}
 		lexerCompare(t, i, testCase.expected, actual)
 	}
 }
 
-func tokenize(input string) (tokens []Token) {
-	l := lexer{}
-	if err := l.load(strings.NewReader(input)); err != nil {
-		log.Printf("[ERROR] load failed: %v", err)
-	}
-	for l.next() {
-		tokens = append(tokens, l.token)
-	}
-	return
-}
-
 func lexerCompare(t *testing.T, n int, expected, actual []Token) {
 	if len(expected) != len(actual) {
 		t.Errorf("Test case %d: expected %d token(s) but got %d", n, len(expected), len(actual))
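
The private tokenize helper (and the log and strings imports it required) is deleted: tests now go through the exported Tokenize, so a load failure surfaces as a test error instead of a stray log line. For reference, a sketch of the Token shape these tests rely on, inferred only from the fields visible in this diff (the real definition lives with the lexer):

	// Hypothetical reconstruction; only the fields this diff touches.
	type Token struct {
		File string // source filename, set by Tokenize
		Line int    // line where the token starts
		Text string // literal text of the token
	}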
parse.go

@@ -87,16 +87,10 @@ func allTokens(filename string, input []byte) ([]Token, error) {
 	if err != nil {
 		return nil, err
 	}
-	l := new(lexer)
-	err = l.load(bytes.NewReader(input))
+	tokens, err := Tokenize(input, filename)
 	if err != nil {
 		return nil, err
 	}
-	var tokens []Token
-	for l.next() {
-		l.token.File = filename
-		tokens = append(tokens, l.token)
-	}
 	return tokens, nil
 }
 
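
With allTokens reduced to a thin wrapper around Tokenize, any caller can lex a file from disk the same way. A sketch under the same assumptions as the earlier example; the package name and helper function are hypothetical:

	package tokutil

	import (
		"os"

		"github.com/caddyserver/caddy/v2/caddyconfig/caddyfile"
	)

	// TokensFromFile reads Caddyfile source from disk and lexes it.
	// Passing the path as the filename lets `import` directives
	// resolve relative to this file's location.
	func TokensFromFile(path string) ([]caddyfile.Token, error) {
		input, err := os.ReadFile(path)
		if err != nil {
			return nil, err
		}
		return caddyfile.Tokenize(input, path)
	}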