2021-09-16 12:50:32 -06:00
|
|
|
// Copyright 2015 Matthew Holt and The Caddy Authors
|
2019-08-09 12:05:47 -06:00
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
package caddyfile
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bufio"
|
2020-07-20 15:55:51 -04:00
|
|
|
"bytes"
|
2019-08-09 12:05:47 -06:00
|
|
|
"io"
|
|
|
|
"unicode"
|
|
|
|
)
|
|
|
|
|
|
|
|
// lexer pulls tokens, one at a time, out of a buffered reader.
// A token is a word; words are separated by whitespace, and a
// word that must contain whitespace can be wrapped in quotes.
type lexer struct {
	reader       *bufio.Reader // buffered source of the runes being scanned
	token        Token         // token most recently started by next
	line         int           // line number of the current read position
	skippedLines int           // escaped newlines seen but not yet added to line
}

// Token represents a single parsable unit.
type Token struct {
	File        string // name of the source the token came from
	Line        int    // line on which the token started
	Text        string // token contents, without any enclosing quotes
	inSnippet   bool
	snippetName string
}
|
|
|
|
|
|
|
|
// load prepares the lexer to scan an input for tokens.
|
|
|
|
// It discards any leading byte order mark.
|
|
|
|
func (l *lexer) load(input io.Reader) error {
|
|
|
|
l.reader = bufio.NewReader(input)
|
|
|
|
l.line = 1
|
|
|
|
|
|
|
|
// discard byte order mark, if present
|
|
|
|
firstCh, _, err := l.reader.ReadRune()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if firstCh != 0xFEFF {
|
|
|
|
err := l.reader.UnreadRune()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// next loads the next token into the lexer.
|
|
|
|
// A token is delimited by whitespace, unless
|
|
|
|
// the token starts with a quotes character (")
|
|
|
|
// in which case the token goes until the closing
|
|
|
|
// quotes (the enclosing quotes are not included).
|
|
|
|
// Inside quoted strings, quotes may be escaped
|
|
|
|
// with a preceding \ character. No other chars
|
|
|
|
// may be escaped. The rest of the line is skipped
|
|
|
|
// if a "#" character is read in. Returns true if
|
|
|
|
// a token was loaded; false otherwise.
|
|
|
|
func (l *lexer) next() bool {
|
|
|
|
var val []rune
|
2020-05-05 12:27:49 -06:00
|
|
|
var comment, quoted, btQuoted, escaped bool
|
2019-08-09 12:05:47 -06:00
|
|
|
|
|
|
|
makeToken := func() bool {
|
|
|
|
l.token.Text = string(val)
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
for {
|
|
|
|
ch, _, err := l.reader.ReadRune()
|
|
|
|
if err != nil {
|
|
|
|
if len(val) > 0 {
|
|
|
|
return makeToken()
|
|
|
|
}
|
|
|
|
if err == io.EOF {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
|
2020-05-05 12:27:49 -06:00
|
|
|
if !escaped && !btQuoted && ch == '\\' {
|
2019-09-28 21:18:36 -06:00
|
|
|
escaped = true
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2020-05-05 12:27:49 -06:00
|
|
|
if quoted || btQuoted {
|
|
|
|
if quoted && escaped {
|
2019-09-28 21:18:36 -06:00
|
|
|
// all is literal in quoted area,
|
|
|
|
// so only escape quotes
|
|
|
|
if ch != '"' {
|
|
|
|
val = append(val, '\\')
|
|
|
|
}
|
|
|
|
escaped = false
|
|
|
|
} else {
|
2020-05-05 12:27:49 -06:00
|
|
|
if quoted && ch == '"' {
|
|
|
|
return makeToken()
|
|
|
|
}
|
|
|
|
if btQuoted && ch == '`' {
|
2019-08-09 12:05:47 -06:00
|
|
|
return makeToken()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if ch == '\n' {
|
2019-09-28 21:18:36 -06:00
|
|
|
l.line += 1 + l.skippedLines
|
|
|
|
l.skippedLines = 0
|
2019-08-09 12:05:47 -06:00
|
|
|
}
|
|
|
|
val = append(val, ch)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
if unicode.IsSpace(ch) {
|
|
|
|
if ch == '\r' {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if ch == '\n' {
|
2019-09-28 21:18:36 -06:00
|
|
|
if escaped {
|
|
|
|
l.skippedLines++
|
|
|
|
escaped = false
|
|
|
|
} else {
|
|
|
|
l.line += 1 + l.skippedLines
|
|
|
|
l.skippedLines = 0
|
|
|
|
}
|
2019-08-09 12:05:47 -06:00
|
|
|
comment = false
|
|
|
|
}
|
|
|
|
if len(val) > 0 {
|
|
|
|
return makeToken()
|
|
|
|
}
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2020-05-05 14:32:12 -04:00
|
|
|
if ch == '#' && len(val) == 0 {
|
2019-08-09 12:05:47 -06:00
|
|
|
comment = true
|
|
|
|
}
|
|
|
|
if comment {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(val) == 0 {
|
|
|
|
l.token = Token{Line: l.line}
|
|
|
|
if ch == '"' {
|
|
|
|
quoted = true
|
|
|
|
continue
|
|
|
|
}
|
2020-05-05 12:27:49 -06:00
|
|
|
if ch == '`' {
|
|
|
|
btQuoted = true
|
|
|
|
continue
|
|
|
|
}
|
2019-08-09 12:05:47 -06:00
|
|
|
}
|
|
|
|
|
2019-10-15 16:05:53 -06:00
|
|
|
if escaped {
|
|
|
|
val = append(val, '\\')
|
|
|
|
escaped = false
|
|
|
|
}
|
|
|
|
|
2019-08-09 12:05:47 -06:00
|
|
|
val = append(val, ch)
|
|
|
|
}
|
|
|
|
}
|
2020-07-20 15:55:51 -04:00
|
|
|
|
|
|
|
// Tokenize takes bytes as input and lexes it into
|
|
|
|
// a list of tokens that can be parsed as a Caddyfile.
|
|
|
|
// Also takes a filename to fill the token's File as
|
|
|
|
// the source of the tokens, which is important to
|
|
|
|
// determine relative paths for `import` directives.
|
|
|
|
func Tokenize(input []byte, filename string) ([]Token, error) {
|
|
|
|
l := lexer{}
|
|
|
|
if err := l.load(bytes.NewReader(input)); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
var tokens []Token
|
|
|
|
for l.next() {
|
|
|
|
l.token.File = filename
|
|
|
|
tokens = append(tokens, l.token)
|
|
|
|
}
|
|
|
|
return tokens, nil
|
|
|
|
}
|