caddy/config/parse/lexer.go

package parse

import (
	"bufio"
	"io"
	"unicode"
)

// lexer pulls whitespace-separated words (tokens) out of a Reader,
// one at a time. A word that needs to contain whitespace can be
// wrapped in quotes.
type lexer struct {
	reader *bufio.Reader // buffered source the runes are read from
	token  token         // the token most recently loaded by next
	line   int           // line currently being read; load starts it at 1
}

// token is one parsable unit produced by the lexer.
type token struct {
	line int    // value of lexer.line when the token's first character was read
	text string // token contents, without any enclosing quotes
}

// load points the lexer at input and rewinds the line counter to 1,
// making it ready for calls to next. The input is wrapped in a
// bufio.Reader. The returned error is always nil.
func (l *lexer) load(input io.Reader) error {
	l.line = 1

	buffered := bufio.NewReader(input)
	l.reader = buffered

	return nil
}

// next loads the next token into the lexer.
// A token is delimited by whitespace, unless
// the token starts with a quotes character (")
// in which case the token goes until the closing
// quotes (the enclosing quotes are not included).
// Inside quoted strings, quotes may be escaped
// with a preceding \ character. No other chars
// may be escaped; a backslash before any other
// character is kept literally. The rest of the
// line is skipped if a "#" character is read in
// outside of quotes, even when the "#" appears in
// the middle of a word: the text before it becomes
// the token and everything up to the end of the
// line is discarded.
//
// Returns true if a token was loaded (available in
// l.token); false if the input ended with no token
// text pending. If reading fails with an error other
// than io.EOF while no token text is pending, next
// panics with that error.
func (l *lexer) next() bool {
	var val []rune
	var comment, quoted, escaped bool

	// makeToken finalizes the token whose line number was
	// recorded when its first character was seen.
	makeToken := func() bool {
		l.token.text = string(val)
		return true
	}

	for {
		ch, _, err := l.reader.ReadRune()
		if err != nil {
			// Whatever was collected so far is still a token,
			// including the contents of an unterminated quote.
			if len(val) > 0 {
				return makeToken()
			}
			if err == io.EOF {
				return false
			}
			panic(err)
		}

		if quoted {
			if !escaped {
				if ch == '\\' {
					// Hold the backslash back until we know
					// whether it escapes a quote.
					escaped = true
					continue
				} else if ch == '"' {
					quoted = false
					return makeToken()
				}
			}
			if ch == '\n' {
				// Quoted tokens may span lines; keep counting.
				l.line++
			}
			if escaped {
				// only escape quotes
				if ch != '"' {
					val = append(val, '\\')
				}
			}
			val = append(val, ch)
			escaped = false
			continue
		}

		if unicode.IsSpace(ch) {
			if ch == '\r' {
				// Carriage returns are dropped without ending the token.
				continue
			}
			if ch == '\n' {
				l.line++
				comment = false
			}
			if comment {
				// Still inside a comment that started mid-word.
				// Ending the token here would forget the comment
				// state (it is local to this call) and the rest
				// of the comment would be lexed as tokens by the
				// next call, so keep skipping until the newline.
				continue
			}
			if len(val) > 0 {
				return makeToken()
			}
			continue
		}

		if ch == '#' {
			comment = true
		}

		if comment {
			continue
		}

		if len(val) == 0 {
			// First character of a new token: remember where it starts.
			l.token = token{line: l.line}
			if ch == '"' {
				quoted = true
				continue
			}
		}

		val = append(val, ch)
	}
}
Major refactoring of middleware and parser in progress 2015-05-04 12:04:17 -05:00			`package parse`

			`import (`
			`"bufio"`
			`"io"`
			`"unicode"`
			`)`

			`type (`
			`// lexer is a utility which can get values, token by`
			`// token, from a Reader. A token is a word, and tokens`
			`// are separated by whitespace. A word can be enclosed`
			`// in quotes if it contains whitespace.`
			`lexer struct {`
			`reader *bufio.Reader`
			`token token`
			`line int`
			`}`

			`// token represents a single parsable unit.`
			`token struct {`
			`line int`
			`text string`
			`}`
			`)`

			`// load prepares the lexer to scan an input for tokens.`
			`func (l *lexer) load(input io.Reader) error {`
			`l.reader = bufio.NewReader(input)`
			`l.line = 1`
			`return nil`
			`}`

			`// next loads the next token into the lexer.`
			`// A token is delimited by whitespace, unless`
			`// the token starts with a quotes character (")`
			`// in which case the token goes until the closing`
			`// quotes (the enclosing quotes are not included).`
lexer: Fixed backslashes in quoted strings (closes #65) 2015-05-08 11:32:57 -05:00			`// Inside quoted strings, quotes may be escaped`
			`// with a preceding \ character. No other chars`
			`// may be escaped. The rest of the line is skipped`
			`// if a "#" character is read in. Returns true if`
			`// a token was loaded; false otherwise.`
Major refactoring of middleware and parser in progress 2015-05-04 12:04:17 -05:00			`func (l *lexer) next() bool {`
			`var val []rune`
			`var comment, quoted, escaped bool`

			`makeToken := func() bool {`
			`l.token.text = string(val)`
			`return true`
			`}`

			`for {`
			`ch, _, err := l.reader.ReadRune()`
			`if err != nil {`
			`if len(val) > 0 {`
			`return makeToken()`
			`}`
			`if err == io.EOF {`
			`return false`
			`}`
Fix more lint warnings 2015-05-24 21:52:34 -05:00			`panic(err)`
Major refactoring of middleware and parser in progress 2015-05-04 12:04:17 -05:00			`}`

			`if quoted {`
			`if !escaped {`
			`if ch == '\\' {`
			`escaped = true`
			`continue`
			`} else if ch == '"' {`
			`quoted = false`
			`return makeToken()`
			`}`
			`}`
			`if ch == '\n' {`
			`l.line++`
			`}`
lexer: Fixed backslashes in quoted strings (closes #65) 2015-05-08 11:32:57 -05:00			`if escaped {`
			`// only escape quotes`
			`if ch != '"' {`
			`val = append(val, '\\')`
			`}`
			`}`
Major refactoring of middleware and parser in progress 2015-05-04 12:04:17 -05:00			`val = append(val, ch)`
			`escaped = false`
			`continue`
			`}`

			`if unicode.IsSpace(ch) {`
			`if ch == '\r' {`
			`continue`
			`}`
			`if ch == '\n' {`
			`l.line++`
			`comment = false`
			`}`
			`if len(val) > 0 {`
			`return makeToken()`
			`}`
			`continue`
			`}`

			`if ch == '#' {`
			`comment = true`
			`}`

			`if comment {`
			`continue`
			`}`

			`if len(val) == 0 {`
			`l.token = token{line: l.line}`
			`if ch == '"' {`
			`quoted = true`
			`continue`
			`}`
			`}`

			`val = append(val, ch)`
			`}`
			`}`