2015-05-04 11:04:17 -06:00
|
|
|
package parse
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bufio"
|
|
|
|
"io"
|
|
|
|
"unicode"
|
|
|
|
)
|
|
|
|
|
|
|
|
// lexer pulls whitespace-separated words ("tokens") out of a
// Reader one at a time. A word that needs to contain whitespace
// can be wrapped in double quotes.
type lexer struct {
	reader *bufio.Reader // buffered view of the input being scanned
	token  token         // the token most recently loaded by next
	line   int           // line currently being read; load starts it at 1
}

// token is one parsable unit produced by the lexer.
type token struct {
	file string // not populated by the lexer itself; presumably set by callers — TODO confirm
	line int    // line on which the token started
	text string // the token's contents, without any enclosing quotes
}
|
|
|
|
|
|
|
|
// load prepares the lexer to scan an input for tokens.
|
|
|
|
func (l *lexer) load(input io.Reader) error {
|
|
|
|
l.reader = bufio.NewReader(input)
|
|
|
|
l.line = 1
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// next loads the next token into the lexer.
|
|
|
|
// A token is delimited by whitespace, unless
|
|
|
|
// the token starts with a quotes character (")
|
|
|
|
// in which case the token goes until the closing
|
|
|
|
// quotes (the enclosing quotes are not included).
|
2015-05-08 10:32:57 -06:00
|
|
|
// Inside quoted strings, quotes may be escaped
|
|
|
|
// with a preceding \ character. No other chars
|
|
|
|
// may be escaped. The rest of the line is skipped
|
|
|
|
// if a "#" character is read in. Returns true if
|
|
|
|
// a token was loaded; false otherwise.
|
2015-05-04 11:04:17 -06:00
|
|
|
func (l *lexer) next() bool {
|
|
|
|
var val []rune
|
|
|
|
var comment, quoted, escaped bool
|
|
|
|
|
|
|
|
makeToken := func() bool {
|
|
|
|
l.token.text = string(val)
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
for {
|
|
|
|
ch, _, err := l.reader.ReadRune()
|
|
|
|
if err != nil {
|
|
|
|
if len(val) > 0 {
|
|
|
|
return makeToken()
|
|
|
|
}
|
|
|
|
if err == io.EOF {
|
|
|
|
return false
|
|
|
|
}
|
2015-05-24 22:52:34 -04:00
|
|
|
panic(err)
|
2015-05-04 11:04:17 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
if quoted {
|
|
|
|
if !escaped {
|
|
|
|
if ch == '\\' {
|
|
|
|
escaped = true
|
|
|
|
continue
|
|
|
|
} else if ch == '"' {
|
|
|
|
quoted = false
|
|
|
|
return makeToken()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if ch == '\n' {
|
|
|
|
l.line++
|
|
|
|
}
|
2015-05-08 10:32:57 -06:00
|
|
|
if escaped {
|
|
|
|
// only escape quotes
|
|
|
|
if ch != '"' {
|
|
|
|
val = append(val, '\\')
|
|
|
|
}
|
|
|
|
}
|
2015-05-04 11:04:17 -06:00
|
|
|
val = append(val, ch)
|
|
|
|
escaped = false
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
if unicode.IsSpace(ch) {
|
|
|
|
if ch == '\r' {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if ch == '\n' {
|
|
|
|
l.line++
|
|
|
|
comment = false
|
|
|
|
}
|
|
|
|
if len(val) > 0 {
|
|
|
|
return makeToken()
|
|
|
|
}
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
if ch == '#' {
|
|
|
|
comment = true
|
|
|
|
}
|
|
|
|
|
|
|
|
if comment {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(val) == 0 {
|
|
|
|
l.token = token{line: l.line}
|
|
|
|
if ch == '"' {
|
|
|
|
quoted = true
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
val = append(val, ch)
|
|
|
|
}
|
|
|
|
}
|