mirror of
https://codeberg.org/forgejo/forgejo.git
synced 2025-01-02 12:40:37 -05:00
118 lines
2.3 KiB
Go
118 lines
2.3 KiB
Go
|
package revision
|
||
|
|
||
|
import (
|
||
|
"bufio"
|
||
|
"io"
|
||
|
"unicode"
|
||
|
)
|
||
|
|
||
|
// runeCategoryValidator is a predicate over a single rune: it reports
// whether that rune belongs to the rune category the validator stands for.
type runeCategoryValidator func(rune) bool
|
||
|
|
||
|
// tokenizeExpression aggegates a series of runes matching check predicate into a single
|
||
|
// string and provides given tokenType as token type
|
||
|
func tokenizeExpression(ch rune, tokenType token, check runeCategoryValidator, r *bufio.Reader) (token, string, error) {
|
||
|
var data []rune
|
||
|
data = append(data, ch)
|
||
|
|
||
|
for {
|
||
|
c, _, err := r.ReadRune()
|
||
|
|
||
|
if c == zeroRune {
|
||
|
break
|
||
|
}
|
||
|
|
||
|
if err != nil {
|
||
|
return tokenError, "", err
|
||
|
}
|
||
|
|
||
|
if check(c) {
|
||
|
data = append(data, c)
|
||
|
} else {
|
||
|
err := r.UnreadRune()
|
||
|
|
||
|
if err != nil {
|
||
|
return tokenError, "", err
|
||
|
}
|
||
|
|
||
|
return tokenType, string(data), nil
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return tokenType, string(data), nil
|
||
|
}
|
||
|
|
||
|
// zeroRune is the rune with code point 0. scan reports eof when the rune it
// read equals zeroRune.
var zeroRune rune
|
||
|
|
||
|
// scanner is a lexical scanner that produces tokens from the runes of a
// buffered input.
type scanner struct {
	// r is the buffered reader the tokens are read from.
	r *bufio.Reader
}
|
||
|
|
||
|
// newScanner returns a new instance of scanner.
|
||
|
func newScanner(r io.Reader) *scanner {
|
||
|
return &scanner{r: bufio.NewReader(r)}
|
||
|
}
|
||
|
|
||
|
// Scan extracts tokens and their strings counterpart
|
||
|
// from the reader
|
||
|
func (s *scanner) scan() (token, string, error) {
|
||
|
ch, _, err := s.r.ReadRune()
|
||
|
|
||
|
if err != nil && err != io.EOF {
|
||
|
return tokenError, "", err
|
||
|
}
|
||
|
|
||
|
switch ch {
|
||
|
case zeroRune:
|
||
|
return eof, "", nil
|
||
|
case ':':
|
||
|
return colon, string(ch), nil
|
||
|
case '~':
|
||
|
return tilde, string(ch), nil
|
||
|
case '^':
|
||
|
return caret, string(ch), nil
|
||
|
case '.':
|
||
|
return dot, string(ch), nil
|
||
|
case '/':
|
||
|
return slash, string(ch), nil
|
||
|
case '{':
|
||
|
return obrace, string(ch), nil
|
||
|
case '}':
|
||
|
return cbrace, string(ch), nil
|
||
|
case '-':
|
||
|
return minus, string(ch), nil
|
||
|
case '@':
|
||
|
return at, string(ch), nil
|
||
|
case '\\':
|
||
|
return aslash, string(ch), nil
|
||
|
case '?':
|
||
|
return qmark, string(ch), nil
|
||
|
case '*':
|
||
|
return asterisk, string(ch), nil
|
||
|
case '[':
|
||
|
return obracket, string(ch), nil
|
||
|
case '!':
|
||
|
return emark, string(ch), nil
|
||
|
}
|
||
|
|
||
|
if unicode.IsSpace(ch) {
|
||
|
return space, string(ch), nil
|
||
|
}
|
||
|
|
||
|
if unicode.IsControl(ch) {
|
||
|
return control, string(ch), nil
|
||
|
}
|
||
|
|
||
|
if unicode.IsLetter(ch) {
|
||
|
return tokenizeExpression(ch, word, unicode.IsLetter, s.r)
|
||
|
}
|
||
|
|
||
|
if unicode.IsNumber(ch) {
|
||
|
return tokenizeExpression(ch, number, unicode.IsNumber, s.r)
|
||
|
}
|
||
|
|
||
|
return tokenError, string(ch), nil
|
||
|
}
|