357 lines
6.2 KiB
Go
357 lines
6.2 KiB
Go
|
// TOML JSONPath lexer.
|
||
|
//
|
||
|
// Written using the principles developed by Rob Pike in
|
||
|
// http://www.youtube.com/watch?v=HxaD_trXwRE
|
||
|
|
||
|
package toml
|
||
|
|
||
|
import (
|
||
|
"fmt"
|
||
|
"strconv"
|
||
|
"strings"
|
||
|
"unicode/utf8"
|
||
|
)
|
||
|
|
||
|
// Lexer state function
|
||
|
type queryLexStateFn func() queryLexStateFn
|
||
|
|
||
|
// Lexer definition
|
||
|
type queryLexer struct {
|
||
|
input string
|
||
|
start int
|
||
|
pos int
|
||
|
width int
|
||
|
tokens chan token
|
||
|
depth int
|
||
|
line int
|
||
|
col int
|
||
|
stringTerm string
|
||
|
}
|
||
|
|
||
|
func (l *queryLexer) run() {
|
||
|
for state := l.lexVoid; state != nil; {
|
||
|
state = state()
|
||
|
}
|
||
|
close(l.tokens)
|
||
|
}
|
||
|
|
||
|
func (l *queryLexer) nextStart() {
|
||
|
// iterate by runes (utf8 characters)
|
||
|
// search for newlines and advance line/col counts
|
||
|
for i := l.start; i < l.pos; {
|
||
|
r, width := utf8.DecodeRuneInString(l.input[i:])
|
||
|
if r == '\n' {
|
||
|
l.line++
|
||
|
l.col = 1
|
||
|
} else {
|
||
|
l.col++
|
||
|
}
|
||
|
i += width
|
||
|
}
|
||
|
// advance start position to next token
|
||
|
l.start = l.pos
|
||
|
}
|
||
|
|
||
|
func (l *queryLexer) emit(t tokenType) {
|
||
|
l.tokens <- token{
|
||
|
Position: Position{l.line, l.col},
|
||
|
typ: t,
|
||
|
val: l.input[l.start:l.pos],
|
||
|
}
|
||
|
l.nextStart()
|
||
|
}
|
||
|
|
||
|
func (l *queryLexer) emitWithValue(t tokenType, value string) {
|
||
|
l.tokens <- token{
|
||
|
Position: Position{l.line, l.col},
|
||
|
typ: t,
|
||
|
val: value,
|
||
|
}
|
||
|
l.nextStart()
|
||
|
}
|
||
|
|
||
|
func (l *queryLexer) next() rune {
|
||
|
if l.pos >= len(l.input) {
|
||
|
l.width = 0
|
||
|
return eof
|
||
|
}
|
||
|
var r rune
|
||
|
r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
|
||
|
l.pos += l.width
|
||
|
return r
|
||
|
}
|
||
|
|
||
|
func (l *queryLexer) ignore() {
|
||
|
l.nextStart()
|
||
|
}
|
||
|
|
||
|
func (l *queryLexer) backup() {
|
||
|
l.pos -= l.width
|
||
|
}
|
||
|
|
||
|
func (l *queryLexer) errorf(format string, args ...interface{}) queryLexStateFn {
|
||
|
l.tokens <- token{
|
||
|
Position: Position{l.line, l.col},
|
||
|
typ: tokenError,
|
||
|
val: fmt.Sprintf(format, args...),
|
||
|
}
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
func (l *queryLexer) peek() rune {
|
||
|
r := l.next()
|
||
|
l.backup()
|
||
|
return r
|
||
|
}
|
||
|
|
||
|
func (l *queryLexer) accept(valid string) bool {
|
||
|
if strings.ContainsRune(valid, l.next()) {
|
||
|
return true
|
||
|
}
|
||
|
l.backup()
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
func (l *queryLexer) follow(next string) bool {
|
||
|
return strings.HasPrefix(l.input[l.pos:], next)
|
||
|
}
|
||
|
|
||
|
func (l *queryLexer) lexVoid() queryLexStateFn {
|
||
|
for {
|
||
|
next := l.peek()
|
||
|
switch next {
|
||
|
case '$':
|
||
|
l.pos++
|
||
|
l.emit(tokenDollar)
|
||
|
continue
|
||
|
case '.':
|
||
|
if l.follow("..") {
|
||
|
l.pos += 2
|
||
|
l.emit(tokenDotDot)
|
||
|
} else {
|
||
|
l.pos++
|
||
|
l.emit(tokenDot)
|
||
|
}
|
||
|
continue
|
||
|
case '[':
|
||
|
l.pos++
|
||
|
l.emit(tokenLeftBracket)
|
||
|
continue
|
||
|
case ']':
|
||
|
l.pos++
|
||
|
l.emit(tokenRightBracket)
|
||
|
continue
|
||
|
case ',':
|
||
|
l.pos++
|
||
|
l.emit(tokenComma)
|
||
|
continue
|
||
|
case '*':
|
||
|
l.pos++
|
||
|
l.emit(tokenStar)
|
||
|
continue
|
||
|
case '(':
|
||
|
l.pos++
|
||
|
l.emit(tokenLeftParen)
|
||
|
continue
|
||
|
case ')':
|
||
|
l.pos++
|
||
|
l.emit(tokenRightParen)
|
||
|
continue
|
||
|
case '?':
|
||
|
l.pos++
|
||
|
l.emit(tokenQuestion)
|
||
|
continue
|
||
|
case ':':
|
||
|
l.pos++
|
||
|
l.emit(tokenColon)
|
||
|
continue
|
||
|
case '\'':
|
||
|
l.ignore()
|
||
|
l.stringTerm = string(next)
|
||
|
return l.lexString
|
||
|
case '"':
|
||
|
l.ignore()
|
||
|
l.stringTerm = string(next)
|
||
|
return l.lexString
|
||
|
}
|
||
|
|
||
|
if isSpace(next) {
|
||
|
l.next()
|
||
|
l.ignore()
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
if isAlphanumeric(next) {
|
||
|
return l.lexKey
|
||
|
}
|
||
|
|
||
|
if next == '+' || next == '-' || isDigit(next) {
|
||
|
return l.lexNumber
|
||
|
}
|
||
|
|
||
|
if l.next() == eof {
|
||
|
break
|
||
|
}
|
||
|
|
||
|
return l.errorf("unexpected char: '%v'", next)
|
||
|
}
|
||
|
l.emit(tokenEOF)
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
func (l *queryLexer) lexKey() queryLexStateFn {
|
||
|
for {
|
||
|
next := l.peek()
|
||
|
if !isAlphanumeric(next) {
|
||
|
l.emit(tokenKey)
|
||
|
return l.lexVoid
|
||
|
}
|
||
|
|
||
|
if l.next() == eof {
|
||
|
break
|
||
|
}
|
||
|
}
|
||
|
l.emit(tokenEOF)
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
func (l *queryLexer) lexString() queryLexStateFn {
|
||
|
l.pos++
|
||
|
l.ignore()
|
||
|
growingString := ""
|
||
|
|
||
|
for {
|
||
|
if l.follow(l.stringTerm) {
|
||
|
l.emitWithValue(tokenString, growingString)
|
||
|
l.pos++
|
||
|
l.ignore()
|
||
|
return l.lexVoid
|
||
|
}
|
||
|
|
||
|
if l.follow("\\\"") {
|
||
|
l.pos++
|
||
|
growingString += "\""
|
||
|
} else if l.follow("\\'") {
|
||
|
l.pos++
|
||
|
growingString += "'"
|
||
|
} else if l.follow("\\n") {
|
||
|
l.pos++
|
||
|
growingString += "\n"
|
||
|
} else if l.follow("\\b") {
|
||
|
l.pos++
|
||
|
growingString += "\b"
|
||
|
} else if l.follow("\\f") {
|
||
|
l.pos++
|
||
|
growingString += "\f"
|
||
|
} else if l.follow("\\/") {
|
||
|
l.pos++
|
||
|
growingString += "/"
|
||
|
} else if l.follow("\\t") {
|
||
|
l.pos++
|
||
|
growingString += "\t"
|
||
|
} else if l.follow("\\r") {
|
||
|
l.pos++
|
||
|
growingString += "\r"
|
||
|
} else if l.follow("\\\\") {
|
||
|
l.pos++
|
||
|
growingString += "\\"
|
||
|
} else if l.follow("\\u") {
|
||
|
l.pos += 2
|
||
|
code := ""
|
||
|
for i := 0; i < 4; i++ {
|
||
|
c := l.peek()
|
||
|
l.pos++
|
||
|
if !isHexDigit(c) {
|
||
|
return l.errorf("unfinished unicode escape")
|
||
|
}
|
||
|
code = code + string(c)
|
||
|
}
|
||
|
l.pos--
|
||
|
intcode, err := strconv.ParseInt(code, 16, 32)
|
||
|
if err != nil {
|
||
|
return l.errorf("invalid unicode escape: \\u" + code)
|
||
|
}
|
||
|
growingString += string(rune(intcode))
|
||
|
} else if l.follow("\\U") {
|
||
|
l.pos += 2
|
||
|
code := ""
|
||
|
for i := 0; i < 8; i++ {
|
||
|
c := l.peek()
|
||
|
l.pos++
|
||
|
if !isHexDigit(c) {
|
||
|
return l.errorf("unfinished unicode escape")
|
||
|
}
|
||
|
code = code + string(c)
|
||
|
}
|
||
|
l.pos--
|
||
|
intcode, err := strconv.ParseInt(code, 16, 32)
|
||
|
if err != nil {
|
||
|
return l.errorf("invalid unicode escape: \\u" + code)
|
||
|
}
|
||
|
growingString += string(rune(intcode))
|
||
|
} else if l.follow("\\") {
|
||
|
l.pos++
|
||
|
return l.errorf("invalid escape sequence: \\" + string(l.peek()))
|
||
|
} else {
|
||
|
growingString += string(l.peek())
|
||
|
}
|
||
|
|
||
|
if l.next() == eof {
|
||
|
break
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return l.errorf("unclosed string")
|
||
|
}
|
||
|
|
||
|
func (l *queryLexer) lexNumber() queryLexStateFn {
|
||
|
l.ignore()
|
||
|
if !l.accept("+") {
|
||
|
l.accept("-")
|
||
|
}
|
||
|
pointSeen := false
|
||
|
digitSeen := false
|
||
|
for {
|
||
|
next := l.next()
|
||
|
if next == '.' {
|
||
|
if pointSeen {
|
||
|
return l.errorf("cannot have two dots in one float")
|
||
|
}
|
||
|
if !isDigit(l.peek()) {
|
||
|
return l.errorf("float cannot end with a dot")
|
||
|
}
|
||
|
pointSeen = true
|
||
|
} else if isDigit(next) {
|
||
|
digitSeen = true
|
||
|
} else {
|
||
|
l.backup()
|
||
|
break
|
||
|
}
|
||
|
if pointSeen && !digitSeen {
|
||
|
return l.errorf("cannot start float with a dot")
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if !digitSeen {
|
||
|
return l.errorf("no digit in that number")
|
||
|
}
|
||
|
if pointSeen {
|
||
|
l.emit(tokenFloat)
|
||
|
} else {
|
||
|
l.emit(tokenInteger)
|
||
|
}
|
||
|
return l.lexVoid
|
||
|
}
|
||
|
|
||
|
// Entry point
|
||
|
func lexQuery(input string) chan token {
|
||
|
l := &queryLexer{
|
||
|
input: input,
|
||
|
tokens: make(chan token),
|
||
|
line: 1,
|
||
|
col: 1,
|
||
|
}
|
||
|
go l.run()
|
||
|
return l.tokens
|
||
|
}
|