|
|
|
@ -9,6 +9,8 @@ import (
|
|
|
|
|
"unicode"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// eof is the sentinel rune value that (*lexer).next returns when the input is
// exhausted or a read error occurred. It is negative, so it can never collide
// with a real code point read from the input.
const eof = -1
|
|
|
|
|
|
|
|
|
|
// tokenType identifies the kind of a lexed token (for example t_int or
// t_object_end). Its String method returns the constant's name.
type tokenType int
|
|
|
|
|
|
|
|
|
|
func (t tokenType) String() string {
|
|
|
|
@ -39,6 +41,8 @@ func (t tokenType) String() string {
|
|
|
|
|
return "t_object_separator"
|
|
|
|
|
case t_object_end:
|
|
|
|
|
return "t_object_end"
|
|
|
|
|
case t_int:
|
|
|
|
|
return "t_int"
|
|
|
|
|
default:
|
|
|
|
|
panic(fmt.Sprintf("unknown token type: %v", t))
|
|
|
|
|
}
|
|
|
|
@ -58,9 +62,10 @@ const (
|
|
|
|
|
t_object_start // {
|
|
|
|
|
t_object_end // }
|
|
|
|
|
t_object_separator // :
|
|
|
|
|
t_int // an integer
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
type stateFn func(*lexer) (stateFn, error)
|
|
|
|
|
type stateFn func(*lexer) stateFn
|
|
|
|
|
|
|
|
|
|
type token struct {
|
|
|
|
|
t tokenType
|
|
|
|
@ -72,34 +77,36 @@ type lexer struct {
|
|
|
|
|
out chan token
|
|
|
|
|
buf []rune // running buffer for current lexeme
|
|
|
|
|
backup []rune
|
|
|
|
|
err error
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (l *lexer) lex() {
|
|
|
|
|
defer close(l.out)
|
|
|
|
|
var err error
|
|
|
|
|
fn := lexRoot
|
|
|
|
|
for {
|
|
|
|
|
fn, err = fn(l)
|
|
|
|
|
switch err {
|
|
|
|
|
case nil:
|
|
|
|
|
case io.EOF:
|
|
|
|
|
l.out <- token{t_eof, ""}
|
|
|
|
|
return
|
|
|
|
|
default:
|
|
|
|
|
l.out <- token{t_error, err.Error()}
|
|
|
|
|
return
|
|
|
|
|
for fn := lexRoot; fn != nil; {
|
|
|
|
|
fn = fn(l)
|
|
|
|
|
if l.err != nil {
|
|
|
|
|
fn = lexErrorf("read error: %s", l.err)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (l *lexer) next() (rune, error) {
|
|
|
|
|
func (l *lexer) next() rune {
|
|
|
|
|
if len(l.backup) > 0 {
|
|
|
|
|
r := l.backup[len(l.backup)-1]
|
|
|
|
|
l.backup = l.backup[:len(l.backup)-1]
|
|
|
|
|
return r, nil
|
|
|
|
|
return r
|
|
|
|
|
}
|
|
|
|
|
r, _, err := l.in.ReadRune()
|
|
|
|
|
return r, err
|
|
|
|
|
switch err {
|
|
|
|
|
case io.EOF:
|
|
|
|
|
return eof
|
|
|
|
|
case nil:
|
|
|
|
|
return r
|
|
|
|
|
default:
|
|
|
|
|
l.err = err
|
|
|
|
|
return eof
|
|
|
|
|
}
|
|
|
|
|
return r
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (l *lexer) keep(r rune) {
|
|
|
|
@ -144,139 +151,205 @@ func fullTokens(c chan token) ([]token, error) {
|
|
|
|
|
return tokens, nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func lexRoot(l *lexer) (stateFn, error) {
|
|
|
|
|
r, err := l.next()
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
func lexErrorf(t string, args ...interface{}) stateFn {
|
|
|
|
|
return func(l *lexer) stateFn {
|
|
|
|
|
l.out <- token{t_error, fmt.Sprintf(t, args...)}
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func lexRoot(l *lexer) stateFn {
|
|
|
|
|
r := l.next()
|
|
|
|
|
switch {
|
|
|
|
|
case r == eof:
|
|
|
|
|
return nil
|
|
|
|
|
case r == '=':
|
|
|
|
|
l.keep(r)
|
|
|
|
|
l.emit(t_equals)
|
|
|
|
|
return lexRoot, nil
|
|
|
|
|
return lexRoot
|
|
|
|
|
case r == '"', r == '`':
|
|
|
|
|
return lexStringLiteral(r), nil
|
|
|
|
|
return lexStringLiteral(r)
|
|
|
|
|
case r == '#':
|
|
|
|
|
return lexComment, nil
|
|
|
|
|
return lexComment
|
|
|
|
|
case r == '[':
|
|
|
|
|
l.keep(r)
|
|
|
|
|
l.emit(t_list_start)
|
|
|
|
|
return lexRoot, nil
|
|
|
|
|
return lexRoot
|
|
|
|
|
case r == ']':
|
|
|
|
|
l.keep(r)
|
|
|
|
|
l.emit(t_list_end)
|
|
|
|
|
return lexRoot, nil
|
|
|
|
|
return lexRoot
|
|
|
|
|
case r == ',':
|
|
|
|
|
l.keep(r)
|
|
|
|
|
l.emit(t_list_separator)
|
|
|
|
|
return lexRoot, nil
|
|
|
|
|
return lexRoot
|
|
|
|
|
case r == '{':
|
|
|
|
|
l.keep(r)
|
|
|
|
|
l.emit(t_object_start)
|
|
|
|
|
return lexRoot, nil
|
|
|
|
|
return lexRoot
|
|
|
|
|
case r == '}':
|
|
|
|
|
l.keep(r)
|
|
|
|
|
l.emit(t_object_end)
|
|
|
|
|
return lexRoot, nil
|
|
|
|
|
return lexRoot
|
|
|
|
|
case r == ':':
|
|
|
|
|
l.keep(r)
|
|
|
|
|
l.emit(t_object_separator)
|
|
|
|
|
return lexRoot, nil
|
|
|
|
|
return lexRoot
|
|
|
|
|
// case strings.IndexRune("-0123456789", r) >= 0:
|
|
|
|
|
// l.unread(r)
|
|
|
|
|
// return lexNumber, nil
|
|
|
|
|
case unicode.IsSpace(r):
|
|
|
|
|
return lexRoot, nil
|
|
|
|
|
return lexRoot
|
|
|
|
|
case unicode.IsLower(r):
|
|
|
|
|
l.keep(r)
|
|
|
|
|
return lexName, nil
|
|
|
|
|
return lexName
|
|
|
|
|
case unicode.IsUpper(r):
|
|
|
|
|
l.keep(r)
|
|
|
|
|
return lexType, nil
|
|
|
|
|
return lexType
|
|
|
|
|
default:
|
|
|
|
|
return nil, fmt.Errorf("unexpected rune in lexRoot: %c", r)
|
|
|
|
|
return lexErrorf("unexpected rune in lexRoot: %c", r)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func lexComment(l *lexer) (stateFn, error) {
|
|
|
|
|
r, err := l.next()
|
|
|
|
|
switch err {
|
|
|
|
|
case io.EOF:
|
|
|
|
|
func lexComment(l *lexer) stateFn {
|
|
|
|
|
switch r := l.next(); r {
|
|
|
|
|
case '\n':
|
|
|
|
|
l.emit(t_comment)
|
|
|
|
|
return nil, io.EOF
|
|
|
|
|
case nil:
|
|
|
|
|
default:
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
|
|
|
|
switch {
|
|
|
|
|
case r == '\n':
|
|
|
|
|
return lexRoot
|
|
|
|
|
case eof:
|
|
|
|
|
l.emit(t_comment)
|
|
|
|
|
return lexRoot, nil
|
|
|
|
|
return nil
|
|
|
|
|
default:
|
|
|
|
|
l.keep(r)
|
|
|
|
|
return lexComment, nil
|
|
|
|
|
return lexComment
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func lexStringLiteral(delim rune) stateFn {
|
|
|
|
|
return func(l *lexer) (stateFn, error) {
|
|
|
|
|
r, err := l.next()
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
|
|
|
|
switch r {
|
|
|
|
|
return func(l *lexer) stateFn {
|
|
|
|
|
switch r := l.next(); r {
|
|
|
|
|
case delim:
|
|
|
|
|
l.emit(t_string)
|
|
|
|
|
return lexRoot, nil
|
|
|
|
|
return lexRoot
|
|
|
|
|
case '\\':
|
|
|
|
|
r, err := l.next()
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
|
|
|
|
switch r := l.next(); r {
|
|
|
|
|
case eof:
|
|
|
|
|
return lexErrorf("unexpected eof in string literal")
|
|
|
|
|
default:
|
|
|
|
|
l.keep(r)
|
|
|
|
|
return lexStringLiteral(delim), nil
|
|
|
|
|
return lexStringLiteral(delim)
|
|
|
|
|
}
|
|
|
|
|
case eof:
|
|
|
|
|
return lexErrorf("unexpected eof in string literal")
|
|
|
|
|
default:
|
|
|
|
|
l.keep(r)
|
|
|
|
|
return lexStringLiteral(delim), nil
|
|
|
|
|
return lexStringLiteral(delim)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func lexName(l *lexer) (stateFn, error) {
|
|
|
|
|
r, err := l.next()
|
|
|
|
|
switch err {
|
|
|
|
|
case io.EOF:
|
|
|
|
|
l.emit(t_name)
|
|
|
|
|
return nil, io.EOF
|
|
|
|
|
case nil:
|
|
|
|
|
default:
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
|
|
|
|
func lexName(l *lexer) stateFn {
|
|
|
|
|
r := l.next()
|
|
|
|
|
switch {
|
|
|
|
|
case unicode.IsLetter(r), unicode.IsDigit(r), r == '_':
|
|
|
|
|
l.keep(r)
|
|
|
|
|
return lexName, nil
|
|
|
|
|
return lexName
|
|
|
|
|
case r == eof:
|
|
|
|
|
l.emit(t_name)
|
|
|
|
|
return nil
|
|
|
|
|
default:
|
|
|
|
|
l.emit(t_name)
|
|
|
|
|
l.unread(r)
|
|
|
|
|
return lexRoot, nil
|
|
|
|
|
return lexRoot
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func lexType(l *lexer) (stateFn, error) {
|
|
|
|
|
r, err := l.next()
|
|
|
|
|
switch err {
|
|
|
|
|
case io.EOF:
|
|
|
|
|
l.emit(t_type)
|
|
|
|
|
return nil, io.EOF
|
|
|
|
|
case nil:
|
|
|
|
|
default:
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
|
|
|
|
func lexType(l *lexer) stateFn {
|
|
|
|
|
r := l.next()
|
|
|
|
|
switch {
|
|
|
|
|
case unicode.IsLetter(r), unicode.IsDigit(r), r == '_':
|
|
|
|
|
l.keep(r)
|
|
|
|
|
return lexType, nil
|
|
|
|
|
return lexType
|
|
|
|
|
case r == eof:
|
|
|
|
|
l.emit(t_type)
|
|
|
|
|
return nil
|
|
|
|
|
default:
|
|
|
|
|
l.emit(t_type)
|
|
|
|
|
l.unread(r)
|
|
|
|
|
return lexRoot, nil
|
|
|
|
|
return lexRoot
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// func lexNumber(l *lexer) (stateFn, error) {
|
|
|
|
|
// r, err := l.next()
|
|
|
|
|
// if err != nil {
|
|
|
|
|
// return nil, err
|
|
|
|
|
// }
|
|
|
|
|
//
|
|
|
|
|
// switch {
|
|
|
|
|
// case r == '-', r == '+':
|
|
|
|
|
// l.keep(r)
|
|
|
|
|
// return lexNumber, nil
|
|
|
|
|
// case r == '0':
|
|
|
|
|
// l.keep(r)
|
|
|
|
|
// return lexHexOct, nil
|
|
|
|
|
// case strings.IndexRune("123456789", r) >= 0:
|
|
|
|
|
// l.keep(r)
|
|
|
|
|
// return lexDecimal, nil
|
|
|
|
|
// default:
|
|
|
|
|
// return nil, fmt.Errorf("unexpected rune in lexNumber: %c", r)
|
|
|
|
|
// }
|
|
|
|
|
// }
|
|
|
|
|
//
|
|
|
|
|
// func lexHexOct(l *lexer) (stateFn, error) {
|
|
|
|
|
// r, err := l.next()
|
|
|
|
|
// if err != nil {
|
|
|
|
|
// return nil, err
|
|
|
|
|
// }
|
|
|
|
|
//
|
|
|
|
|
// switch {
|
|
|
|
|
// case r == 'x', r == 'X':
|
|
|
|
|
// l.keep(r)
|
|
|
|
|
// return lexHex, nil
|
|
|
|
|
// case r == 'e', r == 'E':
|
|
|
|
|
// l.keep(r)
|
|
|
|
|
// return lexExponent, nil
|
|
|
|
|
// case r == '8', r == '9':
|
|
|
|
|
// return nil, fmt.Errorf("unexpected 8 or 9 in lexHexOct. there's no 8 or 9 in octal!")
|
|
|
|
|
// case strings.IndexRune("01234567", r) >= 0:
|
|
|
|
|
// l.keep(r)
|
|
|
|
|
// return lexOct, nil
|
|
|
|
|
// default:
|
|
|
|
|
// // we get here for the literals -0, +0 and 0
|
|
|
|
|
// l.unread(r)
|
|
|
|
|
// l.emit(t_int)
|
|
|
|
|
// return lexRoot, nil
|
|
|
|
|
// }
|
|
|
|
|
// }
|
|
|
|
|
//
|
|
|
|
|
// func lexHex(l *lexer) (stateFn, error) {
|
|
|
|
|
// r, err := l.next()
|
|
|
|
|
// if err != nil {
|
|
|
|
|
// return nil, err
|
|
|
|
|
// }
|
|
|
|
|
//
|
|
|
|
|
// switch {
|
|
|
|
|
//
|
|
|
|
|
// }
|
|
|
|
|
// }
|
|
|
|
|
//
|
|
|
|
|
// func lexDecimal(l *lexer) (stateFn, error) {
|
|
|
|
|
// r, err := l.next()
|
|
|
|
|
// if err != nil {
|
|
|
|
|
// return nil, err
|
|
|
|
|
// }
|
|
|
|
|
//
|
|
|
|
|
// switch {
|
|
|
|
|
// case strings.IndexRune("0123456789", r) >= 0:
|
|
|
|
|
// l.keep(r)
|
|
|
|
|
// return lexDecimal, nil
|
|
|
|
|
// case r == '.':
|
|
|
|
|
// l.keep(r)
|
|
|
|
|
// return lexFloat, nil
|
|
|
|
|
// }
|
|
|
|
|
// }
|
|
|
|
|