clean up, clean up

everybody, everywhere
master
Jordan Orelli 10 years ago
parent 04d982c3d9
commit 3c6fd7910c

239
lex.go

@ -9,6 +9,8 @@ import (
"unicode" "unicode"
) )
const eof = -1
type tokenType int type tokenType int
func (t tokenType) String() string { func (t tokenType) String() string {
@ -39,6 +41,8 @@ func (t tokenType) String() string {
return "t_object_separator" return "t_object_separator"
case t_object_end: case t_object_end:
return "t_object_end" return "t_object_end"
case t_int:
return "t_int"
default: default:
panic(fmt.Sprintf("unknown token type: %v", t)) panic(fmt.Sprintf("unknown token type: %v", t))
} }
@ -58,9 +62,10 @@ const (
t_object_start // { t_object_start // {
t_object_end // } t_object_end // }
t_object_separator // : t_object_separator // :
t_int // an integer
) )
type stateFn func(*lexer) (stateFn, error) type stateFn func(*lexer) stateFn
type token struct { type token struct {
t tokenType t tokenType
@ -72,34 +77,36 @@ type lexer struct {
out chan token out chan token
buf []rune // running buffer for current lexeme buf []rune // running buffer for current lexeme
backup []rune backup []rune
err error
} }
func (l *lexer) lex() { func (l *lexer) lex() {
defer close(l.out) defer close(l.out)
var err error for fn := lexRoot; fn != nil; {
fn := lexRoot fn = fn(l)
for { if l.err != nil {
fn, err = fn(l) fn = lexErrorf("read error: %s", l.err)
switch err {
case nil:
case io.EOF:
l.out <- token{t_eof, ""}
return
default:
l.out <- token{t_error, err.Error()}
return
} }
} }
} }
func (l *lexer) next() (rune, error) { func (l *lexer) next() rune {
if len(l.backup) > 0 { if len(l.backup) > 0 {
r := l.backup[len(l.backup)-1] r := l.backup[len(l.backup)-1]
l.backup = l.backup[:len(l.backup)-1] l.backup = l.backup[:len(l.backup)-1]
return r, nil return r
} }
r, _, err := l.in.ReadRune() r, _, err := l.in.ReadRune()
return r, err switch err {
case io.EOF:
return eof
case nil:
return r
default:
l.err = err
return eof
}
return r
} }
func (l *lexer) keep(r rune) { func (l *lexer) keep(r rune) {
@ -144,139 +151,205 @@ func fullTokens(c chan token) ([]token, error) {
return tokens, nil return tokens, nil
} }
func lexRoot(l *lexer) (stateFn, error) { func lexErrorf(t string, args ...interface{}) stateFn {
r, err := l.next() return func(l *lexer) stateFn {
if err != nil { l.out <- token{t_error, fmt.Sprintf(t, args...)}
return nil, err return nil
} }
}
func lexRoot(l *lexer) stateFn {
r := l.next()
switch { switch {
case r == eof:
return nil
case r == '=': case r == '=':
l.keep(r) l.keep(r)
l.emit(t_equals) l.emit(t_equals)
return lexRoot, nil return lexRoot
case r == '"', r == '`': case r == '"', r == '`':
return lexStringLiteral(r), nil return lexStringLiteral(r)
case r == '#': case r == '#':
return lexComment, nil return lexComment
case r == '[': case r == '[':
l.keep(r) l.keep(r)
l.emit(t_list_start) l.emit(t_list_start)
return lexRoot, nil return lexRoot
case r == ']': case r == ']':
l.keep(r) l.keep(r)
l.emit(t_list_end) l.emit(t_list_end)
return lexRoot, nil return lexRoot
case r == ',': case r == ',':
l.keep(r) l.keep(r)
l.emit(t_list_separator) l.emit(t_list_separator)
return lexRoot, nil return lexRoot
case r == '{': case r == '{':
l.keep(r) l.keep(r)
l.emit(t_object_start) l.emit(t_object_start)
return lexRoot, nil return lexRoot
case r == '}': case r == '}':
l.keep(r) l.keep(r)
l.emit(t_object_end) l.emit(t_object_end)
return lexRoot, nil return lexRoot
case r == ':': case r == ':':
l.keep(r) l.keep(r)
l.emit(t_object_separator) l.emit(t_object_separator)
return lexRoot, nil return lexRoot
// case strings.IndexRune("-0123456789", r) >= 0:
// l.unread(r)
// return lexNumber, nil
case unicode.IsSpace(r): case unicode.IsSpace(r):
return lexRoot, nil return lexRoot
case unicode.IsLower(r): case unicode.IsLower(r):
l.keep(r) l.keep(r)
return lexName, nil return lexName
case unicode.IsUpper(r): case unicode.IsUpper(r):
l.keep(r) l.keep(r)
return lexType, nil return lexType
default: default:
return nil, fmt.Errorf("unexpected rune in lexRoot: %c", r) return lexErrorf("unexpected rune in lexRoot: %c", r)
} }
} }
func lexComment(l *lexer) (stateFn, error) { func lexComment(l *lexer) stateFn {
r, err := l.next() switch r := l.next(); r {
switch err { case '\n':
case io.EOF:
l.emit(t_comment) l.emit(t_comment)
return nil, io.EOF return lexRoot
case nil: case eof:
default:
return nil, err
}
switch {
case r == '\n':
l.emit(t_comment) l.emit(t_comment)
return lexRoot, nil return nil
default: default:
l.keep(r) l.keep(r)
return lexComment, nil return lexComment
} }
} }
func lexStringLiteral(delim rune) stateFn { func lexStringLiteral(delim rune) stateFn {
return func(l *lexer) (stateFn, error) { return func(l *lexer) stateFn {
r, err := l.next() switch r := l.next(); r {
if err != nil {
return nil, err
}
switch r {
case delim: case delim:
l.emit(t_string) l.emit(t_string)
return lexRoot, nil return lexRoot
case '\\': case '\\':
r, err := l.next() switch r := l.next(); r {
if err != nil { case eof:
return nil, err return lexErrorf("unexpected eof in string literal")
default:
l.keep(r)
return lexStringLiteral(delim)
} }
l.keep(r) case eof:
return lexStringLiteral(delim), nil return lexErrorf("unexpected eof in string literal")
default: default:
l.keep(r) l.keep(r)
return lexStringLiteral(delim), nil return lexStringLiteral(delim)
} }
} }
} }
func lexName(l *lexer) (stateFn, error) { func lexName(l *lexer) stateFn {
r, err := l.next() r := l.next()
switch err {
case io.EOF:
l.emit(t_name)
return nil, io.EOF
case nil:
default:
return nil, err
}
switch { switch {
case unicode.IsLetter(r), unicode.IsDigit(r), r == '_': case unicode.IsLetter(r), unicode.IsDigit(r), r == '_':
l.keep(r) l.keep(r)
return lexName, nil return lexName
case r == eof:
l.emit(t_name)
return nil
default: default:
l.emit(t_name) l.emit(t_name)
l.unread(r) l.unread(r)
return lexRoot, nil return lexRoot
} }
} }
func lexType(l *lexer) (stateFn, error) { func lexType(l *lexer) stateFn {
r, err := l.next() r := l.next()
switch err {
case io.EOF:
l.emit(t_type)
return nil, io.EOF
case nil:
default:
return nil, err
}
switch { switch {
case unicode.IsLetter(r), unicode.IsDigit(r), r == '_': case unicode.IsLetter(r), unicode.IsDigit(r), r == '_':
l.keep(r) l.keep(r)
return lexType, nil return lexType
case r == eof:
l.emit(t_type)
return nil
default: default:
l.emit(t_type) l.emit(t_type)
l.unread(r) l.unread(r)
return lexRoot, nil return lexRoot
} }
} }
// func lexNumber(l *lexer) (stateFn, error) {
// r, err := l.next()
// if err != nil {
// return nil, err
// }
//
// switch {
// case r == '-', r == '+':
// l.keep(r)
// return lexNumber, nil
// case r == '0':
// l.keep(r)
// return lexHexOct, nil
// case strings.IndexRune("123456789", r) >= 0:
// l.keep(r)
// return lexDecimal, nil
// default:
// return nil, fmt.Errorf("unexpected rune in lexNumber: %c", r)
// }
// }
//
// func lexHexOct(l *lexer) (stateFn, error) {
// r, err := l.next()
// if err != nil {
// return nil, err
// }
//
// switch {
// case r == 'x', r == 'X':
// l.keep(r)
// return lexHex, nil
// case r == 'e', r == 'E':
// l.keep(r)
// return lexExponent, nil
// case r == '8', r == '9':
// return nil, fmt.Errorf("unexpected 8 or 9 in lexHexOct. there's no 8 or 9 in octal!")
// case strings.IndexRune("01234567", r) >= 0:
// l.keep(r)
// return lexOct, nil
// default:
// // we get here for the literals -0, +0 and 0
// l.unread(r)
// l.emit(t_int)
// return lexRoot, nil
// }
// }
//
// func lexHex(l *lexer) (stateFn, error) {
// r, err := l.next()
// if err != nil {
// return nil, err
// }
//
// switch {
//
// }
// }
//
// func lexDecimal(l *lexer) (stateFn, error) {
// r, err := l.next()
// if err != nil {
// return nil, err
// }
//
// switch {
// case strings.IndexRune("0123456789", r) >= 0:
// l.keep(r)
// return lexDecimal, nil
// case r == '.':
// l.keep(r)
// return lexFloat, nil
// }
// }

@ -58,7 +58,7 @@ func TestLexPrimities(t *testing.T) {
t.Error(err) t.Error(err)
continue continue
} }
tokens = tokens[:len(tokens)-1] // tokens = tokens[:len(tokens)-1]
if len(tokens) != len(test.out) { if len(tokens) != len(test.out) {
t.Errorf("expected %d token, saw %d: %v", len(test.out), len(tokens), tokens) t.Errorf("expected %d token, saw %d: %v", len(test.out), len(tokens), tokens)
continue continue

@ -36,7 +36,7 @@ func (n *rootNode) parse(p *parser) error {
t := p.next() t := p.next()
switch t.t { switch t.t {
case t_error: case t_error:
return fmt.Errorf("parse error: saw lex error while parsing root node: %v", t.s) return fmt.Errorf("parse error: saw lex error while parsing root node: %v", t)
case t_eof: case t_eof:
return nil return nil
case t_comment: case t_comment:

@ -39,7 +39,11 @@ func (p *parser) next() token {
p.backup = p.backup[:len(p.backup)-1] p.backup = p.backup[:len(p.backup)-1]
return t return t
} }
return <-p.input t, ok := <-p.input
if !ok {
return token{t_eof, "eof"}
}
return t
} }
func (p *parser) peek() token { func (p *parser) peek() token {

Loading…
Cancel
Save