From 3c6fd7910cafaa811c106a289b9290791ae63614 Mon Sep 17 00:00:00 2001 From: Jordan Orelli Date: Sun, 22 Mar 2015 10:41:12 -0400 Subject: [PATCH] clean up, clean up everybody, everywhere --- lex.go | 239 ++++++++++++++++++++++++++++++++++------------------ lex_test.go | 2 +- node.go | 2 +- parse.go | 6 +- 4 files changed, 163 insertions(+), 86 deletions(-) diff --git a/lex.go b/lex.go index 111df2f..7542a6e 100644 --- a/lex.go +++ b/lex.go @@ -9,6 +9,8 @@ import ( "unicode" ) +const eof = -1 + type tokenType int func (t tokenType) String() string { @@ -39,6 +41,8 @@ func (t tokenType) String() string { return "t_object_separator" case t_object_end: return "t_object_end" + case t_int: + return "t_int" default: panic(fmt.Sprintf("unknown token type: %v", t)) } @@ -58,9 +62,10 @@ const ( t_object_start // { t_object_end // } t_object_separator // : + t_int // an integer ) -type stateFn func(*lexer) (stateFn, error) +type stateFn func(*lexer) stateFn type token struct { t tokenType @@ -72,34 +77,36 @@ type lexer struct { out chan token buf []rune // running buffer for current lexeme backup []rune + err error } func (l *lexer) lex() { defer close(l.out) - var err error - fn := lexRoot - for { - fn, err = fn(l) - switch err { - case nil: - case io.EOF: - l.out <- token{t_eof, ""} - return - default: - l.out <- token{t_error, err.Error()} - return + for fn := lexRoot; fn != nil; { + fn = fn(l) + if l.err != nil { + fn = lexErrorf("read error: %s", l.err) } } } -func (l *lexer) next() (rune, error) { +func (l *lexer) next() rune { if len(l.backup) > 0 { r := l.backup[len(l.backup)-1] l.backup = l.backup[:len(l.backup)-1] - return r, nil + return r } r, _, err := l.in.ReadRune() - return r, err + switch err { + case io.EOF: + return eof + case nil: + return r + default: + l.err = err + return eof + } + return r } func (l *lexer) keep(r rune) { @@ -144,139 +151,205 @@ func fullTokens(c chan token) ([]token, error) { return tokens, nil } -func lexRoot(l *lexer) (stateFn, error) { - r, err := l.next() - if err != nil { - return nil, err +func lexErrorf(t string, args ...interface{}) stateFn { + return func(l *lexer) stateFn { + l.out <- token{t_error, fmt.Sprintf(t, args...)} + return nil } +} + +func lexRoot(l *lexer) stateFn { + r := l.next() switch { + case r == eof: + return nil case r == '=': l.keep(r) l.emit(t_equals) - return lexRoot, nil + return lexRoot case r == '"', r == '`': - return lexStringLiteral(r), nil + return lexStringLiteral(r) case r == '#': - return lexComment, nil + return lexComment case r == '[': l.keep(r) l.emit(t_list_start) - return lexRoot, nil + return lexRoot case r == ']': l.keep(r) l.emit(t_list_end) - return lexRoot, nil + return lexRoot case r == ',': l.keep(r) l.emit(t_list_separator) - return lexRoot, nil + return lexRoot case r == '{': l.keep(r) l.emit(t_object_start) - return lexRoot, nil + return lexRoot case r == '}': l.keep(r) l.emit(t_object_end) - return lexRoot, nil + return lexRoot case r == ':': l.keep(r) l.emit(t_object_separator) - return lexRoot, nil + return lexRoot + // case strings.IndexRune("-0123456789", r) >= 0: + // l.unread(r) + // return lexNumber, nil case unicode.IsSpace(r): - return lexRoot, nil + return lexRoot case unicode.IsLower(r): l.keep(r) - return lexName, nil + return lexName case unicode.IsUpper(r): l.keep(r) - return lexType, nil + return lexType default: - return nil, fmt.Errorf("unexpected rune in lexRoot: %c", r) + return lexErrorf("unexpected rune in lexRoot: %c", r) } } -func lexComment(l *lexer) (stateFn, error) { - r, err := l.next() - switch err { - case io.EOF: +func lexComment(l *lexer) stateFn { + switch r := l.next(); r { + case '\n': l.emit(t_comment) - return nil, io.EOF - case nil: - default: - return nil, err - } - switch { - case r == '\n': + return lexRoot + case eof: l.emit(t_comment) - return lexRoot, nil + return nil default: l.keep(r) - return lexComment, nil + return lexComment } } func lexStringLiteral(delim rune) stateFn { - return func(l *lexer) (stateFn, error) { - r, err := l.next() - if err != nil { - return nil, err - } - switch r { + return func(l *lexer) stateFn { + switch r := l.next(); r { case delim: l.emit(t_string) - return lexRoot, nil + return lexRoot case '\\': - r, err := l.next() - if err != nil { - return nil, err + switch r := l.next(); r { + case eof: + return lexErrorf("unexpected eof in string literal") + default: + l.keep(r) + return lexStringLiteral(delim) } - l.keep(r) - return lexStringLiteral(delim), nil + case eof: + return lexErrorf("unexpected eof in string literal") default: l.keep(r) - return lexStringLiteral(delim), nil + return lexStringLiteral(delim) } } } -func lexName(l *lexer) (stateFn, error) { - r, err := l.next() - switch err { - case io.EOF: - l.emit(t_name) - return nil, io.EOF - case nil: - default: - return nil, err - } +func lexName(l *lexer) stateFn { + r := l.next() switch { case unicode.IsLetter(r), unicode.IsDigit(r), r == '_': l.keep(r) - return lexName, nil + return lexName + case r == eof: + l.emit(t_name) + return nil default: l.emit(t_name) l.unread(r) - return lexRoot, nil + return lexRoot } } -func lexType(l *lexer) (stateFn, error) { - r, err := l.next() - switch err { - case io.EOF: - l.emit(t_type) - return nil, io.EOF - case nil: - default: - return nil, err - } +func lexType(l *lexer) stateFn { + r := l.next() switch { case unicode.IsLetter(r), unicode.IsDigit(r), r == '_': l.keep(r) - return lexType, nil + return lexType + case r == eof: + l.emit(t_type) + return nil default: l.emit(t_type) l.unread(r) - return lexRoot, nil + return lexRoot } } + +// func lexNumber(l *lexer) (stateFn, error) { +// r, err := l.next() +// if err != nil { +// return nil, err +// } +// +// switch { +// case r == '-', r == '+': +// l.keep(r) +// return lexNumber, nil +// case r == '0': +// l.keep(r) +// return lexHexOct, nil +// case strings.IndexRune("123456789", r) >= 0: +// l.keep(r) +// return lexDecimal, nil +// default: +// return nil, fmt.Errorf("unexpected rune in lexNumber: %c", r) +// } +// } +// +// func lexHexOct(l *lexer) (stateFn, error) { +// r, err := l.next() +// if err != nil { +// return nil, err +// } +// +// switch { +// case r == 'x', r == 'X': +// l.keep(r) +// return lexHex, nil +// case r == 'e', r == 'E': +// l.keep(r) +// return lexExponent, nil +// case r == '8', r == '9': +// return nil, fmt.Errorf("unexpected 8 or 9 in lexHexOct. there's no 8 or 9 in octal!") +// case strings.IndexRune("01234567", r) >= 0: +// l.keep(r) +// return lexOct, nil +// default: +// // we get here for the literals -0, +0 and 0 +// l.unread(r) +// l.emit(t_int) +// return lexRoot, nil +// } +// } +// +// func lexHex(l *lexer) (stateFn, error) { +// r, err := l.next() +// if err != nil { +// return nil, err +// } +// +// switch { +// +// } +// } +// +// func lexDecimal(l *lexer) (stateFn, error) { +// r, err := l.next() +// if err != nil { +// return nil, err +// } +// +// switch { +// case strings.IndexRune("0123456789", r) >= 0: +// l.keep(r) +// return lexDecimal, nil +// case r == '.': +// l.keep(r) +// return lexFloat, nil +// } +// } diff --git a/lex_test.go b/lex_test.go index 2a0426b..3ae07d0 100644 --- a/lex_test.go +++ b/lex_test.go @@ -58,7 +58,7 @@ func TestLexPrimities(t *testing.T) { t.Error(err) continue } - tokens = tokens[:len(tokens)-1] + // tokens = tokens[:len(tokens)-1] if len(tokens) != len(test.out) { t.Errorf("expected %d token, saw %d: %v", len(test.out), len(tokens), tokens) continue diff --git a/node.go b/node.go index 370c6c1..57fec19 100644 --- a/node.go +++ b/node.go @@ -36,7 +36,7 @@ func (n *rootNode) parse(p *parser) error { t := p.next() switch t.t { case t_error: - return fmt.Errorf("parse error: saw lex error while parsing root node: %v", t.s) + return fmt.Errorf("parse error: saw lex error while parsing root node: %v", t) case t_eof: return nil case t_comment: diff --git a/parse.go b/parse.go index 5e0a693..0c28f7b 100644 --- a/parse.go +++ b/parse.go @@ -39,7 +39,11 @@ func (p *parser) next() token { p.backup = p.backup[:len(p.backup)-1] return t } - return <-p.input + t, ok := <-p.input + if !ok { + return token{t_eof, "eof"} + } + return t } func (p *parser) peek() token {