// Package main implements a small rune-stream lexer built from state
// functions, currently recognizing double-quote and backtick string
// literals. Reconstructed from git patch ed55f2c4 ("lex").
package main

import (
	"errors"
	"fmt"
	"io"
	"strings"
	"testing"
)

// tokenType identifies the kind of a lexed token.
type tokenType int

const (
	t_error  tokenType = iota // a stored lex error
	t_string                  // a string literal
)

// stateFn is one state of the lexer. It returns the next state, or an
// error; io.EOF signals a clean end of input.
type stateFn func(*lexer) (stateFn, error)

// token is a single lexeme: its type and literal text.
type token struct {
	t tokenType
	s string
}

// lexer drives the state machine over a rune stream, emitting tokens
// on out.
type lexer struct {
	in  io.RuneReader
	out chan token
	buf []rune // running buffer for current lexeme
}

// lex runs state functions until clean EOF or error, then closes out.
// Errors are delivered in-band as a single t_error token.
func (l *lexer) lex() {
	defer close(l.out)
	fn := lexRoot
	for {
		next, err := fn(l)
		switch err {
		case nil:
			fn = next
		case io.EOF:
			// Clean end of input between tokens.
			return
		default:
			l.out <- token{t_error, err.Error()}
			return
		}
	}
}

// next reads one rune from the input.
func (l *lexer) next() (rune, error) {
	r, _, err := l.in.ReadRune()
	return r, err
}

// keep appends r to the in-progress lexeme, allocating the buffer lazily.
func (l *lexer) keep(r rune) {
	if l.buf == nil {
		l.buf = make([]rune, 0, 18)
	}
	l.buf = append(l.buf, r)
}

// emit sends the buffered lexeme as a token of type t and resets the
// buffer, retaining its capacity.
func (l *lexer) emit(t tokenType) {
	l.out <- token{t, string(l.buf)}
	l.buf = l.buf[:0]
}

// lexString lexes an in-memory string.
func lexString(in string) chan token {
	return lex(strings.NewReader(in))
}

// lex starts a lexer goroutine over r and returns its token channel.
// The channel is closed when input is exhausted or an error occurs, so
// the goroutine's lifetime is bounded by draining the channel.
func lex(r io.RuneReader) chan token {
	l := lexer{in: r, out: make(chan token)}
	go l.lex()
	return l.out
}

// fullTokens drains c into a slice, converting an in-band t_error token
// back into a Go error.
func fullTokens(c chan token) ([]token, error) {
	tokens := make([]token, 0, 32)
	for t := range c {
		if t.t == t_error {
			return nil, errors.New(t.s)
		}
		tokens = append(tokens, t)
	}
	return tokens, nil
}

// lexRoot is the top-level state: it dispatches on the next rune.
func lexRoot(l *lexer) (stateFn, error) {
	r, err := l.next()
	if err != nil {
		return nil, err
	}
	switch r {
	case '"', '`':
		return lexStringLiteral(r), nil
	default:
		return nil, fmt.Errorf("unexpected rune in lexRoot: %c", r)
	}
}

// lexStringLiteral returns a state that consumes a string literal opened
// by delim. A backslash escapes the following rune, which is kept
// verbatim (so \" yields ", but \n yields n — no escape translation).
//
// BUG FIX: the original returned io.EOF when input ended inside the
// literal, which lex() treated as a clean finish — an unterminated
// string was silently dropped. It also tail-recursed by returning a
// fresh closure for every rune; a loop is equivalent and allocation-free.
func lexStringLiteral(delim rune) stateFn {
	return func(l *lexer) (stateFn, error) {
		for {
			r, err := l.next()
			if err != nil {
				if err == io.EOF {
					return nil, fmt.Errorf("unterminated string literal: missing closing %c", delim)
				}
				return nil, err
			}
			switch r {
			case delim:
				l.emit(t_string)
				return lexRoot, nil
			case '\\':
				esc, err := l.next()
				if err != nil {
					if err == io.EOF {
						return nil, errors.New("unterminated escape sequence in string literal")
					}
					return nil, err
				}
				l.keep(esc)
			default:
				l.keep(r)
			}
		}
	}
}

// primitivesTests maps each input literal to the single token it should
// produce.
var primitivesTests = []struct {
	in  string
	out token
}{
	{`"x"`, token{t_string, "x"}},
	{`"yes"`, token{t_string, "yes"}},
	{`"this one has spaces"`, token{t_string, "this one has spaces"}},
	{`"this one has \"quotes\" in it"`, token{t_string, `this one has "quotes" in it`}},
	{"`this one is delimited by backticks`", token{t_string, "this one is delimited by backticks"}},
}

// TestLexPrimitives checks that each literal lexes to exactly the
// expected token.
//
// BUG FIX: the original (misspelled "TestLexPrimities") never compared
// the produced token against test.out, and its len(tokens) > 1 guard
// also accepted zero tokens — the table's expected values were dead.
func TestLexPrimitives(t *testing.T) {
	for _, test := range primitivesTests {
		tokens, err := fullTokens(lexString(test.in))
		if err != nil {
			t.Error(err)
			continue
		}
		if len(tokens) != 1 {
			t.Errorf("expected 1 token, saw %d: %v", len(tokens), tokens)
			continue
		}
		if tokens[0] != test.out {
			t.Errorf("%s: expected %v, got %v", test.in, test.out, tokens[0])
			continue
		}
		t.Logf("OK: %s", test.in)
	}
}