master
Jordan Orelli 10 years ago
commit ed55f2c4ad

124
lex.go

@ -0,0 +1,124 @@
package main
import (
"errors"
"fmt"
"io"
"strings"
)
// tokenType identifies the kind of a token produced by the lexer.
type tokenType int
// Token kinds. The values are assigned by iota, so the numeric value of each
// kind follows from its position in this block.
const (
t_error tokenType = iota // a stored lex error; the token's text is the error message
t_string // a string literal
)
// stateFn is one state of the lexer's state machine. It is called with the
// lexer and returns the state to run next together with an error. lexer.lex
// stops on any non-nil error: io.EOF ends lexing quietly, any other error is
// sent out as a t_error token.
type stateFn func(*lexer) (stateFn, error)
// token is a single item sent on the lexer's output channel.
type token struct {
// t is the kind of token.
t tokenType
// s is the text of the lexeme; for t_error tokens it is the error message.
s string
}
// lexer holds the state of one lexing run: it reads runes from in,
// accumulates the current lexeme in buf, and sends finished tokens on out.
type lexer struct {
in io.RuneReader // source of input runes
out chan token // receives emitted tokens; closed by lex when it returns
buf []rune // running buffer for current lexeme
}
// lex drives the state machine, starting from lexRoot, until a state reports
// an error. io.EOF ends the run quietly; any other error is sent on l.out as a
// t_error token carrying the error's message. l.out is closed on return in
// every case.
func (l *lexer) lex() {
	defer close(l.out)

	var state stateFn = lexRoot
	for {
		following, err := state(l)
		if err == io.EOF {
			// Input exhausted: nothing more to report.
			return
		}
		if err != nil {
			l.out <- token{t_error, err.Error()}
			return
		}
		state = following
	}
}
// next reads one rune from the lexer's input and returns it together with any
// error from the underlying reader. The byte width reported by ReadRune is
// discarded.
func (l *lexer) next() (r rune, err error) {
	r, _, err = l.in.ReadRune()
	return r, err
}
// keep appends r to the buffer for the lexeme currently being read. A nil
// buffer is first replaced by an empty one with room for 18 runes.
func (l *lexer) keep(r rune) {
	pending := l.buf
	if pending == nil {
		pending = make([]rune, 0, 18)
	}
	l.buf = append(pending, r)
}
// emit sends the buffered lexeme on l.out as a token of kind t, then empties
// the buffer while keeping its backing storage for the next lexeme.
func (l *lexer) emit(t tokenType) {
	tok := token{t: t, s: string(l.buf)}
	l.out <- tok
	l.buf = l.buf[:0]
}
// lexString lexes the text of in and returns the channel on which the
// resulting tokens are delivered. It is a convenience wrapper around lex.
func lexString(in string) chan token {
	return lex(strings.NewReader(in))
}
// lex starts a lexer reading from r in its own goroutine and returns the
// unbuffered channel on which that lexer sends its tokens. The lexer closes
// the channel when it stops.
func lex(r io.RuneReader) chan token {
	out := make(chan token)
	l := &lexer{in: r, out: out}
	go l.lex()
	return out
}
// fullTokens reads c until it is closed and returns every token received, in
// order. If a t_error token arrives, reading stops at once and the token's
// text is returned as an error with a nil slice.
func fullTokens(c chan token) ([]token, error) {
	collected := make([]token, 0, 32)
	for tok := range c {
		if tok.t != t_error {
			collected = append(collected, tok)
			continue
		}
		return nil, errors.New(tok.s)
	}
	return collected, nil
}
// lexRoot is the top-level lexer state. It reads a single rune: a double
// quote or a backtick opens a string literal delimited by that same rune, and
// any other rune is an error. An error from reading is returned unchanged.
func lexRoot(l *lexer) (stateFn, error) {
	r, err := l.next()
	if err != nil {
		return nil, err
	}
	if r == '"' || r == '`' {
		return lexStringLiteral(r), nil
	}
	return nil, fmt.Errorf("unexpected rune in lexRoot: %c", r)
}
// lexStringLiteral returns the lexer state that reads the body of a string
// literal whose opening delimiter, delim, has already been consumed.
//
// Each invocation of the returned state consumes one rune of the body:
//   - delim ends the literal: the buffered text is emitted as a t_string
//     token and lexing continues at lexRoot;
//   - a backslash consumes the following rune as well and keeps that rune
//     verbatim, so an escaped delimiter does not end the literal;
//   - any other rune is kept as part of the literal.
//
// Running out of input before the closing delimiter is an error. The reader's
// io.EOF is replaced by a distinct error because lexer.lex treats io.EOF as a
// clean end of input; passing it through would silently discard an
// unterminated literal. Other read errors are returned unchanged.
func lexStringLiteral(delim rune) stateFn {
	// read fetches the next rune of the literal's body, turning a premature
	// end of input into a reportable error.
	read := func(l *lexer) (rune, error) {
		r, err := l.next()
		if err == io.EOF {
			return 0, errors.New("unexpected EOF in string literal")
		}
		return r, err
	}

	// self lets the state return itself instead of allocating a fresh
	// closure for every rune of the literal.
	var self stateFn
	self = func(l *lexer) (stateFn, error) {
		r, err := read(l)
		if err != nil {
			return nil, err
		}
		switch r {
		case delim:
			l.emit(t_string)
			return lexRoot, nil
		case '\\':
			escaped, err := read(l)
			if err != nil {
				return nil, err
			}
			l.keep(escaped)
		default:
			l.keep(r)
		}
		return self, nil
	}
	return self
}

@ -0,0 +1,31 @@
package main
import (
"testing"
)
// primitivesTests is the table of cases for TestLexPrimities. Each entry pairs
// a source string (in) with the single token (out) the lexer is expected to
// produce for it.
var primitivesTests = []struct {
in string
out token
}{
{`"x"`, token{t_string, "x"}},
{`"yes"`, token{t_string, "yes"}},
{`"this one has spaces"`, token{t_string, "this one has spaces"}},
// The input is a raw string, so the backslashes reach the lexer as-is.
{`"this one has \"quotes\" in it"`, token{t_string, `this one has "quotes" in it`}},
{"`this one is delimited by backticks`", token{t_string, "this one is delimited by backticks"}},
}
// TestLexPrimities lexes every input in primitivesTests and checks that the
// result is exactly one token, equal to the expected one. A failing case is
// reported and the remaining cases still run.
func TestLexPrimities(t *testing.T) {
	for _, test := range primitivesTests {
		tokens, err := fullTokens(lexString(test.in))
		if err != nil {
			t.Errorf("lexing %s: %v", test.in, err)
			continue
		}
		// Exactly one token is required: zero tokens means the input was
		// dropped, which must not count as a pass.
		if len(tokens) != 1 {
			t.Errorf("lexing %s: expected 1 token, saw %d: %v", test.in, len(tokens), tokens)
			continue
		}
		if tokens[0] != test.out {
			t.Errorf("lexing %s: expected token %v, saw %v", test.in, test.out, tokens[0])
			continue
		}
		t.Logf("OK: %s", test.in)
	}
}
Loading…
Cancel
Save