taking out a bit of redundancy in the lexer

master
Jordan Orelli 12 years ago
parent c35a4142d1
commit 8f150e038b

120
lex.go

@ -45,7 +45,8 @@ type stateFn func(*lexer) (stateFn, error)
type lexer struct { type lexer struct {
io.RuneReader io.RuneReader
cur []rune buf []rune
cur rune
depth int depth int
out chan token out chan token
} }
@ -54,23 +55,27 @@ type lexer struct {
// There's no sanity checking to make sure you don't emit some bullshit, so // There's no sanity checking to make sure you don't emit some bullshit, so
// don't fuck it up. // don't fuck it up.
func (l *lexer) emit(t typ3) { func (l *lexer) emit(t typ3) {
debugPrint("emit " + string(l.cur)) debugPrint("emit " + string(l.buf))
l.out <- token{lexeme: string(l.cur), t: t} l.out <- token{lexeme: string(l.buf), t: t}
l.cur = nil l.buf = nil
} }
func (l *lexer) nextRune() (rune, error) { func (l *lexer) next() error {
r, _, err := l.ReadRune() r, _, err := l.ReadRune()
return r, err if err != nil {
return err
}
l.cur = r
return nil
} }
// appends the rune to the current in-progress lexeme // appends the rune to the current in-progress lexeme
func (l *lexer) append(r rune) { func (l *lexer) append(r rune) {
debugPrint(fmt.Sprintf("append %c\n", (r))) debugPrint(fmt.Sprintf("append %c\n", (r)))
if l.cur == nil { if l.buf == nil {
l.cur = make([]rune, 0, 32) l.buf = make([]rune, 0, 32)
} }
l.cur = append(l.cur, r) l.buf = append(l.buf, r)
} }
func isDigit(r rune) bool { func isDigit(r rune) bool {
@ -92,11 +97,7 @@ func lexOpenParen(l *lexer) (stateFn, error) {
debugPrint("-->lexOpenParen") debugPrint("-->lexOpenParen")
l.out <- token{"(", openParenToken} l.out <- token{"(", openParenToken}
l.depth++ l.depth++
r, err := l.nextRune() switch l.cur {
if err != nil {
return nil, err
}
switch r {
case ' ', '\t', '\n', '\r': case ' ', '\t', '\n', '\r':
return lexWhitespace, nil return lexWhitespace, nil
case '(': case '(':
@ -106,11 +107,11 @@ func lexOpenParen(l *lexer) (stateFn, error) {
case ';': case ';':
return lexComment, nil return lexComment, nil
} }
if isDigit(r) { if isDigit(l.cur) {
l.append(r) l.append(l.cur)
return lexInt, nil return lexInt, nil
} }
l.append(r) l.append(l.cur)
return lexSymbol, nil return lexSymbol, nil
} }
@ -119,11 +120,7 @@ func lexOpenParen(l *lexer) (stateFn, error) {
// "wrong" but who honestly gives a shit. // "wrong" but who honestly gives a shit.
func lexWhitespace(l *lexer) (stateFn, error) { func lexWhitespace(l *lexer) (stateFn, error) {
debugPrint("-->lexWhitespace") debugPrint("-->lexWhitespace")
r, err := l.nextRune() switch l.cur {
if err != nil {
return nil, err
}
switch r {
case ' ', '\t', '\n', '\r': case ' ', '\t', '\n', '\r':
return lexWhitespace, nil return lexWhitespace, nil
case '"': case '"':
@ -135,39 +132,31 @@ func lexWhitespace(l *lexer) (stateFn, error) {
case ';': case ';':
return lexComment, nil return lexComment, nil
} }
if isDigit(r) { if isDigit(l.cur) {
l.append(r) l.append(l.cur)
return lexInt, nil return lexInt, nil
} }
l.append(r) l.append(l.cur)
return lexSymbol, nil return lexSymbol, nil
} }
func lexString(l *lexer) (stateFn, error) { func lexString(l *lexer) (stateFn, error) {
debugPrint("-->lexString") debugPrint("-->lexString")
r, err := l.nextRune() switch l.cur {
if err != nil {
return nil, err
}
switch r {
case '"': case '"':
l.emit(stringToken) l.emit(stringToken)
return lexWhitespace, nil return lexWhitespace, nil
case '\\': case '\\':
return lexStringEsc, nil return lexStringEsc, nil
} }
l.append(r) l.append(l.cur)
return lexString, nil return lexString, nil
} }
// lex the character *after* the string escape character \ // lex the character *after* the string escape character \
func lexStringEsc(l *lexer) (stateFn, error) { func lexStringEsc(l *lexer) (stateFn, error) {
debugPrint("-->lexStringEsc") debugPrint("-->lexStringEsc")
r, err := l.nextRune() l.append(l.cur)
if err != nil {
return nil, err
}
l.append(r)
return lexString, nil return lexString, nil
} }
@ -176,16 +165,12 @@ func lexStringEsc(l *lexer) (stateFn, error) {
// digits. Everything else is crap. // digits. Everything else is crap.
func lexInt(l *lexer) (stateFn, error) { func lexInt(l *lexer) (stateFn, error) {
debugPrint("-->lexInt") debugPrint("-->lexInt")
r, err := l.nextRune() switch l.cur {
if err != nil {
return nil, err
}
switch r {
case ' ', '\t', '\n', '\r': case ' ', '\t', '\n', '\r':
l.emit(integerToken) l.emit(integerToken)
return lexWhitespace, nil return lexWhitespace, nil
case '.': case '.':
l.append(r) l.append(l.cur)
return lexFloat, nil return lexFloat, nil
case ')': case ')':
l.emit(integerToken) l.emit(integerToken)
@ -194,23 +179,18 @@ func lexInt(l *lexer) (stateFn, error) {
l.emit(integerToken) l.emit(integerToken)
return lexComment, nil return lexComment, nil
} }
if isDigit(r) { if isDigit(l.cur) {
l.append(r) l.append(l.cur)
return lexInt, nil return lexInt, nil
} }
return nil, fmt.Errorf("unexpected rune in lexInt: %c", r) return nil, fmt.Errorf("unexpected rune in lexInt: %c", l.cur)
} }
// once we're in a float, the only valid values are digits, whitespace or close // once we're in a float, the only valid values are digits, whitespace or close
// paren. // paren.
func lexFloat(l *lexer) (stateFn, error) { func lexFloat(l *lexer) (stateFn, error) {
debugPrint("-->lexFloat") debugPrint("-->lexFloat")
r, err := l.nextRune() switch l.cur {
if err != nil {
return nil, err
}
switch r {
case ' ', '\t', '\n', '\r': case ' ', '\t', '\n', '\r':
l.emit(floatToken) l.emit(floatToken)
return lexWhitespace, nil return lexWhitespace, nil
@ -221,22 +201,17 @@ func lexFloat(l *lexer) (stateFn, error) {
l.emit(floatToken) l.emit(floatToken)
return lexComment, nil return lexComment, nil
} }
if isDigit(r) { if isDigit(l.cur) {
l.append(r) l.append(l.cur)
return lexFloat, nil return lexFloat, nil
} }
return nil, fmt.Errorf("unexpected run in lexFloat: %c", r) return nil, fmt.Errorf("unexpected rune in lexFloat: %c", l.cur)
} }
// lexes a symbol in progress // lexes a symbol in progress
func lexSymbol(l *lexer) (stateFn, error) { func lexSymbol(l *lexer) (stateFn, error) {
debugPrint("-->lexSymbol") debugPrint("-->lexSymbol")
r, err := l.nextRune() switch l.cur {
if err != nil {
return nil, err
}
switch r {
case ' ', '\t', '\n', '\r': case ' ', '\t', '\n', '\r':
debugPrint("ending lexSymbol on whitespace") debugPrint("ending lexSymbol on whitespace")
l.emit(symbolToken) l.emit(symbolToken)
@ -248,7 +223,7 @@ func lexSymbol(l *lexer) (stateFn, error) {
l.emit(symbolToken) l.emit(symbolToken)
return lexComment, nil return lexComment, nil
default: default:
l.append(r) l.append(l.cur)
return lexSymbol, nil return lexSymbol, nil
} }
panic("not reached") panic("not reached")
@ -259,11 +234,7 @@ func lexCloseParen(l *lexer) (stateFn, error) {
debugPrint("-->lexCloseParen") debugPrint("-->lexCloseParen")
l.out <- token{")", closeParenToken} l.out <- token{")", closeParenToken}
l.depth-- l.depth--
r, err := l.nextRune() switch l.cur {
if err != nil {
return nil, err
}
switch r {
case ' ', '\t', '\n', '\r': case ' ', '\t', '\n', '\r':
return lexWhitespace, nil return lexWhitespace, nil
case ')': case ')':
@ -277,11 +248,7 @@ func lexCloseParen(l *lexer) (stateFn, error) {
// lexes a comment // lexes a comment
func lexComment(l *lexer) (stateFn, error) { func lexComment(l *lexer) (stateFn, error) {
debugPrint("-->lexComment") debugPrint("-->lexComment")
r, err := l.nextRune() switch l.cur {
if err != nil {
return nil, err
}
switch r {
case '\n', '\r': case '\n', '\r':
return lexWhitespace, nil return lexWhitespace, nil
} }
@ -293,12 +260,19 @@ func lexComment(l *lexer) (stateFn, error) {
// new tokens. // new tokens.
func lex(input io.RuneReader, c chan token) { func lex(input io.RuneReader, c chan token) {
defer close(c) defer close(c)
l := &lexer{input, nil, 0, c} l := &lexer{input, nil, ' ', 0, c}
var err error var err error
f := stateFn(lexWhitespace) f := stateFn(lexWhitespace)
for err == nil { for {
f, err = f(l) f, err = f(l)
if err != nil {
break
}
err = l.next()
if err != nil {
break
}
} }
if err != io.EOF { if err != io.EOF {
fmt.Println(err) fmt.Println(err)

Loading…
Cancel
Save