You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

289 lines
5.9 KiB
Go

package main
import (
12 years ago
"fmt"
"io"
"strings"
)
// typ3 identifies the kind of a token produced by the lexer.
type typ3 int

// Token kinds. invalid is the zero value of typ3, so a token whose type was
// never set reports as invalid rather than as a real kind.
const (
	invalid typ3 = iota
	integerToken
	symbolToken
	openParenToken
	closeParenToken
	stringToken
	floatToken
)

// String returns a short, human-readable name for the token kind.
//
// It never panics: the zero value yields "invalid", and a value outside the
// declared constants is rendered as "typ3(N)" so that printing a malformed
// token cannot take the program down.
func (t typ3) String() string {
	switch t {
	case invalid:
		return "invalid"
	case integerToken:
		return "integer"
	case symbolToken:
		return "symbol"
	case openParenToken:
		return "open_paren"
	case closeParenToken:
		return "close_paren"
	case stringToken:
		return "string"
	case floatToken:
		return "float"
	}
	// Convert to int explicitly so formatting can never recurse into String.
	return fmt.Sprintf("typ3(%d)", int(t))
}
// token is one lexical unit sent on the lexer's output channel: the text that
// was read plus the kind of token it represents.
type token struct {
// lexeme is the token text. For tokens built by emit it is the contents of
// the lexer's rune buffer; for parentheses it is the literal "(" or ")".
lexeme string
// t is the kind of token.
t typ3
}
// stateFn is one state of the lexer's state machine. It is called with the
// lexer positioned on its current rune and returns the state to run on the
// next rune, or a non-nil error to stop lexing.
type stateFn func(*lexer) (stateFn, error)
// lexer carries the scanning state shared by all state functions.
type lexer struct {
// Embedded input source; next reads from it via ReadRune.
io.RuneReader
// buf holds the runes kept so far for the lexeme in progress. emit resets
// it to nil.
buf []rune
// cur is the rune most recently read by next.
cur rune
// out receives every token the lexer produces.
out chan token
}
// emit sends the runes kept so far on the output channel as a single token of
// type t, then clears the lexeme buffer. The token type is not checked against
// the buffered text, so callers must pass the kind that matches what they
// have been keeping.
func (l *lexer) emit(t typ3) {
	lexeme := string(l.buf)
	debugPrint("emit " + lexeme)
	l.out <- token{lexeme: lexeme, t: t}
	l.buf = nil
}
// next reads one rune from the input and stores it in l.cur. If the read
// fails, l.cur is left untouched and the error from ReadRune is returned
// unchanged (lex treats io.EOF as the normal end of input).
func (l *lexer) next() error {
	ch, _, readErr := l.ReadRune()
	if readErr == nil {
		l.cur = ch
	}
	return readErr
}
// keep appends the current rune to the in-progress lexeme buffer, allocating
// the buffer (with room for 32 runes) the first time it is needed.
func (l *lexer) keep() {
	pending := l.buf
	if pending == nil {
		pending = make([]rune, 0, 32)
	}
	l.buf = append(pending, l.cur)
}
// isDigit reports whether r is one of the ASCII decimal digits '0' through
// '9'. Digits from other scripts are not accepted.
func isDigit(r rune) bool {
	return '0' <= r && r <= '9'
}
// debugPrint writes s to standard output prefixed with "#", but only when the
// package-level DEBUG flag is set; otherwise it does nothing.
func debugPrint(s string) {
	if !DEBUG {
		return
	}
	fmt.Println("#", s)
}
// lexes an open parenthesis
func lexOpenParen(l *lexer) (stateFn, error) {
debugPrint("-->lexOpenParen")
12 years ago
l.out <- token{"(", openParenToken}
switch l.cur {
case ' ', '\t', '\n', '\r':
return lexWhitespace, nil
case '(':
return lexOpenParen, nil
case ')':
return lexCloseParen, nil
case ';':
return lexComment, nil
}
if isDigit(l.cur) {
l.keep()
return lexInt, nil
}
l.keep()
return lexSymbol, nil
}
// lexWhitespace is the state between tokens (it is also the lexer's start
// state). It skips whitespace and chooses the state for whatever comes next:
// a string, a paren, a comment, an integer (leading digit) or a symbol (any
// other rune). The first rune of an integer or symbol is kept here.
func lexWhitespace(l *lexer) (stateFn, error) {
	debugPrint("-->lexWhitespace")
	switch c := l.cur; {
	case c == ' ' || c == '\t' || c == '\n' || c == '\r':
		return lexWhitespace, nil
	case c == '"':
		return lexString, nil
	case c == '(':
		return lexOpenParen, nil
	case c == ')':
		return lexCloseParen, nil
	case c == ';':
		return lexComment, nil
	case isDigit(c):
		l.keep()
		return lexInt, nil
	default:
		l.keep()
		return lexSymbol, nil
	}
}
// lexes an in-progress string. Basically we just keep all of the tokens until
// we see a double-quote character, signifying the end of the string. We also
// switch into escape mode if we come across a backslash.
func lexString(l *lexer) (stateFn, error) {
debugPrint("-->lexString")
switch l.cur {
case '"':
12 years ago
l.emit(stringToken)
return lexWhitespace, nil
case '\\':
return lexStringEsc, nil
}
l.keep()
return lexString, nil
}
// lexStringEsc handles the rune immediately after a backslash inside a
// string. That rune is always kept as-is (so `\"` yields a literal quote),
// and lexing then continues in lexString.
func lexStringEsc(l *lexer) (stateFn, error) {
	debugPrint("-->lexStringEsc")
	l.keep()
	var resume stateFn = lexString
	return resume, nil
}
// lex an integer. Once we're on an integer, the only valid characters are
// whitespace, close paren, a period to indicate we want a float, or more
// digits. Everything else is crap.
func lexInt(l *lexer) (stateFn, error) {
debugPrint("-->lexInt")
switch l.cur {
case ' ', '\t', '\n', '\r':
12 years ago
l.emit(integerToken)
return lexWhitespace, nil
case '.':
l.keep()
return lexFloat, nil
case ')':
12 years ago
l.emit(integerToken)
return lexCloseParen, nil
case ';':
12 years ago
l.emit(integerToken)
return lexComment, nil
}
if isDigit(l.cur) {
l.keep()
return lexInt, nil
}
return nil, fmt.Errorf("unexpected rune in lexInt: %c", l.cur)
}
// once we're in a float, the only valid values are digits, whitespace or close
// paren.
func lexFloat(l *lexer) (stateFn, error) {
debugPrint("-->lexFloat")
switch l.cur {
case ' ', '\t', '\n', '\r':
12 years ago
l.emit(floatToken)
return lexWhitespace, nil
case ')':
12 years ago
l.emit(floatToken)
return lexCloseParen, nil
case ';':
12 years ago
l.emit(floatToken)
return lexComment, nil
}
if isDigit(l.cur) {
l.keep()
return lexFloat, nil
}
return nil, fmt.Errorf("unexpected rune in lexFloat: %c", l.cur)
}
// lexes a symbol in progress
func lexSymbol(l *lexer) (stateFn, error) {
debugPrint("-->lexSymbol")
switch l.cur {
case ' ', '\t', '\n', '\r':
debugPrint("ending lexSymbol on whitespace")
12 years ago
l.emit(symbolToken)
return lexWhitespace, nil
case ')':
12 years ago
l.emit(symbolToken)
return lexCloseParen, nil
case ';':
12 years ago
l.emit(symbolToken)
return lexComment, nil
default:
l.keep()
return lexSymbol, nil
}
panic("not reached")
}
// lex a close parenthesis
func lexCloseParen(l *lexer) (stateFn, error) {
debugPrint("-->lexCloseParen")
12 years ago
l.out <- token{")", closeParenToken}
switch l.cur {
case ' ', '\t', '\n', '\r':
return lexWhitespace, nil
case ')':
return lexCloseParen, nil
case ';':
return lexComment, nil
}
return nil, fmt.Errorf("unimplemented")
}
// lexComment skips the body of a comment. Everything is discarded until a
// newline or carriage return, after which lexing resumes in lexWhitespace.
func lexComment(l *lexer) (stateFn, error) {
	debugPrint("-->lexComment")
	if l.cur == '\n' || l.cur == '\r' {
		return lexWhitespace, nil
	}
	return lexComment, nil
}
// lex tokenizes lispy input read from input, sending each token on c. The
// channel is closed when lex returns, which tells the receiver that no more
// tokens are coming.
//
// Each state function runs on the rune *after* the one that selected it, so
// when the input ends there is still one state that has not run. At EOF that
// state is fed a synthetic newline so the final token (for example the last
// ')' of "(+ 1 2)") is emitted even when the input has no trailing
// whitespace. An unterminated string is still dropped silently.
//
// Errors other than io.EOF are printed to standard output and stop lexing.
func lex(input io.RuneReader, c chan token) {
	defer close(c)
	l := &lexer{input, nil, ' ', c}
	f := stateFn(lexWhitespace)
	var err error
	for {
		if f, err = f(l); err != nil {
			break
		}
		if err = l.next(); err != nil {
			break
		}
	}
	if err == io.EOF {
		if f == nil {
			return
		}
		// Flush the pending state with a terminator rune.
		l.cur = '\n'
		if _, err = f(l); err == nil {
			return
		}
	}
	fmt.Println(err)
}
// lexs is a convenience wrapper around lex for input that is already in
// memory: it tokenizes the string input onto channel c.
func lexs(input string, c chan token) {
	src := strings.NewReader(input)
	lex(src, c)
}