lexer produces tokens now, not strings

can also lex integers and floats now
master
Jordan Orelli 12 years ago
parent 816da3fbbc
commit 1a6b286b5c

@ -1,11 +1,13 @@
(+ 1 (+ 1 1) (dave (sam 1)))
(+ 1
2
3
4
22.3
a
3.
4.0
(dave
1
2
albert-camus
3
(sam 3 2 2)))

@ -7,13 +7,48 @@ import (
"os"
)
type typ3 int
const (
invalid typ3 = iota
int3ger
symbol
openParen
closeParen
str1ng
fl0at
)
func (t typ3) String() string {
switch t {
case int3ger:
return "integer"
case symbol:
return "symbol"
case openParen:
return "open_paren"
case closeParen:
return "close_paren"
case str1ng:
return "string"
case fl0at:
return "float"
}
panic("wtf")
}
type token struct {
lexeme string
t typ3
}
type stateFn func(*lexer) (stateFn, error)
type lexer struct {
input *bufio.Reader
cur []rune
depth int
out chan string
out chan token
}
func (l *lexer) next() (rune, error) {
@ -21,19 +56,23 @@ func (l *lexer) next() (rune, error) {
return r, err
}
func (l *lexer) emit() {
l.out <- string(l.cur)
// clears the current lexem buffer and emits a token of the given type.
// There's no sanity checking to make sure you don't emit some bullshit, so
// don't fuck it up.
func (l *lexer) emit(t typ3) {
l.out <- token{lexeme: string(l.cur), t: t}
l.cur = nil
}
// appends the rune to the current in-progress lexem
func (l *lexer) append(r rune) {
if l.cur == nil {
l.cur = []rune{r}
return
l.cur = make([]rune, 0, 32)
}
l.cur = append(l.cur, r)
}
// lexes stuff at the root level of the input.
func lexRoot(l *lexer) (stateFn, error) {
r, err := l.next()
if err != nil {
@ -41,8 +80,6 @@ func lexRoot(l *lexer) (stateFn, error) {
}
switch r {
case '(':
l.append(r)
l.emit()
return lexOpenParen, nil
case ' ', '\t', '\n':
return lexRoot, nil
@ -50,7 +87,17 @@ func lexRoot(l *lexer) (stateFn, error) {
return nil, fmt.Errorf("unexpected rune in lexRoot: %c", r)
}
func isDigit(r rune) bool {
switch r {
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
return true
}
return false
}
// lexes an open parenthesis
func lexOpenParen(l *lexer) (stateFn, error) {
l.out <- token{"(", openParen}
l.depth++
r, err := l.next()
if err != nil {
@ -61,34 +108,84 @@ func lexOpenParen(l *lexer) (stateFn, error) {
return lexRoot, nil
case '(':
return nil, fmt.Errorf("the whole (( thing isn't supported yet")
default:
}
if isDigit(r) {
l.append(r)
return lexOnSymbol, nil
return lexInt, nil
}
panic("not reached")
l.append(r)
return lexSymbol, nil
}
func lexInt(l *lexer) (stateFn, error) {
r, err := l.next()
if err != nil {
return nil, err
}
switch r {
case ' ', '\t', '\n':
l.emit(int3ger)
return lexWhitespace, nil
case '.':
l.append(r)
return lexFloat, nil
case ')':
l.emit(int3ger)
return lexCloseParen, nil
}
if isDigit(r) {
l.append(r)
return lexInt, nil
}
return nil, fmt.Errorf("unexpected rune in lexInt: %c", r)
}
func lexOnSymbol(l *lexer) (stateFn, error) {
// once we're in a float, the only valid values are digits, whitespace or close
// paren.
func lexFloat(l *lexer) (stateFn, error) {
r, err := l.next()
if err != nil {
return nil, err
}
switch r {
case ' ', '\t', '\n':
l.emit()
l.emit(fl0at)
return lexWhitespace, nil
case ')':
l.emit()
l.emit(fl0at)
return lexCloseParen, nil
}
if isDigit(r) {
l.append(r)
l.emit()
return lexFloat, nil
}
return nil, fmt.Errorf("unexpected run in lexFloat: %c", r)
}
// lexes a symbol in progress
func lexSymbol(l *lexer) (stateFn, error) {
r, err := l.next()
if err != nil {
return nil, err
}
switch r {
case ' ', '\t', '\n':
l.emit(symbol)
return lexWhitespace, nil
case ')':
l.emit(symbol)
return lexCloseParen, nil
default:
l.append(r)
return lexOnSymbol, nil
return lexSymbol, nil
}
panic("not reached")
}
// lexes some whitespace in progress. Maybe this should be combined with root
// and the lexer shouldn't have a state. I think wehat I'm doing now is
// "wrong" but who honestly gives a shit.
func lexWhitespace(l *lexer) (stateFn, error) {
r, err := l.next()
if err != nil {
@ -98,17 +195,19 @@ func lexWhitespace(l *lexer) (stateFn, error) {
case ' ', '\t', '\n':
return lexWhitespace, nil
case '(':
l.append(r)
l.emit()
return lexOpenParen, nil
default:
}
if isDigit(r) {
l.append(r)
return lexOnSymbol, nil
return lexInt, nil
}
panic("not reached")
l.append(r)
return lexSymbol, nil
}
// lex a close parenthesis
func lexCloseParen(l *lexer) (stateFn, error) {
l.out <- token{")", closeParen}
l.depth--
r, err := l.next()
if err != nil {
@ -122,14 +221,15 @@ func lexCloseParen(l *lexer) (stateFn, error) {
return lexWhitespace, nil
}
case ')':
l.append(r)
l.emit()
return lexCloseParen, nil
}
return nil, fmt.Errorf("unimplemented")
}
func lex(input io.Reader, c chan string) {
// lexes some lispy input from an io.Reader, emiting tokens on chan c. The
// channel is closed when the input reaches EOF, signaling that there are no
// new tokens.
func lex(input io.Reader, c chan token) {
defer close(c)
l := &lexer{
input: bufio.NewReader(input),
@ -158,10 +258,10 @@ func main() {
os.Exit(1)
}
c := make(chan string)
c := make(chan token)
go lex(f, c)
for s := range c {
fmt.Println(s)
fmt.Println(s.t, s.lexeme)
}
}

Loading…
Cancel
Save