lexer produces tokens now, not strings

can also lex integers and floats now
master
Jordan Orelli 12 years ago
parent 816da3fbbc
commit 1a6b286b5c

@@ -1,11 +1,13 @@
 (+ 1 (+ 1 1) (dave (sam 1)))
 (+ 1 (+ 1
-2
-3
-4
+22.3
+a
+3.
+4.0
 (dave
 1
 2
+albert-camus
 3
 (sam 3 2 2)))

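For illustration only (not part of the commit): fed a line like (+ 1 22.3 albert-camus), made up here to exercise the new states, the lexer below emits six tokens, which main prints as type then lexeme:

	open_paren (
	symbol +
	integer 1
	float 22.3
	symbol albert-camus
	close_paren )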
@@ -7,13 +7,48 @@ import (
 	"os"
 )
 
+type typ3 int
+
+const (
+	invalid typ3 = iota
+	int3ger
+	symbol
+	openParen
+	closeParen
+	str1ng
+	fl0at
+)
+
+func (t typ3) String() string {
+	switch t {
+	case int3ger:
+		return "integer"
+	case symbol:
+		return "symbol"
+	case openParen:
+		return "open_paren"
+	case closeParen:
+		return "close_paren"
+	case str1ng:
+		return "string"
+	case fl0at:
+		return "float"
+	}
+	panic("wtf")
+}
+
+type token struct {
+	lexeme string
+	t      typ3
+}
+
 type stateFn func(*lexer) (stateFn, error)
 
 type lexer struct {
 	input *bufio.Reader
 	cur   []rune
 	depth int
-	out   chan string
+	out   chan token
 }
 
 func (l *lexer) next() (rune, error) {
@@ -21,19 +56,23 @@ func (l *lexer) next() (rune, error) {
 	return r, err
 }
 
-func (l *lexer) emit() {
-	l.out <- string(l.cur)
+// clears the current lexeme buffer and emits a token of the given type.
+// There's no sanity checking to make sure you don't emit some bullshit, so
+// don't fuck it up.
+func (l *lexer) emit(t typ3) {
+	l.out <- token{lexeme: string(l.cur), t: t}
 	l.cur = nil
 }
 
+// appends the rune to the current in-progress lexeme
 func (l *lexer) append(r rune) {
 	if l.cur == nil {
-		l.cur = []rune{r}
-		return
+		l.cur = make([]rune, 0, 32)
 	}
 	l.cur = append(l.cur, r)
 }
 
+// lexes stuff at the root level of the input.
 func lexRoot(l *lexer) (stateFn, error) {
 	r, err := l.next()
 	if err != nil {
@@ -41,8 +80,6 @@ func lexRoot(l *lexer) (stateFn, error) {
 	}
 	switch r {
 	case '(':
-		l.append(r)
-		l.emit()
 		return lexOpenParen, nil
 	case ' ', '\t', '\n':
 		return lexRoot, nil
@@ -50,7 +87,17 @@ func lexRoot(l *lexer) (stateFn, error) {
 	return nil, fmt.Errorf("unexpected rune in lexRoot: %c", r)
 }
 
+func isDigit(r rune) bool {
+	switch r {
+	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+		return true
+	}
+	return false
+}
+
+// lexes an open parenthesis
 func lexOpenParen(l *lexer) (stateFn, error) {
+	l.out <- token{"(", openParen}
 	l.depth++
 	r, err := l.next()
 	if err != nil {
@@ -61,34 +108,84 @@ func lexOpenParen(l *lexer) (stateFn, error) {
 		return lexRoot, nil
 	case '(':
 		return nil, fmt.Errorf("the whole (( thing isn't supported yet")
-	default:
-		l.append(r)
-		return lexOnSymbol, nil
 	}
-	panic("not reached")
+	if isDigit(r) {
+		l.append(r)
+		return lexInt, nil
+	}
+	l.append(r)
+	return lexSymbol, nil
+}
+
+func lexInt(l *lexer) (stateFn, error) {
+	r, err := l.next()
+	if err != nil {
+		return nil, err
+	}
+	switch r {
+	case ' ', '\t', '\n':
+		l.emit(int3ger)
+		return lexWhitespace, nil
+	case '.':
+		l.append(r)
+		return lexFloat, nil
+	case ')':
+		l.emit(int3ger)
+		return lexCloseParen, nil
+	}
+	if isDigit(r) {
+		l.append(r)
+		return lexInt, nil
+	}
+	return nil, fmt.Errorf("unexpected rune in lexInt: %c", r)
 }
 
-func lexOnSymbol(l *lexer) (stateFn, error) {
+// once we're in a float, the only valid values are digits, whitespace or close
+// paren.
+func lexFloat(l *lexer) (stateFn, error) {
 	r, err := l.next()
 	if err != nil {
 		return nil, err
 	}
 	switch r {
 	case ' ', '\t', '\n':
-		l.emit()
+		l.emit(fl0at)
 		return lexWhitespace, nil
 	case ')':
-		l.emit()
-		l.append(r)
-		l.emit()
+		l.emit(fl0at)
 		return lexCloseParen, nil
-	default:
+	}
+	if isDigit(r) {
 		l.append(r)
-		return lexOnSymbol, nil
+		return lexFloat, nil
 	}
-	panic("not reached")
+	return nil, fmt.Errorf("unexpected rune in lexFloat: %c", r)
 }
 
+// lexes a symbol in progress
+func lexSymbol(l *lexer) (stateFn, error) {
+	r, err := l.next()
+	if err != nil {
+		return nil, err
+	}
+	switch r {
+	case ' ', '\t', '\n':
+		l.emit(symbol)
+		return lexWhitespace, nil
+	case ')':
+		l.emit(symbol)
+		return lexCloseParen, nil
+	default:
+		l.append(r)
+		return lexSymbol, nil
+	}
+	panic("not reached")
+}
+
+// lexes some whitespace in progress. Maybe this should be combined with root
+// and the lexer shouldn't have a state. I think what I'm doing now is
+// "wrong" but who honestly gives a shit.
 func lexWhitespace(l *lexer) (stateFn, error) {
 	r, err := l.next()
 	if err != nil {
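A quick trace, for illustration only, of how the new states accept the bare "3." from the sample input above:

	lexWhitespace reads '3'  -> append, hand off to lexInt
	lexInt reads '.'         -> append, hand off to lexFloat
	lexFloat reads '\n'      -> emit(fl0at), yielding token{lexeme: "3.", t: fl0at}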
@@ -98,17 +195,19 @@ func lexWhitespace(l *lexer) (stateFn, error) {
 	case ' ', '\t', '\n':
 		return lexWhitespace, nil
 	case '(':
-		l.append(r)
-		l.emit()
 		return lexOpenParen, nil
-	default:
-		l.append(r)
-		return lexOnSymbol, nil
 	}
-	panic("not reached")
+	if isDigit(r) {
+		l.append(r)
+		return lexInt, nil
+	}
+	l.append(r)
+	return lexSymbol, nil
 }
 
+// lex a close parenthesis
 func lexCloseParen(l *lexer) (stateFn, error) {
+	l.out <- token{")", closeParen}
 	l.depth--
 	r, err := l.next()
 	if err != nil {
@@ -122,14 +221,15 @@ func lexCloseParen(l *lexer) (stateFn, error) {
 		return lexWhitespace, nil
 	}
 	case ')':
-		l.append(r)
-		l.emit()
 		return lexCloseParen, nil
 	}
 	return nil, fmt.Errorf("unimplemented")
 }
 
-func lex(input io.Reader, c chan string) {
+// lexes some lispy input from an io.Reader, emitting tokens on chan c. The
+// channel is closed when the input reaches EOF, signaling that there are no
+// new tokens.
+func lex(input io.Reader, c chan token) {
 	defer close(c)
 	l := &lexer{
 		input: bufio.NewReader(input),
@@ -158,10 +258,10 @@ func main() {
 		os.Exit(1)
 	}
-	c := make(chan string)
+	c := make(chan token)
 	go lex(f, c)
 	for s := range c {
-		fmt.Println(s)
+		fmt.Println(s.t, s.lexeme)
 	}
 }
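A rough usage sketch, not part of the diff: because lex takes any io.Reader and closes the channel at EOF, the token stream can also be collected from an in-memory string. The helper name lexString and the extra "strings" import are assumptions made here for illustration; token, lex, and the close-on-EOF behavior come from the code above.

	// lexString is a hypothetical helper: it runs the lexer over a string
	// (strings.NewReader satisfies io.Reader) and collects every token
	// until lex closes the channel at EOF.
	func lexString(src string) []token {
		c := make(chan token)
		go lex(strings.NewReader(src), c)
		var tokens []token
		for t := range c {
			tokens = append(tokens, t)
		}
		return tokens
	}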
