From 1a6b286b5c23d2be00acc0e288447f67541ef199 Mon Sep 17 00:00:00 2001 From: Jordan Orelli Date: Sat, 13 Oct 2012 22:20:44 -0400 Subject: [PATCH] lexer produces tokens now, not strings can also lex integers and floats now --- input.lisp | 8 +-- skeam.go | 150 ++++++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 130 insertions(+), 28 deletions(-) diff --git a/input.lisp b/input.lisp index d529718..65c7d10 100644 --- a/input.lisp +++ b/input.lisp @@ -1,11 +1,13 @@ (+ 1 (+ 1 1) (dave (sam 1))) (+ 1 - 2 - 3 - 4 + 22.3 + a + 3. + 4.0 (dave 1 2 + albert-camus 3 (sam 3 2 2))) diff --git a/skeam.go b/skeam.go index 83e152a..be99895 100644 --- a/skeam.go +++ b/skeam.go @@ -7,13 +7,48 @@ import ( "os" ) +type typ3 int + +const ( + invalid typ3 = iota + int3ger + symbol + openParen + closeParen + str1ng + fl0at +) + +func (t typ3) String() string { + switch t { + case int3ger: + return "integer" + case symbol: + return "symbol" + case openParen: + return "open_paren" + case closeParen: + return "close_paren" + case str1ng: + return "string" + case fl0at: + return "float" + } + panic("wtf") +} + +type token struct { + lexeme string + t typ3 +} + type stateFn func(*lexer) (stateFn, error) type lexer struct { input *bufio.Reader cur []rune depth int - out chan string + out chan token } func (l *lexer) next() (rune, error) { @@ -21,19 +56,23 @@ func (l *lexer) next() (rune, error) { return r, err } -func (l *lexer) emit() { - l.out <- string(l.cur) +// clears the current lexeme buffer and emits a token of the given type. +// There's no sanity checking to make sure you don't emit some bullshit, so +// don't fuck it up. +func (l *lexer) emit(t typ3) { + l.out <- token{lexeme: string(l.cur), t: t} l.cur = nil } +// appends the rune to the current in-progress lexeme func (l *lexer) append(r rune) { if l.cur == nil { - l.cur = []rune{r} - return + l.cur = make([]rune, 0, 32) } l.cur = append(l.cur, r) } +// lexes stuff at the root level of the input. 
func lexRoot(l *lexer) (stateFn, error) { r, err := l.next() if err != nil { @@ -41,8 +80,6 @@ func lexRoot(l *lexer) (stateFn, error) { } switch r { case '(': - l.append(r) - l.emit() return lexOpenParen, nil case ' ', '\t', '\n': return lexRoot, nil @@ -50,7 +87,17 @@ func lexRoot(l *lexer) (stateFn, error) { return nil, fmt.Errorf("unexpected rune in lexRoot: %c", r) } +func isDigit(r rune) bool { + switch r { + case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + return true + } + return false +} + +// lexes an open parenthesis func lexOpenParen(l *lexer) (stateFn, error) { + l.out <- token{"(", openParen} l.depth++ r, err := l.next() if err != nil { @@ -61,34 +108,84 @@ func lexOpenParen(l *lexer) (stateFn, error) { return lexRoot, nil case '(': return nil, fmt.Errorf("the whole (( thing isn't supported yet") - default: + } + if isDigit(r) { l.append(r) - return lexOnSymbol, nil + return lexInt, nil } - panic("not reached") + l.append(r) + return lexSymbol, nil +} + +func lexInt(l *lexer) (stateFn, error) { + r, err := l.next() + if err != nil { + return nil, err + } + switch r { + case ' ', '\t', '\n': + l.emit(int3ger) + return lexWhitespace, nil + case '.': + l.append(r) + return lexFloat, nil + case ')': + l.emit(int3ger) + return lexCloseParen, nil + } + if isDigit(r) { + l.append(r) + return lexInt, nil + } + return nil, fmt.Errorf("unexpected rune in lexInt: %c", r) } -func lexOnSymbol(l *lexer) (stateFn, error) { +// once we're in a float, the only valid values are digits, whitespace or close +// paren. 
+func lexFloat(l *lexer) (stateFn, error) { r, err := l.next() if err != nil { return nil, err } + switch r { case ' ', '\t', '\n': - l.emit() + l.emit(fl0at) return lexWhitespace, nil case ')': - l.emit() + l.emit(fl0at) + return lexCloseParen, nil + } + if isDigit(r) { l.append(r) - l.emit() + return lexFloat, nil + } + return nil, fmt.Errorf("unexpected run in lexFloat: %c", r) +} + +// lexes a symbol in progress +func lexSymbol(l *lexer) (stateFn, error) { + r, err := l.next() + if err != nil { + return nil, err + } + switch r { + case ' ', '\t', '\n': + l.emit(symbol) + return lexWhitespace, nil + case ')': + l.emit(symbol) return lexCloseParen, nil default: l.append(r) - return lexOnSymbol, nil + return lexSymbol, nil } panic("not reached") } +// lexes some whitespace in progress. Maybe this should be combined with root +// and the lexer shouldn't have a state. I think what I'm doing now is +// "wrong" but who honestly gives a shit. func lexWhitespace(l *lexer) (stateFn, error) { r, err := l.next() if err != nil { @@ -98,17 +195,19 @@ func lexWhitespace(l *lexer) (stateFn, error) { case ' ', '\t', '\n': return lexWhitespace, nil case '(': - l.append(r) - l.emit() return lexOpenParen, nil - default: + } + if isDigit(r) { l.append(r) - return lexOnSymbol, nil + return lexInt, nil } - panic("not reached") + l.append(r) + return lexSymbol, nil } +// lex a close parenthesis func lexCloseParen(l *lexer) (stateFn, error) { + l.out <- token{")", closeParen} l.depth-- r, err := l.next() if err != nil { @@ -122,14 +221,15 @@ func lexCloseParen(l *lexer) (stateFn, error) { return lexWhitespace, nil } case ')': - l.append(r) - l.emit() return lexCloseParen, nil } return nil, fmt.Errorf("unimplemented") } -func lex(input io.Reader, c chan string) { +// lexes some lispy input from an io.Reader, emitting tokens on chan c. The +// channel is closed when the input reaches EOF, signaling that there are no +// new tokens. 
+func lex(input io.Reader, c chan token) { defer close(c) l := &lexer{ input: bufio.NewReader(input), @@ -158,10 +258,10 @@ func main() { os.Exit(1) } - c := make(chan string) + c := make(chan token) go lex(f, c) for s := range c { - fmt.Println(s) + fmt.Println(s.t, s.lexeme) } }