|
|
@ -7,13 +7,48 @@ import (
|
|
|
|
"os"
|
|
|
|
"os"
|
|
|
|
)
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// typ3 identifies the lexical category of a token.
type typ3 int

// Token categories. invalid is the zero value, so a typ3 that was never
// assigned reports itself as invalid rather than as a real category.
const (
	invalid typ3 = iota
	int3ger
	symbol
	openParen
	closeParen
	str1ng
	fl0at
)

// String returns a human-readable name for the token category, which lets a
// typ3 be passed directly to the fmt printing functions.
//
// The zero value yields "invalid". A value outside the declared constants
// yields "typ3(N)", where N is its integer value, instead of panicking, so
// that printing a token can never bring the program down.
func (t typ3) String() string {
	switch t {
	case invalid:
		return "invalid"
	case int3ger:
		return "integer"
	case symbol:
		return "symbol"
	case openParen:
		return "open_paren"
	case closeParen:
		return "close_paren"
	case str1ng:
		return "string"
	case fl0at:
		return "float"
	}
	// Not one of the declared categories: report the raw value.
	return fmt.Sprintf("typ3(%d)", int(t))
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
type token struct {
|
|
|
|
|
|
|
|
lexeme string
|
|
|
|
|
|
|
|
t typ3
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
type stateFn func(*lexer) (stateFn, error)
|
|
|
|
type stateFn func(*lexer) (stateFn, error)
|
|
|
|
|
|
|
|
|
|
|
|
type lexer struct {
|
|
|
|
type lexer struct {
|
|
|
|
input *bufio.Reader
|
|
|
|
input *bufio.Reader
|
|
|
|
cur []rune
|
|
|
|
cur []rune
|
|
|
|
depth int
|
|
|
|
depth int
|
|
|
|
out chan string
|
|
|
|
out chan token
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func (l *lexer) next() (rune, error) {
|
|
|
|
func (l *lexer) next() (rune, error) {
|
|
|
@ -21,19 +56,23 @@ func (l *lexer) next() (rune, error) {
|
|
|
|
return r, err
|
|
|
|
return r, err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func (l *lexer) emit() {
|
|
|
|
// clears the current lexem buffer and emits a token of the given type.
|
|
|
|
l.out <- string(l.cur)
|
|
|
|
// There's no sanity checking to make sure you don't emit some bullshit, so
|
|
|
|
|
|
|
|
// don't fuck it up.
|
|
|
|
|
|
|
|
func (l *lexer) emit(t typ3) {
|
|
|
|
|
|
|
|
l.out <- token{lexeme: string(l.cur), t: t}
|
|
|
|
l.cur = nil
|
|
|
|
l.cur = nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// appends the rune to the current in-progress lexem
|
|
|
|
func (l *lexer) append(r rune) {
|
|
|
|
func (l *lexer) append(r rune) {
|
|
|
|
if l.cur == nil {
|
|
|
|
if l.cur == nil {
|
|
|
|
l.cur = []rune{r}
|
|
|
|
l.cur = make([]rune, 0, 32)
|
|
|
|
return
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
l.cur = append(l.cur, r)
|
|
|
|
l.cur = append(l.cur, r)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// lexes stuff at the root level of the input.
|
|
|
|
func lexRoot(l *lexer) (stateFn, error) {
|
|
|
|
func lexRoot(l *lexer) (stateFn, error) {
|
|
|
|
r, err := l.next()
|
|
|
|
r, err := l.next()
|
|
|
|
if err != nil {
|
|
|
|
if err != nil {
|
|
|
@ -41,8 +80,6 @@ func lexRoot(l *lexer) (stateFn, error) {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
switch r {
|
|
|
|
switch r {
|
|
|
|
case '(':
|
|
|
|
case '(':
|
|
|
|
l.append(r)
|
|
|
|
|
|
|
|
l.emit()
|
|
|
|
|
|
|
|
return lexOpenParen, nil
|
|
|
|
return lexOpenParen, nil
|
|
|
|
case ' ', '\t', '\n':
|
|
|
|
case ' ', '\t', '\n':
|
|
|
|
return lexRoot, nil
|
|
|
|
return lexRoot, nil
|
|
|
@ -50,7 +87,17 @@ func lexRoot(l *lexer) (stateFn, error) {
|
|
|
|
return nil, fmt.Errorf("unexpected rune in lexRoot: %c", r)
|
|
|
|
return nil, fmt.Errorf("unexpected rune in lexRoot: %c", r)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// isDigit reports whether r is one of the ASCII decimal digits '0' through
// '9'. Digits from other scripts are not accepted.
func isDigit(r rune) bool {
	return '0' <= r && r <= '9'
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// lexes an open parenthesis
|
|
|
|
func lexOpenParen(l *lexer) (stateFn, error) {
|
|
|
|
func lexOpenParen(l *lexer) (stateFn, error) {
|
|
|
|
|
|
|
|
l.out <- token{"(", openParen}
|
|
|
|
l.depth++
|
|
|
|
l.depth++
|
|
|
|
r, err := l.next()
|
|
|
|
r, err := l.next()
|
|
|
|
if err != nil {
|
|
|
|
if err != nil {
|
|
|
@ -61,34 +108,84 @@ func lexOpenParen(l *lexer) (stateFn, error) {
|
|
|
|
return lexRoot, nil
|
|
|
|
return lexRoot, nil
|
|
|
|
case '(':
|
|
|
|
case '(':
|
|
|
|
return nil, fmt.Errorf("the whole (( thing isn't supported yet")
|
|
|
|
return nil, fmt.Errorf("the whole (( thing isn't supported yet")
|
|
|
|
default:
|
|
|
|
}
|
|
|
|
|
|
|
|
if isDigit(r) {
|
|
|
|
l.append(r)
|
|
|
|
l.append(r)
|
|
|
|
return lexOnSymbol, nil
|
|
|
|
return lexInt, nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
panic("not reached")
|
|
|
|
l.append(r)
|
|
|
|
|
|
|
|
return lexSymbol, nil
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
func lexInt(l *lexer) (stateFn, error) {
|
|
|
|
|
|
|
|
r, err := l.next()
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
|
|
|
|
return nil, err
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
switch r {
|
|
|
|
|
|
|
|
case ' ', '\t', '\n':
|
|
|
|
|
|
|
|
l.emit(int3ger)
|
|
|
|
|
|
|
|
return lexWhitespace, nil
|
|
|
|
|
|
|
|
case '.':
|
|
|
|
|
|
|
|
l.append(r)
|
|
|
|
|
|
|
|
return lexFloat, nil
|
|
|
|
|
|
|
|
case ')':
|
|
|
|
|
|
|
|
l.emit(int3ger)
|
|
|
|
|
|
|
|
return lexCloseParen, nil
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
if isDigit(r) {
|
|
|
|
|
|
|
|
l.append(r)
|
|
|
|
|
|
|
|
return lexInt, nil
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil, fmt.Errorf("unexpected rune in lexInt: %c", r)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func lexOnSymbol(l *lexer) (stateFn, error) {
|
|
|
|
// once we're in a float, the only valid values are digits, whitespace or close
|
|
|
|
|
|
|
|
// paren.
|
|
|
|
|
|
|
|
func lexFloat(l *lexer) (stateFn, error) {
|
|
|
|
r, err := l.next()
|
|
|
|
r, err := l.next()
|
|
|
|
if err != nil {
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
switch r {
|
|
|
|
switch r {
|
|
|
|
case ' ', '\t', '\n':
|
|
|
|
case ' ', '\t', '\n':
|
|
|
|
l.emit()
|
|
|
|
l.emit(fl0at)
|
|
|
|
return lexWhitespace, nil
|
|
|
|
return lexWhitespace, nil
|
|
|
|
case ')':
|
|
|
|
case ')':
|
|
|
|
l.emit()
|
|
|
|
l.emit(fl0at)
|
|
|
|
|
|
|
|
return lexCloseParen, nil
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
if isDigit(r) {
|
|
|
|
l.append(r)
|
|
|
|
l.append(r)
|
|
|
|
l.emit()
|
|
|
|
return lexFloat, nil
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil, fmt.Errorf("unexpected run in lexFloat: %c", r)
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// lexes a symbol in progress
|
|
|
|
|
|
|
|
func lexSymbol(l *lexer) (stateFn, error) {
|
|
|
|
|
|
|
|
r, err := l.next()
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
|
|
|
|
return nil, err
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
switch r {
|
|
|
|
|
|
|
|
case ' ', '\t', '\n':
|
|
|
|
|
|
|
|
l.emit(symbol)
|
|
|
|
|
|
|
|
return lexWhitespace, nil
|
|
|
|
|
|
|
|
case ')':
|
|
|
|
|
|
|
|
l.emit(symbol)
|
|
|
|
return lexCloseParen, nil
|
|
|
|
return lexCloseParen, nil
|
|
|
|
default:
|
|
|
|
default:
|
|
|
|
l.append(r)
|
|
|
|
l.append(r)
|
|
|
|
return lexOnSymbol, nil
|
|
|
|
return lexSymbol, nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
panic("not reached")
|
|
|
|
panic("not reached")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// lexes some whitespace in progress. Maybe this should be combined with root
|
|
|
|
|
|
|
|
// and the lexer shouldn't have a state. I think what I'm doing now is
|
|
|
|
|
|
|
|
// "wrong" but who honestly gives a shit.
|
|
|
|
func lexWhitespace(l *lexer) (stateFn, error) {
|
|
|
|
func lexWhitespace(l *lexer) (stateFn, error) {
|
|
|
|
r, err := l.next()
|
|
|
|
r, err := l.next()
|
|
|
|
if err != nil {
|
|
|
|
if err != nil {
|
|
|
@ -98,17 +195,19 @@ func lexWhitespace(l *lexer) (stateFn, error) {
|
|
|
|
case ' ', '\t', '\n':
|
|
|
|
case ' ', '\t', '\n':
|
|
|
|
return lexWhitespace, nil
|
|
|
|
return lexWhitespace, nil
|
|
|
|
case '(':
|
|
|
|
case '(':
|
|
|
|
l.append(r)
|
|
|
|
|
|
|
|
l.emit()
|
|
|
|
|
|
|
|
return lexOpenParen, nil
|
|
|
|
return lexOpenParen, nil
|
|
|
|
default:
|
|
|
|
}
|
|
|
|
|
|
|
|
if isDigit(r) {
|
|
|
|
l.append(r)
|
|
|
|
l.append(r)
|
|
|
|
return lexOnSymbol, nil
|
|
|
|
return lexInt, nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
panic("not reached")
|
|
|
|
l.append(r)
|
|
|
|
|
|
|
|
return lexSymbol, nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// lex a close parenthesis
|
|
|
|
func lexCloseParen(l *lexer) (stateFn, error) {
|
|
|
|
func lexCloseParen(l *lexer) (stateFn, error) {
|
|
|
|
|
|
|
|
l.out <- token{")", closeParen}
|
|
|
|
l.depth--
|
|
|
|
l.depth--
|
|
|
|
r, err := l.next()
|
|
|
|
r, err := l.next()
|
|
|
|
if err != nil {
|
|
|
|
if err != nil {
|
|
|
@ -122,14 +221,15 @@ func lexCloseParen(l *lexer) (stateFn, error) {
|
|
|
|
return lexWhitespace, nil
|
|
|
|
return lexWhitespace, nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
case ')':
|
|
|
|
case ')':
|
|
|
|
l.append(r)
|
|
|
|
|
|
|
|
l.emit()
|
|
|
|
|
|
|
|
return lexCloseParen, nil
|
|
|
|
return lexCloseParen, nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil, fmt.Errorf("unimplemented")
|
|
|
|
return nil, fmt.Errorf("unimplemented")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func lex(input io.Reader, c chan string) {
|
|
|
|
// lexes some lispy input from an io.Reader, emitting tokens on chan c. The
|
|
|
|
|
|
|
|
// channel is closed when the input reaches EOF, signaling that there are no
|
|
|
|
|
|
|
|
// new tokens.
|
|
|
|
|
|
|
|
func lex(input io.Reader, c chan token) {
|
|
|
|
defer close(c)
|
|
|
|
defer close(c)
|
|
|
|
l := &lexer{
|
|
|
|
l := &lexer{
|
|
|
|
input: bufio.NewReader(input),
|
|
|
|
input: bufio.NewReader(input),
|
|
|
@ -158,10 +258,10 @@ func main() {
|
|
|
|
os.Exit(1)
|
|
|
|
os.Exit(1)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
c := make(chan string)
|
|
|
|
c := make(chan token)
|
|
|
|
go lex(f, c)
|
|
|
|
go lex(f, c)
|
|
|
|
|
|
|
|
|
|
|
|
for s := range c {
|
|
|
|
for s := range c {
|
|
|
|
fmt.Println(s)
|
|
|
|
fmt.Println(s.t, s.lexeme)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|