You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

262 lines
4.5 KiB
Go

package main
import (
"bufio"
"errors"
"fmt"
"io"
"strings"
"unicode"
)
// tokenType identifies the lexical category of a token produced by the lexer.
type tokenType int

// The token types the lexer can emit. Values are assigned sequentially from
// zero via iota, so t_error is the zero value of tokenType.
const (
	t_error          tokenType = iota // a stored lex error
	t_eof                             // end of file token
	t_string                          // a string literal
	t_name                            // a name
	t_type                            // a type
	t_equals                          // equals sign
	t_comment                         // a comment
	t_list_start                      // [
	t_list_end                        // ]
	t_list_separator                  // ,
)

// String returns the identifier of the token type constant (for example
// "t_eof"), which makes tokens readable in debug output and test failures.
//
// It panics with the message "unknown token type: N" (N being the numeric
// value) when t is not one of the declared constants.
func (t tokenType) String() string {
	switch t {
	case t_error:
		return "t_error"
	case t_eof:
		return "t_eof"
	case t_string:
		return "t_string"
	case t_name:
		return "t_name"
	case t_type:
		return "t_type"
	case t_equals:
		return "t_equals"
	case t_comment:
		return "t_comment"
	case t_list_start:
		return "t_list_start"
	case t_list_end:
		return "t_list_end"
	case t_list_separator:
		return "t_list_separator"
	default:
		// Format the underlying integer with %d. Passing t itself to %v would
		// make fmt call this String method again, recursing until the stack
		// overflows instead of reporting the bad value.
		panic(fmt.Sprintf("unknown token type: %d", int(t)))
	}
}
// stateFn is one state of the lexer's state machine. It is called with the
// lexer and returns the state to run next together with an error; lexer.lex
// keeps calling the returned state until an error (including io.EOF) is
// returned.
type stateFn func(*lexer) (stateFn, error)
// token is a single lexical item sent on the lexer's output channel.
type token struct {
// t is the category of the token.
t tokenType
// s is the text collected for the token; for t_error tokens it holds the
// error message, and for t_eof it is empty.
s string
}
// lexer holds the state shared by the state functions while scanning input.
type lexer struct {
// in is the source that runes are read from.
in io.RuneReader
// out receives every emitted token; lexer.lex closes it when lexing stops.
out chan token
buf []rune // running buffer for current lexeme
// backup holds runes pushed back by unread; next pops from its end before
// reading from in again.
backup []rune
}
// lex drives the state machine: starting in lexRoot, it repeatedly runs the
// current state and switches to the state it returns. The loop ends on the
// first error. io.EOF is reported as a final t_eof token; any other error is
// reported as a t_error token carrying the error message. The output channel
// is closed when lex returns.
func (l *lexer) lex() {
	defer close(l.out)

	state := stateFn(lexRoot)
	for {
		following, err := state(l)
		if err == io.EOF {
			l.out <- token{t_eof, ""}
			return
		}
		if err != nil {
			l.out <- token{t_error, err.Error()}
			return
		}
		state = following
	}
}
// next returns the next rune to lex. Runes previously pushed back with unread
// are returned first, most recently pushed first; only when none remain is a
// rune read from the underlying input, whose error is passed through.
func (l *lexer) next() (rune, error) {
	last := len(l.backup) - 1
	if last < 0 {
		// Nothing pushed back: consume from the input.
		r, _, err := l.in.ReadRune()
		return r, err
	}

	r := l.backup[last]
	l.backup = l.backup[:last]
	return r, nil
}
// keep appends r to the buffer of the lexeme currently being built. The
// buffer is allocated with room for 18 runes the first time it is needed.
func (l *lexer) keep(r rune) {
	if l.buf != nil {
		l.buf = append(l.buf, r)
		return
	}
	l.buf = append(make([]rune, 0, 18), r)
}
// unread pushes r back so that the next call to next returns it before
// reading any further input.
func (l *lexer) unread(r rune) {
	pushed := append(l.backup, r)
	l.backup = pushed
}
// emit sends a token of type t whose text is the current lexeme buffer, then
// empties the buffer (keeping its capacity) ready for the next lexeme. The
// send blocks until the token is received.
func (l *lexer) emit(t tokenType) {
	tok := token{t: t, s: string(l.buf)}
	l.out <- tok
	l.buf = l.buf[:0]
}
// lexString starts lexing the text in and returns the channel on which the
// resulting tokens are delivered.
func lexString(in string) chan token {
	return lex(strings.NewReader(in))
}
// lex starts a goroutine that lexes everything read from r and returns the
// unbuffered channel the tokens are sent on. The input is wrapped in a
// bufio.Reader so it can be consumed rune by rune.
func lex(r io.Reader) chan token {
	tokens := make(chan token)
	l := &lexer{
		in:     bufio.NewReader(r),
		out:    tokens,
		backup: make([]rune, 0, 4),
	}
	go l.lex()
	return tokens
}
// fullTokens receives tokens from c until it is closed and returns them in
// order. If a t_error token arrives, collection stops immediately and the
// token's text is returned as the error with a nil slice.
func fullTokens(c chan token) ([]token, error) {
	collected := make([]token, 0, 32)
	for {
		tok, open := <-c
		if !open {
			return collected, nil
		}
		if tok.t == t_error {
			return nil, errors.New(tok.s)
		}
		collected = append(collected, tok)
	}
}
// lexRoot is the top-level state. It reads one rune and decides what kind of
// token begins there:
//
//   - '=', '[', ']' and ',' are emitted at once as one-rune tokens;
//   - '"' or '`' opens a string literal closed by the same rune;
//   - '#' begins a comment;
//   - whitespace is skipped;
//   - a lower-case letter begins a name and an upper-case letter begins a type.
//
// Any other rune is an error. Errors from reading input (including io.EOF)
// are returned unchanged.
func lexRoot(l *lexer) (stateFn, error) {
	r, err := l.next()
	if err != nil {
		return nil, err
	}

	// punct emits r as a complete one-rune token of type t and stays in the
	// root state.
	punct := func(t tokenType) (stateFn, error) {
		l.keep(r)
		l.emit(t)
		return lexRoot, nil
	}

	switch r {
	case '=':
		return punct(t_equals)
	case '[':
		return punct(t_list_start)
	case ']':
		return punct(t_list_end)
	case ',':
		return punct(t_list_separator)
	case '"', '`':
		return lexStringLiteral(r), nil
	case '#':
		return lexComment, nil
	}

	if unicode.IsSpace(r) {
		return lexRoot, nil
	}
	if unicode.IsLower(r) {
		l.keep(r)
		return lexName, nil
	}
	if unicode.IsUpper(r) {
		l.keep(r)
		return lexType, nil
	}
	return nil, fmt.Errorf("unexpected rune in lexRoot: %c", r)
}
// lexComment collects the runes of a comment after its leading '#' has been
// consumed. The comment runs to the next newline or to the end of input; in
// both cases a t_comment token is emitted with the collected text, which
// contains neither the '#' nor the newline.
func lexComment(l *lexer) (stateFn, error) {
	r, err := l.next()
	if err == io.EOF {
		// Input ended mid-comment: flush what was collected, then stop.
		l.emit(t_comment)
		return nil, io.EOF
	}
	if err != nil {
		return nil, err
	}

	if r != '\n' {
		l.keep(r)
		return lexComment, nil
	}
	l.emit(t_comment)
	return lexRoot, nil
}
// lexStringLiteral returns the state that lexes the body of a string literal
// whose opening delimiter delim has already been consumed.
//
// Runes are collected until the matching closing delimiter, at which point a
// t_string token holding the contents (without the delimiters) is emitted and
// lexing continues in lexRoot. A backslash makes the following rune part of
// the string verbatim, so \" and \\ yield a literal quote and backslash; no
// other escape translation is performed.
//
// If the input ends before the closing delimiter, the state returns an error
// wrapping io.ErrUnexpectedEOF instead of a bare io.EOF. A bare io.EOF is
// treated by lexer.lex as a clean end of file, which would silently discard
// the unterminated literal; the wrapped error is reported as a t_error token.
// Other read errors are returned unchanged.
func lexStringLiteral(delim rune) stateFn {
	// read fetches the next rune, turning end of input into an
	// "unterminated string literal" error.
	read := func(l *lexer) (rune, error) {
		r, err := l.next()
		if err == io.EOF {
			return 0, fmt.Errorf("unterminated string literal: %w", io.ErrUnexpectedEOF)
		}
		return r, err
	}

	// The state refers to itself so that one closure is reused for the whole
	// literal rather than allocating a new one for every rune.
	var state stateFn
	state = func(l *lexer) (stateFn, error) {
		r, err := read(l)
		if err != nil {
			return nil, err
		}
		switch r {
		case delim:
			l.emit(t_string)
			return lexRoot, nil
		case '\\':
			// Keep whatever follows the backslash as-is.
			escaped, err := read(l)
			if err != nil {
				return nil, err
			}
			l.keep(escaped)
		default:
			l.keep(r)
		}
		return state, nil
	}
	return state
}
// lexName continues a name whose first rune has already been kept. Letters,
// digits and underscores extend the name. Any other rune ends it: a t_name
// token is emitted and the rune is pushed back for lexRoot to handle. End of
// input also ends the name, emitting the token before io.EOF is returned.
func lexName(l *lexer) (stateFn, error) {
	r, err := l.next()
	if err == io.EOF {
		l.emit(t_name)
		return nil, io.EOF
	}
	if err != nil {
		return nil, err
	}

	if unicode.IsLetter(r) || unicode.IsDigit(r) || r == '_' {
		l.keep(r)
		return lexName, nil
	}

	l.emit(t_name)
	l.unread(r)
	return lexRoot, nil
}
// lexType continues a type identifier whose first rune has already been kept.
// Letters, digits and underscores extend it. Any other rune ends it: a t_type
// token is emitted and the rune is pushed back for lexRoot to handle. End of
// input also ends the identifier, emitting the token before io.EOF is
// returned.
func lexType(l *lexer) (stateFn, error) {
	r, err := l.next()
	if err == io.EOF {
		l.emit(t_type)
		return nil, io.EOF
	}
	if err != nil {
		return nil, err
	}

	partOfIdent := unicode.IsLetter(r) || unicode.IsDigit(r) || r == '_'
	if !partOfIdent {
		l.emit(t_type)
		l.unread(r)
		return lexRoot, nil
	}

	l.keep(r)
	return lexType, nil
}