You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

538 lines
10 KiB
Go

package moon
import (
"bufio"
"bytes"
"errors"
"fmt"
"io"
"strings"
"time"
"unicode"
)
const eof = -1
type tokenType int
func (t tokenType) String() string {
switch t {
case t_error:
return "t_error"
case t_eof:
return "t_eof"
case t_string:
return "t_string"
case t_name:
return "t_name"
case t_comment:
return "t_comment"
case t_list_start:
return "t_list_start"
case t_list_end:
return "t_list_end"
case t_object_start:
return "t_object_start"
case t_object_separator:
return "t_object_separator"
case t_object_end:
return "t_object_end"
case t_real_number:
return "t_real_number"
case t_imaginary_number:
return "t_imaginary_number"
case t_variable:
return "t_variable"
case t_bool:
return "t_bool"
case t_duration:
return "t_duration"
default:
panic(fmt.Sprintf("unknown token type: %v", t))
}
}
const (
t_error tokenType = iota // a stored lex error
t_eof // end of file token
t_string // a bare string
t_string_quoted // a quoted string
t_name // a name
t_comment // a comment
t_list_start // [
t_list_end // ]
t_object_start // {
t_object_end // }
t_object_separator // :
t_real_number // a number
t_imaginary_number // an imaginary number
t_variable // e.g. @var_name, a variable name.
t_bool // a boolean token (true|false)
t_duration // a duration (e.g.: 1s, 2h45m, 900ms)
)
type stateFn func(*lexer) stateFn
type token struct {
t tokenType
s string
}
func (t token) String() string {
return fmt.Sprintf("{%s %s}", t.t, t.s)
}
type lexer struct {
in io.RuneReader
out chan token
buf []rune // running buffer for current lexeme
backup []rune
err error
}
func (l *lexer) lex() {
defer close(l.out)
for fn := lexRoot; fn != nil; {
fn = fn(l)
if l.err != nil {
fn = lexErrorf("read error: %s", l.err)
}
}
}
func (l *lexer) next() rune {
if len(l.backup) > 0 {
r := l.backup[len(l.backup)-1]
l.backup = l.backup[:len(l.backup)-1]
return r
}
r, _, err := l.in.ReadRune()
switch err {
case io.EOF:
return eof
case nil:
return r
default:
l.err = err
return eof
}
return r
}
func (l *lexer) peek() rune {
r := l.next()
l.unread(r)
return r
}
func (l *lexer) keep(r rune) {
if l.buf == nil {
l.buf = make([]rune, 0, 18)
}
l.buf = append(l.buf, r)
}
func (l *lexer) unread(r rune) {
l.backup = append(l.backup, r)
}
func (l *lexer) emit(t tokenType) {
switch t {
case t_variable:
if !l.bufHasSpaces() {
break
}
msg := fmt.Sprintf(`invalid var name: "%s" (var names cannot contain spaces)`, string(l.buf))
l.out <- token{t_error, msg}
return
case t_name:
if !l.bufHasSpaces() {
break
}
msg := fmt.Sprintf(`invalid name: "%s" (names cannot contain spaces)`, string(l.buf))
l.out <- token{t_error, msg}
return
case t_string:
switch string(l.buf) {
case "true", "false":
t = t_bool
}
case t_string_quoted:
t = t_string
}
l.out <- token{t, string(l.buf)}
l.buf = l.buf[0:0]
}
func (l *lexer) accept(chars string) bool {
r := l.next()
if strings.IndexRune(chars, r) >= 0 {
l.keep(r)
return true
} else {
l.unread(r)
return false
}
}
func (l *lexer) acceptRun(chars string) bool {
none := true
for l.accept(chars) {
none = false
}
return !none
}
func (l *lexer) bufHasSpaces() bool {
for _, r := range l.buf {
if unicode.IsSpace(r) {
return true
}
}
return false
}
func lexString(in string) chan token {
r := strings.NewReader(in)
return lex(r)
}
func lex(r io.Reader) chan token {
l := lexer{
in: bufio.NewReader(r),
out: make(chan token),
backup: make([]rune, 0, 4),
}
go l.lex()
return l.out
}
func fullTokens(c chan token) ([]token, error) {
tokens := make([]token, 0, 32)
for t := range c {
if t.t == t_error {
return nil, errors.New(t.s)
}
tokens = append(tokens, t)
}
return tokens, nil
}
func lexErrorf(t string, args ...interface{}) stateFn {
return func(l *lexer) stateFn {
l.out <- token{t_error, fmt.Sprintf(t, args...)}
return nil
}
}
func lexRoot(l *lexer) stateFn {
r := l.next()
switch {
case r == eof:
return nil
case r == ':':
l.keep(r)
l.emit(t_object_separator)
return lexRoot
case r == '"', r == '`':
return lexQuotedString(r)
case r == '#':
return lexComment
case r == '[':
l.keep(r)
l.emit(t_list_start)
return lexRoot
case r == ']':
l.keep(r)
l.emit(t_list_end)
return lexRoot
case r == '{':
l.keep(r)
l.emit(t_object_start)
return lexRoot
case r == '}':
l.keep(r)
l.emit(t_object_end)
return lexRoot
case r == ':':
l.keep(r)
l.emit(t_object_separator)
return lexRoot
case r == '.':
l.keep(r)
return lexAfterPeriod
case r == '<':
if l.peek() == '<' {
l.next()
return lexHeredocStart
}
fallthrough
case r == '@':
return lexVariable
case strings.IndexRune("+-0123456789", r) >= 0:
l.unread(r)
return lexNumber
case unicode.IsSpace(r):
return lexRoot
case unicode.IsPrint(r):
l.keep(r)
return lexNameOrString
default:
return lexErrorf("unexpected rune in lexRoot: %c", r)
}
}
func lexAfterPeriod(l *lexer) stateFn {
r := l.next()
switch {
case strings.IndexRune("+-0123456789", r) >= 0:
l.unread(r)
return lexNumber
case unicode.IsLower(r):
l.keep(r)
return lexNameOrString
default:
return lexErrorf("unexpected rune after period: %c", r)
}
}
func lexComment(l *lexer) stateFn {
switch r := l.next(); r {
case '\n':
l.emit(t_comment)
return lexRoot
case eof:
l.emit(t_comment)
return nil
default:
l.keep(r)
return lexComment
}
}
func lexQuotedString(delim rune) stateFn {
return func(l *lexer) stateFn {
switch r := l.next(); r {
case delim:
l.emit(t_string_quoted)
return lexRoot
case '\\':
switch r := l.next(); r {
case eof:
return lexErrorf("unexpected eof in string literal")
default:
l.keep(r)
return lexQuotedString(delim)
}
case eof:
return lexErrorf("unexpected eof in string literal")
default:
l.keep(r)
return lexQuotedString(delim)
}
}
}
func lexNameOrString(l *lexer) stateFn {
r := l.next()
switch {
case r == '\n', r == ';':
l.emit(t_string)
return lexRoot
case r == ':':
l.emit(t_name)
l.keep(r)
l.emit(t_object_separator)
return lexRoot
case isSpecial(r):
l.emit(t_string)
l.unread(r)
return lexRoot
case r == '\\':
rr := l.next()
if rr == eof {
return lexErrorf("unexpected eof in string or name")
}
l.keep(rr)
return lexNameOrString
case r == eof:
l.emit(t_string)
return nil
case unicode.IsPrint(r):
l.keep(r)
return lexNameOrString
default:
return lexErrorf("unexpected rune in string or name: %c", r)
}
}
func lexVariable(l *lexer) stateFn {
r := l.next()
switch {
case unicode.IsSpace(r), r == ';':
l.emit(t_variable)
return lexRoot
case r == '\\':
rr := l.next()
if rr == eof {
return lexErrorf("unexpected eof in variable name")
}
l.keep(rr)
return lexVariable
case isSpecial(r):
l.emit(t_variable)
l.unread(r)
return lexRoot
case r == eof:
l.emit(t_variable)
return nil
case unicode.IsPrint(r):
l.keep(r)
return lexVariable
default:
return lexErrorf("unexpected rune in var name: %c", r)
}
}
func lexNumber(l *lexer) stateFn {
l.accept("+-")
digits := "0123456789"
if l.accept("0") {
if l.accept("xX") {
digits = "0123456789abcdefABCDEF"
} else {
digits = "01234567"
}
}
l.acceptRun(digits)
if l.accept(".") {
l.acceptRun(digits)
}
if l.accept("eE") {
l.accept("+-")
l.acceptRun("0123456789")
}
imaginary := l.accept("i")
r := l.next()
if isAlphaNumeric(r) {
l.keep(r)
return lexDuration
}
l.unread(r)
if imaginary {
l.emit(t_imaginary_number)
} else {
l.emit(t_real_number)
}
return lexRoot
}
func lexDuration(l *lexer) stateFn {
r := l.next()
switch {
case r == '\n', r == ';':
_, err := time.ParseDuration(string(l.buf))
if err == nil {
l.emit(t_duration)
return lexRoot
}
l.emit(t_string)
return lexRoot
case unicode.IsSpace(r):
_, err := time.ParseDuration(string(l.buf))
if err == nil {
l.emit(t_duration)
return lexRoot
}
l.keep(r)
return lexNameOrString
case r == ':':
_, err := time.ParseDuration(string(l.buf))
if err == nil {
l.emit(t_duration)
} else {
l.emit(t_name)
}
l.keep(r)
l.emit(t_object_separator)
return lexRoot
case isSpecial(r):
_, err := time.ParseDuration(string(l.buf))
if err == nil {
l.emit(t_duration)
} else {
l.emit(t_string)
}
l.unread(r)
return lexRoot
case r == '\\':
l.unread(r)
return lexNameOrString
case r == eof:
_, err := time.ParseDuration(string(l.buf))
if err == nil {
l.emit(t_duration)
} else {
l.emit(t_string)
}
return nil
case unicode.IsPrint(r):
l.keep(r)
return lexDuration
default:
return lexErrorf("unhandled character type in lexDuration: %c", r)
}
}
func lexHeredocStart(l *lexer) stateFn {
r := l.next()
switch {
case r == '\n':
if len(l.buf) == 0 {
return lexErrorf("illegal zero-width heredoc name")
}
label := string(l.buf)
l.buf = l.buf[0:0]
return lexHeredocBody(label)
case unicode.IsUpper(r):
l.keep(r)
return lexHeredocStart
case r == eof:
return lexErrorf("unexpected EOF in lexHeredocStart")
default:
return lexErrorf("unexpected rune in lexHeredocStart: %c (only uppercase letters are ok here)", r)
}
}
func lexHeredocBody(label string) stateFn {
var body bytes.Buffer
line := make([]rune, 0, 128)
return func(l *lexer) stateFn {
for {
r := l.next()
switch r {
case '\n':
if string(line) == label {
l.out <- token{t_string, string(body.Bytes())}
return lexRoot
}
body.WriteString(string(line))
line = line[0:0]
body.WriteRune(r)
case eof:
return lexErrorf("unexpected eof inside of heredoc %s", label)
default:
line = append(line, r)
}
}
}
}
func isAlphaNumeric(r rune) bool {
return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)
}
func isSpecial(r rune) bool {
return strings.ContainsRune("[]{}:;#", r)
}