|
|
|
package moon
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bufio"
|
|
|
|
"bytes"
|
|
|
|
"errors"
|
|
|
|
"fmt"
|
|
|
|
"io"
|
|
|
|
"strings"
|
|
|
|
"time"
|
|
|
|
"unicode"
|
|
|
|
)
|
|
|
|
|
|
|
|
// eof is the sentinel rune that lexer.next returns when the input is
// exhausted or a read fails. It is negative, so it cannot be confused with a
// rune decoded from the input.
const eof = -1
|
|
|
|
|
|
|
|
// tokenType identifies the lexical class of a token.
type tokenType int

// String returns the identifier of the token type (for example "t_name").
// Values outside the declared set are rendered as "tokenType(N)" rather than
// panicking, so that printing a token can never crash the program.
func (t tokenType) String() string {
	switch t {
	case t_error:
		return "t_error"
	case t_eof:
		return "t_eof"
	case t_string:
		return "t_string"
	case t_string_quoted:
		return "t_string_quoted"
	case t_name:
		return "t_name"
	case t_comment:
		return "t_comment"
	case t_list_start:
		return "t_list_start"
	case t_list_end:
		return "t_list_end"
	case t_object_start:
		return "t_object_start"
	case t_object_separator:
		return "t_object_separator"
	case t_object_end:
		return "t_object_end"
	case t_real_number:
		return "t_real_number"
	case t_imaginary_number:
		return "t_imaginary_number"
	case t_variable:
		return "t_variable"
	case t_bool:
		return "t_bool"
	case t_duration:
		return "t_duration"
	default:
		// Format the underlying integer. Formatting t itself with %v would
		// invoke String again and recurse until the stack overflows.
		return fmt.Sprintf("tokenType(%d)", int(t))
	}
}

const (
	t_error            tokenType = iota // a stored lex error
	t_eof                               // end of file token
	t_string                            // a bare string
	t_string_quoted                     // a quoted string
	t_name                              // a name
	t_comment                           // a comment
	t_list_start                        // [
	t_list_end                          // ]
	t_object_start                      // {
	t_object_end                        // }
	t_object_separator                  // :
	t_real_number                       // a number
	t_imaginary_number                  // an imaginary number
	t_variable                          // e.g. @var_name, a variable name.
	t_bool                              // a boolean token (true|false)
	t_duration                          // a duration (e.g.: 1s, 2h45m, 900ms)
)
|
|
|
|
|
|
|
|
// stateFn is one state of the lexer's state machine. It consumes input from
// the lexer and returns the state to run next; a nil result stops the machine.
type stateFn func(*lexer) stateFn
|
|
|
|
|
|
|
|
type token struct {
|
|
|
|
t tokenType
|
|
|
|
s string
|
|
|
|
}
|
|
|
|
|
|
|
|
func (t token) String() string {
|
|
|
|
return fmt.Sprintf("{%s %s}", t.t, t.s)
|
|
|
|
}
|
|
|
|
|
|
|
|
type lexer struct {
|
|
|
|
in io.RuneReader
|
|
|
|
out chan token
|
|
|
|
buf []rune // running buffer for current lexeme
|
|
|
|
backup []rune
|
|
|
|
err error
|
|
|
|
}
|
|
|
|
|
|
|
|
func (l *lexer) lex() {
|
|
|
|
defer close(l.out)
|
|
|
|
for fn := lexRoot; fn != nil; {
|
|
|
|
fn = fn(l)
|
|
|
|
if l.err != nil {
|
|
|
|
fn = lexErrorf("read error: %s", l.err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (l *lexer) next() rune {
|
|
|
|
if len(l.backup) > 0 {
|
|
|
|
r := l.backup[len(l.backup)-1]
|
|
|
|
l.backup = l.backup[:len(l.backup)-1]
|
|
|
|
return r
|
|
|
|
}
|
|
|
|
r, _, err := l.in.ReadRune()
|
|
|
|
switch err {
|
|
|
|
case io.EOF:
|
|
|
|
return eof
|
|
|
|
case nil:
|
|
|
|
return r
|
|
|
|
default:
|
|
|
|
l.err = err
|
|
|
|
return eof
|
|
|
|
}
|
|
|
|
return r
|
|
|
|
}
|
|
|
|
|
|
|
|
func (l *lexer) peek() rune {
|
|
|
|
r := l.next()
|
|
|
|
l.unread(r)
|
|
|
|
return r
|
|
|
|
}
|
|
|
|
|
|
|
|
func (l *lexer) keep(r rune) {
|
|
|
|
if l.buf == nil {
|
|
|
|
l.buf = make([]rune, 0, 18)
|
|
|
|
}
|
|
|
|
l.buf = append(l.buf, r)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (l *lexer) unread(r rune) {
|
|
|
|
l.backup = append(l.backup, r)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (l *lexer) emit(t tokenType) {
|
|
|
|
switch t {
|
|
|
|
case t_variable:
|
|
|
|
if !l.bufHasSpaces() {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
msg := fmt.Sprintf(`invalid var name: "%s" (var names cannot contain spaces)`, string(l.buf))
|
|
|
|
l.out <- token{t_error, msg}
|
|
|
|
return
|
|
|
|
case t_name:
|
|
|
|
if !l.bufHasSpaces() {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
msg := fmt.Sprintf(`invalid name: "%s" (names cannot contain spaces)`, string(l.buf))
|
|
|
|
l.out <- token{t_error, msg}
|
|
|
|
return
|
|
|
|
case t_string:
|
|
|
|
switch string(l.buf) {
|
|
|
|
case "true", "false":
|
|
|
|
t = t_bool
|
|
|
|
}
|
|
|
|
case t_string_quoted:
|
|
|
|
t = t_string
|
|
|
|
}
|
|
|
|
l.out <- token{t, string(l.buf)}
|
|
|
|
l.buf = l.buf[0:0]
|
|
|
|
}
|
|
|
|
|
|
|
|
func (l *lexer) accept(chars string) bool {
|
|
|
|
r := l.next()
|
|
|
|
if strings.IndexRune(chars, r) >= 0 {
|
|
|
|
l.keep(r)
|
|
|
|
return true
|
|
|
|
} else {
|
|
|
|
l.unread(r)
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (l *lexer) acceptRun(chars string) bool {
|
|
|
|
none := true
|
|
|
|
for l.accept(chars) {
|
|
|
|
none = false
|
|
|
|
}
|
|
|
|
return !none
|
|
|
|
}
|
|
|
|
|
|
|
|
func (l *lexer) bufHasSpaces() bool {
|
|
|
|
for _, r := range l.buf {
|
|
|
|
if unicode.IsSpace(r) {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
func lexString(in string) chan token {
|
|
|
|
r := strings.NewReader(in)
|
|
|
|
return lex(r)
|
|
|
|
}
|
|
|
|
|
|
|
|
func lex(r io.Reader) chan token {
|
|
|
|
l := lexer{
|
|
|
|
in: bufio.NewReader(r),
|
|
|
|
out: make(chan token),
|
|
|
|
backup: make([]rune, 0, 4),
|
|
|
|
}
|
|
|
|
go l.lex()
|
|
|
|
return l.out
|
|
|
|
}
|
|
|
|
|
|
|
|
func fullTokens(c chan token) ([]token, error) {
|
|
|
|
tokens := make([]token, 0, 32)
|
|
|
|
for t := range c {
|
|
|
|
if t.t == t_error {
|
|
|
|
return nil, errors.New(t.s)
|
|
|
|
}
|
|
|
|
tokens = append(tokens, t)
|
|
|
|
}
|
|
|
|
return tokens, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func lexErrorf(t string, args ...interface{}) stateFn {
|
|
|
|
return func(l *lexer) stateFn {
|
|
|
|
l.out <- token{t_error, fmt.Sprintf(t, args...)}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func lexRoot(l *lexer) stateFn {
|
|
|
|
r := l.next()
|
|
|
|
switch {
|
|
|
|
case r == eof:
|
|
|
|
return nil
|
|
|
|
case r == ':':
|
|
|
|
l.keep(r)
|
|
|
|
l.emit(t_object_separator)
|
|
|
|
return lexRoot
|
|
|
|
case r == '"', r == '`':
|
|
|
|
return lexQuotedString(r)
|
|
|
|
case r == '#':
|
|
|
|
return lexComment
|
|
|
|
case r == '[':
|
|
|
|
l.keep(r)
|
|
|
|
l.emit(t_list_start)
|
|
|
|
return lexRoot
|
|
|
|
case r == ']':
|
|
|
|
l.keep(r)
|
|
|
|
l.emit(t_list_end)
|
|
|
|
return lexRoot
|
|
|
|
case r == '{':
|
|
|
|
l.keep(r)
|
|
|
|
l.emit(t_object_start)
|
|
|
|
return lexRoot
|
|
|
|
case r == '}':
|
|
|
|
l.keep(r)
|
|
|
|
l.emit(t_object_end)
|
|
|
|
return lexRoot
|
|
|
|
case r == ':':
|
|
|
|
l.keep(r)
|
|
|
|
l.emit(t_object_separator)
|
|
|
|
return lexRoot
|
|
|
|
case r == '.':
|
|
|
|
l.keep(r)
|
|
|
|
return lexAfterPeriod
|
|
|
|
case r == '<':
|
|
|
|
if l.peek() == '<' {
|
|
|
|
l.next()
|
|
|
|
return lexHeredocStart
|
|
|
|
}
|
|
|
|
fallthrough
|
|
|
|
case r == '@':
|
|
|
|
return lexVariable
|
|
|
|
case strings.IndexRune("+-0123456789", r) >= 0:
|
|
|
|
l.unread(r)
|
|
|
|
return lexNumber
|
|
|
|
case unicode.IsSpace(r):
|
|
|
|
return lexRoot
|
|
|
|
case unicode.IsPrint(r):
|
|
|
|
l.keep(r)
|
|
|
|
return lexNameOrString
|
|
|
|
default:
|
|
|
|
return lexErrorf("unexpected rune in lexRoot: %c", r)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func lexAfterPeriod(l *lexer) stateFn {
|
|
|
|
r := l.next()
|
|
|
|
switch {
|
|
|
|
case strings.IndexRune("+-0123456789", r) >= 0:
|
|
|
|
l.unread(r)
|
|
|
|
return lexNumber
|
|
|
|
case unicode.IsLower(r):
|
|
|
|
l.keep(r)
|
|
|
|
return lexNameOrString
|
|
|
|
default:
|
|
|
|
return lexErrorf("unexpected rune after period: %c", r)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func lexComment(l *lexer) stateFn {
|
|
|
|
switch r := l.next(); r {
|
|
|
|
case '\n':
|
|
|
|
l.emit(t_comment)
|
|
|
|
return lexRoot
|
|
|
|
case eof:
|
|
|
|
l.emit(t_comment)
|
|
|
|
return nil
|
|
|
|
default:
|
|
|
|
l.keep(r)
|
|
|
|
return lexComment
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func lexQuotedString(delim rune) stateFn {
|
|
|
|
return func(l *lexer) stateFn {
|
|
|
|
switch r := l.next(); r {
|
|
|
|
case delim:
|
|
|
|
l.emit(t_string_quoted)
|
|
|
|
return lexRoot
|
|
|
|
case '\\':
|
|
|
|
switch r := l.next(); r {
|
|
|
|
case eof:
|
|
|
|
return lexErrorf("unexpected eof in string literal")
|
|
|
|
default:
|
|
|
|
l.keep(r)
|
|
|
|
return lexQuotedString(delim)
|
|
|
|
}
|
|
|
|
case eof:
|
|
|
|
return lexErrorf("unexpected eof in string literal")
|
|
|
|
default:
|
|
|
|
l.keep(r)
|
|
|
|
return lexQuotedString(delim)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func lexNameOrString(l *lexer) stateFn {
|
|
|
|
r := l.next()
|
|
|
|
switch {
|
|
|
|
case r == '\n', r == ';':
|
|
|
|
l.emit(t_string)
|
|
|
|
return lexRoot
|
|
|
|
case r == ':':
|
|
|
|
l.emit(t_name)
|
|
|
|
l.keep(r)
|
|
|
|
l.emit(t_object_separator)
|
|
|
|
return lexRoot
|
|
|
|
case isSpecial(r):
|
|
|
|
l.emit(t_string)
|
|
|
|
l.unread(r)
|
|
|
|
return lexRoot
|
|
|
|
case r == '\\':
|
|
|
|
rr := l.next()
|
|
|
|
if rr == eof {
|
|
|
|
return lexErrorf("unexpected eof in string or name")
|
|
|
|
}
|
|
|
|
l.keep(rr)
|
|
|
|
return lexNameOrString
|
|
|
|
case r == eof:
|
|
|
|
l.emit(t_string)
|
|
|
|
return nil
|
|
|
|
case unicode.IsPrint(r):
|
|
|
|
l.keep(r)
|
|
|
|
return lexNameOrString
|
|
|
|
default:
|
|
|
|
return lexErrorf("unexpected rune in string or name: %c", r)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func lexVariable(l *lexer) stateFn {
|
|
|
|
r := l.next()
|
|
|
|
switch {
|
|
|
|
case unicode.IsSpace(r), r == ';':
|
|
|
|
l.emit(t_variable)
|
|
|
|
return lexRoot
|
|
|
|
case r == '\\':
|
|
|
|
rr := l.next()
|
|
|
|
if rr == eof {
|
|
|
|
return lexErrorf("unexpected eof in variable name")
|
|
|
|
}
|
|
|
|
l.keep(rr)
|
|
|
|
return lexVariable
|
|
|
|
case isSpecial(r):
|
|
|
|
l.emit(t_variable)
|
|
|
|
l.unread(r)
|
|
|
|
return lexRoot
|
|
|
|
case r == eof:
|
|
|
|
l.emit(t_variable)
|
|
|
|
return nil
|
|
|
|
case unicode.IsPrint(r):
|
|
|
|
l.keep(r)
|
|
|
|
return lexVariable
|
|
|
|
default:
|
|
|
|
return lexErrorf("unexpected rune in var name: %c", r)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func lexNumber(l *lexer) stateFn {
|
|
|
|
l.accept("+-")
|
|
|
|
digits := "0123456789"
|
|
|
|
if l.accept("0") {
|
|
|
|
if l.accept("xX") {
|
|
|
|
digits = "0123456789abcdefABCDEF"
|
|
|
|
} else {
|
|
|
|
digits = "01234567"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
l.acceptRun(digits)
|
|
|
|
if l.accept(".") {
|
|
|
|
l.acceptRun(digits)
|
|
|
|
}
|
|
|
|
if l.accept("eE") {
|
|
|
|
l.accept("+-")
|
|
|
|
l.acceptRun("0123456789")
|
|
|
|
}
|
|
|
|
imaginary := l.accept("i")
|
|
|
|
r := l.next()
|
|
|
|
if isAlphaNumeric(r) {
|
|
|
|
l.keep(r)
|
|
|
|
return lexDuration
|
|
|
|
}
|
|
|
|
l.unread(r)
|
|
|
|
if imaginary {
|
|
|
|
l.emit(t_imaginary_number)
|
|
|
|
} else {
|
|
|
|
l.emit(t_real_number)
|
|
|
|
}
|
|
|
|
return lexRoot
|
|
|
|
}
|
|
|
|
|
|
|
|
func lexDuration(l *lexer) stateFn {
|
|
|
|
r := l.next()
|
|
|
|
switch {
|
|
|
|
case r == '\n', r == ';':
|
|
|
|
_, err := time.ParseDuration(string(l.buf))
|
|
|
|
if err == nil {
|
|
|
|
l.emit(t_duration)
|
|
|
|
return lexRoot
|
|
|
|
}
|
|
|
|
l.emit(t_string)
|
|
|
|
return lexRoot
|
|
|
|
case unicode.IsSpace(r):
|
|
|
|
_, err := time.ParseDuration(string(l.buf))
|
|
|
|
if err == nil {
|
|
|
|
l.emit(t_duration)
|
|
|
|
return lexRoot
|
|
|
|
}
|
|
|
|
l.keep(r)
|
|
|
|
return lexNameOrString
|
|
|
|
case r == ':':
|
|
|
|
_, err := time.ParseDuration(string(l.buf))
|
|
|
|
if err == nil {
|
|
|
|
l.emit(t_duration)
|
|
|
|
} else {
|
|
|
|
l.emit(t_name)
|
|
|
|
}
|
|
|
|
l.keep(r)
|
|
|
|
l.emit(t_object_separator)
|
|
|
|
return lexRoot
|
|
|
|
case isSpecial(r):
|
|
|
|
_, err := time.ParseDuration(string(l.buf))
|
|
|
|
if err == nil {
|
|
|
|
l.emit(t_duration)
|
|
|
|
} else {
|
|
|
|
l.emit(t_string)
|
|
|
|
}
|
|
|
|
l.unread(r)
|
|
|
|
return lexRoot
|
|
|
|
case r == '\\':
|
|
|
|
l.unread(r)
|
|
|
|
return lexNameOrString
|
|
|
|
case r == eof:
|
|
|
|
_, err := time.ParseDuration(string(l.buf))
|
|
|
|
if err == nil {
|
|
|
|
l.emit(t_duration)
|
|
|
|
} else {
|
|
|
|
l.emit(t_string)
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
case unicode.IsPrint(r):
|
|
|
|
l.keep(r)
|
|
|
|
return lexDuration
|
|
|
|
default:
|
|
|
|
return lexErrorf("unhandled character type in lexDuration: %c", r)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func lexHeredocStart(l *lexer) stateFn {
|
|
|
|
r := l.next()
|
|
|
|
switch {
|
|
|
|
case r == '\n':
|
|
|
|
if len(l.buf) == 0 {
|
|
|
|
return lexErrorf("illegal zero-width heredoc name")
|
|
|
|
}
|
|
|
|
label := string(l.buf)
|
|
|
|
l.buf = l.buf[0:0]
|
|
|
|
return lexHeredocBody(label)
|
|
|
|
case unicode.IsUpper(r):
|
|
|
|
l.keep(r)
|
|
|
|
return lexHeredocStart
|
|
|
|
case r == eof:
|
|
|
|
return lexErrorf("unexpected EOF in lexHeredocStart")
|
|
|
|
default:
|
|
|
|
return lexErrorf("unexpected rune in lexHeredocStart: %c (only uppercase letters are ok here)", r)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func lexHeredocBody(label string) stateFn {
|
|
|
|
var body bytes.Buffer
|
|
|
|
line := make([]rune, 0, 128)
|
|
|
|
return func(l *lexer) stateFn {
|
|
|
|
for {
|
|
|
|
r := l.next()
|
|
|
|
switch r {
|
|
|
|
case '\n':
|
|
|
|
if string(line) == label {
|
|
|
|
l.out <- token{t_string, string(body.Bytes())}
|
|
|
|
return lexRoot
|
|
|
|
}
|
|
|
|
body.WriteString(string(line))
|
|
|
|
line = line[0:0]
|
|
|
|
body.WriteRune(r)
|
|
|
|
case eof:
|
|
|
|
return lexErrorf("unexpected eof inside of heredoc %s", label)
|
|
|
|
default:
|
|
|
|
line = append(line, r)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// isAlphaNumeric reports whether r is an underscore, a Unicode letter, or a
// Unicode decimal digit.
func isAlphaNumeric(r rune) bool {
	switch {
	case r == '_':
		return true
	case unicode.IsLetter(r):
		return true
	default:
		return unicode.IsDigit(r)
	}
}
|
|
|
|
|
|
|
|
// isSpecial reports whether r is one of the runes that end a bare lexeme:
// '[', ']', '{', '}', ':', ';' or '#'.
func isSpecial(r rune) bool {
	const specials = "[]{}:;#"
	return strings.IndexRune(specials, r) >= 0
}
|