parse ... differently

master
Jordan Orelli 10 years ago
parent d369388fa8
commit c9872dbe53

@ -1,6 +1,7 @@
package main
import (
"bufio"
"errors"
"fmt"
"io"
@ -14,6 +15,8 @@ func (t tokenType) String() string {
switch t {
case t_error:
return "t_error"
case t_eof:
return "t_eof"
case t_string:
return "t_string"
case t_name:
@ -22,6 +25,8 @@ func (t tokenType) String() string {
return "t_type"
case t_equals:
return "t_equals"
case t_comment:
return "t_comment"
default:
panic(fmt.Sprintf("unknown token type: %v", t))
}
@ -29,6 +34,7 @@ func (t tokenType) String() string {
const (
t_error tokenType = iota // a stored lex error
t_eof // end of file token
t_string // a string literal
t_name // a name
t_type // a type
@ -59,6 +65,7 @@ func (l *lexer) lex() {
switch err {
case nil:
case io.EOF:
l.out <- token{t_eof, ""}
return
default:
l.out <- token{t_error, err.Error()}
@ -98,9 +105,9 @@ func lexString(in string) chan token {
return lex(r)
}
func lex(r io.RuneReader) chan token {
func lex(r io.Reader) chan token {
l := lexer{
in: r,
in: bufio.NewReader(r),
out: make(chan token),
backup: make([]rune, 0, 4),
}
@ -131,6 +138,8 @@ func lexRoot(l *lexer) (stateFn, error) {
return lexRoot, nil
case r == '"', r == '`':
return lexStringLiteral(r), nil
case r == '#':
return lexComment, nil
case unicode.IsSpace(r):
return lexRoot, nil
case unicode.IsLower(r):
@ -144,6 +153,26 @@ func lexRoot(l *lexer) (stateFn, error) {
}
}
// lexComment is the lexer state used after lexRoot has seen a '#'. Each call
// reads a single rune: a newline finishes the comment and hands control back
// to lexRoot, end of input finishes the comment and stops the lexer, and any
// other rune is passed to l.keep before staying in this state.
func lexComment(l *lexer) (stateFn, error) {
	ch, readErr := l.next()
	if readErr == io.EOF {
		// a comment on the last line has no trailing newline; still emit it
		l.emit(t_comment)
		return nil, io.EOF
	}
	if readErr != nil {
		return nil, readErr
	}

	if ch != '\n' {
		l.keep(ch)
		return lexComment, nil
	}

	// the newline itself is not kept as part of the comment
	l.emit(t_comment)
	return lexRoot, nil
}
func lexStringLiteral(delim rune) stateFn {
return func(l *lexer) (stateFn, error) {
r, err := l.next()

@ -30,6 +30,11 @@ var primitivesTests = []struct {
{t_equals, "="},
{t_string, "sam"},
}},
{`# this is a comment`, []token{{t_comment, " this is a comment"}}},
{`
# comment line one
# comment line two
`, []token{{t_comment, " comment line one"}, {t_comment, " comment line two"}}},
}
func TestLexPrimities(t *testing.T) {
@ -39,6 +44,7 @@ func TestLexPrimities(t *testing.T) {
t.Error(err)
continue
}
tokens = tokens[:len(tokens)-1]
if len(tokens) != len(test.out) {
t.Errorf("expected %d token, saw %d: %v", len(test.out), len(tokens), tokens)
continue

@ -0,0 +1,128 @@
package main
import (
"bytes"
"fmt"
)
// nodeType identifies which kind of parse-tree node a node value is.
type nodeType int

const (
	n_error      nodeType = iota // zero value; no node type visible in this file reports it
	n_root                       // reported by rootNode
	n_comment                    // reported by commentNode
	n_assignment                 // reported by assignmentNode
)

// String returns the name of the constant, mirroring tokenType.String, so
// that a nodeType prints readably with %s and %v. Values outside the declared
// constants are rendered as "nodeType(N)" rather than panicking.
func (t nodeType) String() string {
	switch t {
	case n_error:
		return "n_error"
	case n_root:
		return "n_root"
	case n_comment:
		return "n_comment"
	case n_assignment:
		return "n_assignment"
	default:
		// convert to int so the formatting cannot re-enter String
		return fmt.Sprintf("nodeType(%d)", int(t))
	}
}
// node is the interface shared by every element of the parse tree.
type node interface {
// Type reports which kind of node this is.
Type() nodeType
// parse receives the parser so that the node can read the tokens that
// belong to it; a non-nil error means parsing failed.
parse(*parser) error
}
// rootNode is the top of the parse tree. It holds the nodes parsed at the top
// level of the input.
type rootNode struct {
	children []node // appended to by addChild, in the order they are parsed
}

// newRootNode returns an empty root node with room reserved for a handful of
// children.
func newRootNode() node {
	root := new(rootNode)
	root.children = make([]node, 0, 8)
	return root
}

// Type reports n_root.
func (n *rootNode) Type() nodeType { return n_root }
// parse reads tokens from p until end of file, turning each top-level
// construct into a child node. Comments become commentNode children and a
// name token starts an assignmentNode, which consumes the rest of its own
// statement. A lex error, a failed assignment, or any other token type stops
// parsing and is returned as an error.
func (n *rootNode) parse(p *parser) error {
	for {
		tok := p.next()
		if tok.t == t_eof {
			return nil
		}

		switch tok.t {
		case t_comment:
			comment := commentNode(tok.s)
			n.addChild(&comment)
		case t_name:
			assign := &assignmentNode{name: tok.s}
			if err := assign.parse(p); err != nil {
				return err
			}
			n.addChild(assign)
		case t_error:
			return fmt.Errorf("parse error: saw lex error while parsing root node: %v", tok.s)
		default:
			return fmt.Errorf("parse error: unexpected token type %v while parsing root node", tok.t)
		}
	}
}
// addChild appends child to the root's children, allocating the slice first
// when the rootNode was created without one (for example as a struct literal
// rather than through newRootNode).
func (n *rootNode) addChild(child node) {
	kids := n.children
	if kids == nil {
		kids = make([]node, 0, 8)
	}
	n.children = append(kids, child)
}
// String renders the root as its children formatted with %s, separated by
// ", " and wrapped in braces; a root without children renders as "{}".
func (n *rootNode) String() string {
	var out bytes.Buffer
	out.WriteString("{")
	for i, kid := range n.children {
		if i > 0 {
			out.WriteString(", ")
		}
		fmt.Fprintf(&out, "%s", kid)
	}
	out.WriteString("}")
	return out.String()
}
// commentNode is a comment in the parse tree; its underlying string is the
// text of the comment token it was built from.
type commentNode string

// Type reports n_comment.
func (n commentNode) Type() nodeType { return n_comment }

// parse does nothing: the comment token already carries the whole comment, so
// no further tokens are read from the parser.
func (n commentNode) parse(p *parser) error { return nil }

// String renders the comment text inside a "{comment: ...}" wrapper.
func (n commentNode) String() string {
	text := string(n)
	return fmt.Sprintf("{comment: %s}", text)
}
// assignmentNode is the parse-tree node for a `name = value` statement.
//
// NOTE(review): Type is declared on the value receiver while parse and String
// are declared on the pointer receiver, so only *assignmentNode satisfies
// node. Consider using pointer receivers throughout.
type assignmentNode struct {
// name is the text of the name token that started the statement.
name string
// value is filled in by parse; the visible code only ever stores the text
// of a string token here.
value interface{}
}
// Type reports n_assignment.
func (n assignmentNode) Type() nodeType {
return n_assignment
}
// parse consumes the remainder of an assignment after its name: first an
// equals token, then a value token. Only string values are accepted; on
// success the string's text is stored in n.value. Lex errors, a premature end
// of file, and any unexpected token type are reported as errors.
func (n *assignmentNode) parse(p *parser) error {
	// the token immediately after the name has to be "="
	if eq := p.next(); eq.t != t_equals {
		switch eq.t {
		case t_error:
			return fmt.Errorf("parse error: saw lex error while parsing assignment node: %v", eq.s)
		case t_eof:
			return fmt.Errorf("parse error: unexpected eof in assignment node")
		default:
			return fmt.Errorf("parse error: unexpected %v token after name, expected =", eq.t)
		}
	}

	// the token after "=" is the value being assigned
	val := p.next()
	switch val.t {
	case t_string:
		n.value = val.s
		return nil
	case t_eof:
		return fmt.Errorf("parse error: unexpected eof in assignment node")
	case t_error:
		return fmt.Errorf("parse error: saw lex error while parsing assignment node: %v", val.s)
	}
	return fmt.Errorf("parse error: unexpected %v token after =, expected some kind of value", val.t)
}
// String renders the assignment as "{assign: name=NAME, val=VALUE}".
//
// value is an interface{}, so it is formatted with %v rather than %s: string
// values print exactly as before, while a nil or non-string value prints in
// its default form instead of as a "%!s(...)" formatting error.
func (n *assignmentNode) String() string {
	return fmt.Sprintf("{assign: name=%s, val=%v}", n.name, n.value)
}

@ -1,134 +1,42 @@
package main
import (
"fmt"
"io"
)
// parseErrorType classifies a parseError.
type parseErrorType int

const (
	e_no_error parseErrorType = iota // zero value
	e_lex_error
	e_unexpected_eof
	e_unexpected_token
)

// parseError is an error raised by the parser: a category plus a
// human-readable message.
type parseError struct {
	t parseErrorType
	m string
}

// Error prefixes the stored message with "parse error: ".
func (e parseError) Error() string {
	return fmt.Sprintf("parse error: %s", e.m)
}

// parseErrorf builds a parseError of category t whose message is tpl
// formatted with args, in the manner of fmt.Sprintf.
func parseErrorf(t parseErrorType, tpl string, args ...interface{}) error {
	msg := fmt.Sprintf(tpl, args...)
	return parseError{t: t, m: msg}
}
const ()
// parseFn is one state of the parser: it is called with the parser and
// returns the state to call next, or an error.
type parseFn func(*parser) (parseFn, error)
// parseRoot is the top-level parser state. It advances to the next token and
// accepts only a name, which moves the parser on to parseAfterName; an error
// from advancing is returned unchanged, and any other token type is reported
// as an unexpected-token parse error.
func parseRoot(p *parser) (parseFn, error) {
	err := p.next()
	if err != nil {
		return nil, err
	}

	if p.cur.t == t_name {
		return parseAfterName(p.cur.s), nil
	}
	return nil, parseErrorf(e_unexpected_token, "unexpected %s token in parseRoot", p.cur.t)
}
func parseAfterName(name string) parseFn {
return func(p *parser) (parseFn, error) {
switch err := p.next(); err {
case io.EOF:
return nil, parseErrorf(e_unexpected_eof, "unexpected eof after name %s", name)
case nil:
default:
return nil, err
}
switch p.cur.t {
case t_equals:
return parseAssign(name), nil
default:
return nil, parseErrorf(e_unexpected_token, "unexpected %s token in parseAfterName", p.cur.t)
}
func parse(r io.Reader) (node, error) {
p := &parser{
root: newRootNode(),
input: lex(r),
backup: make([]token, 0, 8),
}
}
func parseAssign(name string) parseFn {
return func(p *parser) (parseFn, error) {
switch err := p.next(); err {
case io.EOF:
return nil, parseErrorf(e_unexpected_eof, "unexpected eof when trying to parse value for name %s", name)
case nil:
default:
return nil, err
}
switch p.cur.t {
case t_string:
p.out.setUnique(name, p.cur.s)
return parseRoot, nil
default:
return nil, parseErrorf(e_unexpected_token, "unexpected %s token in parseAssign", p.cur.t)
}
if err := p.parse(); err != nil {
return nil, err
}
return p.root, nil
}
type parser struct {
in chan token
cur token
out *Config
root node
input chan token
backup []token
}
func (p *parser) next() error {
t, ok := <-p.in
if !ok {
return io.EOF
func (p *parser) parse() error {
if p.root == nil {
p.root = newRootNode()
}
if t.t == t_error {
return parseError{e_lex_error, t.s}
}
p.cur = t
return nil
return p.root.parse(p)
}
func (p *parser) run() error {
fn := parseRoot
var err error
for {
fn, err = fn(p)
switch err {
case io.EOF:
return nil
case nil:
default:
return err
}
// returns the next token and advances the input stream
func (p *parser) next() token {
if len(p.backup) > 0 {
t := p.backup[len(p.backup)-1]
p.backup = p.backup[:len(p.backup)-1]
return t
}
}
// assignment pairs a name with the value assigned to it.
// NOTE(review): nothing in the visible code refers to this type — confirm it
// is still needed.
type assignment struct {
name string
value interface{}
}
// parse runs a parser over the token channel in, writing into a fresh Config.
// It returns the parser's Config on success, or a nil Config together with
// the error that stopped the run.
func parse(in chan token) (*Config, error) {
	cfg := new(Config)
	p := &parser{in: in, out: cfg}

	err := p.run()
	if err != nil {
		return nil, err
	}
	return p.out, nil
}
func parseString(in string) (*Config, error) {
return parse(lexString(in))
return <-p.input
}

@ -1,127 +1,76 @@
package main
import (
"strings"
"testing"
)
var parseTests = []parseTest{
{
in: ``,
desc: "an empty string is a valid config",
configTests: []configTest{
{
desc: "undefined name field should not exist",
pass: inv(hasKey("name")),
source: ``,
root: &rootNode{
children: []node{},
},
},
{
source: `# just a comment`,
root: &rootNode{
children: []node{
commentNode(" just a comment"),
},
},
},
{
in: `name `,
desc: "eof after name",
errorType: e_unexpected_eof,
source: `name = "jordan"`,
root: &rootNode{
children: []node{
&assignmentNode{
name: "name",
value: "jordan",
},
},
},
},
{
in: `firstname lastname`,
desc: "two names in a row",
errorType: e_unexpected_token,
source: `
first_name = "jordan"
last_name = "orelli"
`,
root: &rootNode{},
},
{
in: `name = `,
desc: "eof after equals",
errorType: e_unexpected_eof,
source: `
# personal info
first_name = "jordan"
last_name = "orelli"
`,
root: &rootNode{},
},
{
in: `name = "jordan"`,
desc: "assign a value",
configTests: []configTest{
{
desc: "should have name",
pass: hasValue("name", "jordan"),
},
},
source: `
first_name = "jordan" # yep, that's my name
last_name = "orelli" # comments should be able to follow other shit
`,
root: &rootNode{},
},
}
// a boolean statement about a config struct
type configPredicate func(*Config) bool
// a suite of tests for parsing potm input
type parseTest struct {
in string
desc string
configTests []configTest
errorType parseErrorType
source string
root *rootNode
}
func (p *parseTest) run(t *testing.T) {
c, err := parseString(p.in)
r := strings.NewReader(p.source)
n, err := parse(r)
if err != nil {
t.Logf("test %s has error %v", p.desc, err)
e, ok := err.(parseError)
if !ok {
t.Errorf("unexpected error: %s", e)
return
}
if p.errorType == e.t {
t.Logf("OK: got expected error type %v for %s", e.t, p.desc)
} else {
t.Errorf("unexpected parse error: %s", e)
return
}
}
t.Logf("parsed config for %s", p.desc)
t.Log(c)
p.runConfigTests(t, c)
}
func (p *parseTest) runConfigTests(t *testing.T, c *Config) {
ok := true
for _, test := range p.configTests {
if test.pass(c) {
t.Logf("OK: %s", test.desc)
} else {
t.Errorf("config predicate failed: %s", test.desc)
ok = false
}
}
if ok {
t.Logf("OK: %s", p.desc)
t.Errorf("parse error: %v", err)
return
}
}
// configTest is a single check applied to a parsed config: pass is the
// predicate the config must satisfy, and desc is the description logged
// alongside the result.
type configTest struct {
desc string
pass configPredicate
}
// inv returns a predicate that holds exactly when fn does not.
func inv(fn configPredicate) configPredicate {
	negated := func(cfg *Config) bool {
		if fn(cfg) {
			return false
		}
		return true
	}
	return negated
}
// hasKey returns a predicate reporting whether a config's hasKey method
// accepts the key s.
func hasKey(s string) configPredicate {
	var pred configPredicate = func(cfg *Config) bool {
		found := cfg.hasKey(s)
		return found
	}
	return pred
}
// hasValue returns a predicate checking that key holds the expected value.
// Only string expectations are supported so far; any other type (including a
// nil expected) panics.
func hasValue(key string, expected interface{}) configPredicate {
	if s, isString := expected.(string); isString {
		return hasStringValue(key, s)
	}
	panic("no we can't do that yet")
}
func hasStringValue(key string, expected string) configPredicate {
return func(c *Config) bool {
return c.GetString(key) == expected
if n.Type() != n_root {
t.Errorf("we expected a root node object, but instead we got: %s", n.Type())
}
t.Logf("output: %v", n)
}
func TestParse(t *testing.T) {

Loading…
Cancel
Save