parse ... differently

master
Jordan Orelli 10 years ago
parent d369388fa8
commit c9872dbe53

@ -1,6 +1,7 @@
package main package main
import ( import (
"bufio"
"errors" "errors"
"fmt" "fmt"
"io" "io"
@ -14,6 +15,8 @@ func (t tokenType) String() string {
switch t { switch t {
case t_error: case t_error:
return "t_error" return "t_error"
case t_eof:
return "t_eof"
case t_string: case t_string:
return "t_string" return "t_string"
case t_name: case t_name:
@ -22,6 +25,8 @@ func (t tokenType) String() string {
return "t_type" return "t_type"
case t_equals: case t_equals:
return "t_equals" return "t_equals"
case t_comment:
return "t_comment"
default: default:
panic(fmt.Sprintf("unknown token type: %v", t)) panic(fmt.Sprintf("unknown token type: %v", t))
} }
@ -29,6 +34,7 @@ func (t tokenType) String() string {
const ( const (
t_error tokenType = iota // a stored lex error t_error tokenType = iota // a stored lex error
t_eof // end of file token
t_string // a string literal t_string // a string literal
t_name // a name t_name // a name
t_type // a type t_type // a type
@ -59,6 +65,7 @@ func (l *lexer) lex() {
switch err { switch err {
case nil: case nil:
case io.EOF: case io.EOF:
l.out <- token{t_eof, ""}
return return
default: default:
l.out <- token{t_error, err.Error()} l.out <- token{t_error, err.Error()}
@ -98,9 +105,9 @@ func lexString(in string) chan token {
return lex(r) return lex(r)
} }
func lex(r io.RuneReader) chan token { func lex(r io.Reader) chan token {
l := lexer{ l := lexer{
in: r, in: bufio.NewReader(r),
out: make(chan token), out: make(chan token),
backup: make([]rune, 0, 4), backup: make([]rune, 0, 4),
} }
@ -131,6 +138,8 @@ func lexRoot(l *lexer) (stateFn, error) {
return lexRoot, nil return lexRoot, nil
case r == '"', r == '`': case r == '"', r == '`':
return lexStringLiteral(r), nil return lexStringLiteral(r), nil
case r == '#':
return lexComment, nil
case unicode.IsSpace(r): case unicode.IsSpace(r):
return lexRoot, nil return lexRoot, nil
case unicode.IsLower(r): case unicode.IsLower(r):
@ -144,6 +153,26 @@ func lexRoot(l *lexer) (stateFn, error) {
} }
} }
// lexComment is the lexer state that consumes the body of a comment, one rune
// per invocation.
//
// A newline ends the comment: the t_comment token is emitted and lexRoot is
// returned as the next state. Any other rune is handed to l.keep and
// lexComment is returned again. If the input ends (io.EOF) the comment
// gathered so far is still emitted before io.EOF is passed up; every other
// read error is returned unchanged without emitting anything.
func lexComment(l *lexer) (stateFn, error) {
	r, err := l.next()
	if err == io.EOF {
		// A comment on the last line has no trailing newline; flush it anyway.
		l.emit(t_comment)
		return nil, io.EOF
	}
	if err != nil {
		return nil, err
	}

	if r == '\n' {
		l.emit(t_comment)
		return lexRoot, nil
	}
	l.keep(r)
	return lexComment, nil
}
func lexStringLiteral(delim rune) stateFn { func lexStringLiteral(delim rune) stateFn {
return func(l *lexer) (stateFn, error) { return func(l *lexer) (stateFn, error) {
r, err := l.next() r, err := l.next()

@ -30,6 +30,11 @@ var primitivesTests = []struct {
{t_equals, "="}, {t_equals, "="},
{t_string, "sam"}, {t_string, "sam"},
}}, }},
{`# this is a comment`, []token{{t_comment, " this is a comment"}}},
{`
# comment line one
# comment line two
`, []token{{t_comment, " comment line one"}, {t_comment, " comment line two"}}},
} }
func TestLexPrimities(t *testing.T) { func TestLexPrimities(t *testing.T) {
@ -39,6 +44,7 @@ func TestLexPrimities(t *testing.T) {
t.Error(err) t.Error(err)
continue continue
} }
tokens = tokens[:len(tokens)-1]
if len(tokens) != len(test.out) { if len(tokens) != len(test.out) {
t.Errorf("expected %d token, saw %d: %v", len(test.out), len(tokens), tokens) t.Errorf("expected %d token, saw %d: %v", len(test.out), len(tokens), tokens)
continue continue

@ -0,0 +1,128 @@
package main
import (
"bytes"
"fmt"
)
// nodeType identifies the kind of a parse-tree node; it is the value returned
// by a node's Type method.
type nodeType int
const (
// n_error is the zero value of nodeType. None of the node types visible in
// this file report it.
n_error nodeType = iota
// n_root is reported by rootNode.
n_root
// n_comment is reported by commentNode.
n_comment
// n_assignment is reported by assignmentNode.
n_assignment
)
// node is the interface implemented by the parse-tree node types in this file
// (*rootNode, commentNode and *assignmentNode).
type node interface {
// Type reports which kind of node this is.
Type() nodeType
// parse lets the node pull whatever tokens it needs from the parser and
// returns a non-nil error when the token stream is not what it expects.
parse(*parser) error
}
// rootNode is the top-level node of a parsed document. Its children are the
// nodes found at the root of the input, in the order they were added.
type rootNode struct {
	children []node
}

// newRootNode returns an empty root node whose child list already has room
// for eight children.
func newRootNode() node {
	root := new(rootNode)
	root.children = make([]node, 0, 8)
	return root
}

// Type reports n_root.
func (n *rootNode) Type() nodeType {
	return n_root
}
// parse reads tokens from p until end of input, appending one child per
// top-level construct:
//
//   - a comment token becomes a *commentNode child;
//   - a name token starts an assignment, which is parsed by assignmentNode and
//     appended only if that parse succeeds.
//
// It returns nil on t_eof. A lex error token, a failed assignment parse, or any
// other token type stops parsing and is reported as an error; children added
// before the failure are left in place.
func (n *rootNode) parse(p *parser) error {
	for {
		switch tok := p.next(); tok.t {
		case t_eof:
			return nil
		case t_error:
			return fmt.Errorf("parse error: saw lex error while parsing root node: %v", tok.s)
		case t_comment:
			comment := commentNode(tok.s)
			n.addChild(&comment)
		case t_name:
			assign := &assignmentNode{name: tok.s}
			err := assign.parse(p)
			if err != nil {
				return err
			}
			n.addChild(assign)
		default:
			return fmt.Errorf("parse error: unexpected token type %v while parsing root node", tok.t)
		}
	}
}
// addChild appends child to the end of the root's child list. A root whose
// list has not been allocated yet gets one with room for eight children first.
func (n *rootNode) addChild(child node) {
	kids := n.children
	if kids == nil {
		kids = make([]node, 0, 8)
	}
	n.children = append(kids, child)
}
// String renders the root as its children formatted with %s, separated by
// ", " and wrapped in braces. A root without children renders as "{}".
func (n *rootNode) String() string {
	var out bytes.Buffer
	out.WriteString("{")
	for i, child := range n.children {
		if i > 0 {
			out.WriteString(", ")
		}
		fmt.Fprintf(&out, "%s", child)
	}
	out.WriteString("}")
	return out.String()
}
// commentNode is a comment in the parse tree; the string value is the
// comment's text.
type commentNode string

// Type reports n_comment.
func (n commentNode) Type() nodeType {
	return n_comment
}

// parse does nothing and always returns nil: the comment text is already the
// node's value, so no tokens are read from p.
func (n commentNode) parse(p *parser) error {
	return nil
}

// String renders the comment as "{comment: <text>}".
func (n commentNode) String() string {
	text := string(n)
	return fmt.Sprintf("{comment: %s}", text)
}
// assignmentNode is a `name = value` statement in the parse tree.
type assignmentNode struct {
	// name is the identifier on the left-hand side of the "=".
	name string
	// value is the parsed right-hand side. It stays nil until parse succeeds.
	value interface{}
}

// Type reports n_assignment.
//
// It uses a pointer receiver like the other assignmentNode methods, so the
// type has a single receiver kind and Type can be called on a nil
// *assignmentNode without dereferencing it.
func (n *assignmentNode) Type() nodeType {
	return n_assignment
}
// parse consumes the remainder of an assignment whose name token has already
// been read: an equals token followed by a value token. Only string values are
// accepted; on success the string is stored in n.value and nil is returned.
//
// A lex error token, end of input, or any other token in either position
// yields an error and leaves n.value untouched.
func (n *assignmentNode) parse(p *parser) error {
	// The token right after the name has to be "=".
	if eq := p.next(); eq.t != t_equals {
		switch eq.t {
		case t_error:
			return fmt.Errorf("parse error: saw lex error while parsing assignment node: %v", eq.s)
		case t_eof:
			return fmt.Errorf("parse error: unexpected eof in assignment node")
		}
		return fmt.Errorf("parse error: unexpected %v token after name, expected =", eq.t)
	}

	// The token after "=" is the value.
	val := p.next()
	switch val.t {
	case t_string:
		n.value = val.s
		return nil
	case t_error:
		return fmt.Errorf("parse error: saw lex error while parsing assignment node: %v", val.s)
	case t_eof:
		return fmt.Errorf("parse error: unexpected eof in assignment node")
	}
	return fmt.Errorf("parse error: unexpected %v token after =, expected some kind of value", val.t)
}
// String renders the assignment as "{assign: name=<name>, val=<value>}".
//
// The value is formatted with %v rather than %s because it is an interface{}:
// a nil value (node not parsed yet) or a non-string value would otherwise be
// rendered as a "%!s(...)" formatting error. String values print the same
// either way.
func (n *assignmentNode) String() string {
	return fmt.Sprintf("{assign: name=%s, val=%v}", n.name, n.value)
}

@ -1,134 +1,42 @@
package main package main
import ( import (
"fmt"
"io" "io"
) )
const ( const ()
e_no_error parseErrorType = iota
e_lex_error
e_unexpected_eof
e_unexpected_token
)
type parseErrorType int
type parseError struct {
t parseErrorType
m string
}
func (p parseError) Error() string {
return fmt.Sprintf("parse error: %s", p.m)
}
func parseErrorf(t parseErrorType, tpl string, args ...interface{}) error {
return parseError{t: t, m: fmt.Sprintf(tpl, args...)}
}
type parseFn func(*parser) (parseFn, error)
func parseRoot(p *parser) (parseFn, error) { func parse(r io.Reader) (node, error) {
if err := p.next(); err != nil { p := &parser{
return nil, err root: newRootNode(),
} input: lex(r),
switch p.cur.t { backup: make([]token, 0, 8),
case t_name:
return parseAfterName(p.cur.s), nil
default:
return nil, parseErrorf(e_unexpected_token, "unexpected %s token in parseRoot", p.cur.t)
}
}
func parseAfterName(name string) parseFn {
return func(p *parser) (parseFn, error) {
switch err := p.next(); err {
case io.EOF:
return nil, parseErrorf(e_unexpected_eof, "unexpected eof after name %s", name)
case nil:
default:
return nil, err
}
switch p.cur.t {
case t_equals:
return parseAssign(name), nil
default:
return nil, parseErrorf(e_unexpected_token, "unexpected %s token in parseAfterName", p.cur.t)
}
}
} }
if err := p.parse(); err != nil {
func parseAssign(name string) parseFn {
return func(p *parser) (parseFn, error) {
switch err := p.next(); err {
case io.EOF:
return nil, parseErrorf(e_unexpected_eof, "unexpected eof when trying to parse value for name %s", name)
case nil:
default:
return nil, err return nil, err
} }
return p.root, nil
switch p.cur.t {
case t_string:
p.out.setUnique(name, p.cur.s)
return parseRoot, nil
default:
return nil, parseErrorf(e_unexpected_token, "unexpected %s token in parseAssign", p.cur.t)
}
}
} }
type parser struct { type parser struct {
in chan token root node
cur token input chan token
out *Config backup []token
}
func (p *parser) next() error {
t, ok := <-p.in
if !ok {
return io.EOF
}
if t.t == t_error {
return parseError{e_lex_error, t.s}
}
p.cur = t
return nil
} }
func (p *parser) run() error { func (p *parser) parse() error {
fn := parseRoot if p.root == nil {
var err error p.root = newRootNode()
for {
fn, err = fn(p)
switch err {
case io.EOF:
return nil
case nil:
default:
return err
}
} }
return p.root.parse(p)
} }
type assignment struct { // returns the next token and advances the input stream
name string func (p *parser) next() token {
value interface{} if len(p.backup) > 0 {
} t := p.backup[len(p.backup)-1]
p.backup = p.backup[:len(p.backup)-1]
func parse(in chan token) (*Config, error) { return t
p := &parser{
in: in,
out: new(Config),
} }
if err := p.run(); err != nil { return <-p.input
return nil, err
}
return p.out, nil
}
func parseString(in string) (*Config, error) {
return parse(lexString(in))
} }

@ -1,127 +1,76 @@
package main package main
import ( import (
"strings"
"testing" "testing"
) )
var parseTests = []parseTest{ var parseTests = []parseTest{
{ {
in: ``, source: ``,
desc: "an empty string is a valid config", root: &rootNode{
configTests: []configTest{ children: []node{},
},
},
{ {
desc: "undefined name field should not exist", source: `# just a comment`,
pass: inv(hasKey("name")), root: &rootNode{
children: []node{
commentNode(" just a comment"),
}, },
}, },
}, },
{ {
in: `name `, source: `name = "jordan"`,
desc: "eof after name", root: &rootNode{
errorType: e_unexpected_eof, children: []node{
&assignmentNode{
name: "name",
value: "jordan",
},
}, },
{
in: `firstname lastname`,
desc: "two names in a row",
errorType: e_unexpected_token,
}, },
{
in: `name = `,
desc: "eof after equals",
errorType: e_unexpected_eof,
}, },
{ {
in: `name = "jordan"`, source: `
desc: "assign a value", first_name = "jordan"
configTests: []configTest{ last_name = "orelli"
{ `,
desc: "should have name", root: &rootNode{},
pass: hasValue("name", "jordan"),
}, },
{
source: `
# personal info
first_name = "jordan"
last_name = "orelli"
`,
root: &rootNode{},
}, },
{
source: `
first_name = "jordan" # yep, that's my name
last_name = "orelli" # comments should be able to follow other shit
`,
root: &rootNode{},
}, },
} }
// a boolean statement about a config struct
type configPredicate func(*Config) bool
// a suite of tests for parsing potm input
type parseTest struct { type parseTest struct {
in string source string
desc string root *rootNode
configTests []configTest
errorType parseErrorType
} }
func (p *parseTest) run(t *testing.T) { func (p *parseTest) run(t *testing.T) {
c, err := parseString(p.in) r := strings.NewReader(p.source)
n, err := parse(r)
if err != nil { if err != nil {
t.Logf("test %s has error %v", p.desc, err) t.Errorf("parse error: %v", err)
e, ok := err.(parseError)
if !ok {
t.Errorf("unexpected error: %s", e)
return
}
if p.errorType == e.t {
t.Logf("OK: got expected error type %v for %s", e.t, p.desc)
} else {
t.Errorf("unexpected parse error: %s", e)
return return
} }
if n.Type() != n_root {
t.Errorf("we expected a root node object, but instead we got: %s", n.Type())
} }
t.Logf("parsed config for %s", p.desc) t.Logf("output: %v", n)
t.Log(c)
p.runConfigTests(t, c)
}
func (p *parseTest) runConfigTests(t *testing.T, c *Config) {
ok := true
for _, test := range p.configTests {
if test.pass(c) {
t.Logf("OK: %s", test.desc)
} else {
t.Errorf("config predicate failed: %s", test.desc)
ok = false
}
}
if ok {
t.Logf("OK: %s", p.desc)
}
}
// an individual test for confirming that a parsed config struct meets some
// predicate
type configTest struct {
desc string
pass configPredicate
}
// inverts a given config predicate
func inv(fn configPredicate) configPredicate {
return func(c *Config) bool {
return !fn(c)
}
}
func hasKey(s string) configPredicate {
return func(c *Config) bool {
return c.hasKey(s)
}
}
func hasValue(key string, expected interface{}) configPredicate {
switch t := expected.(type) {
case string:
return hasStringValue(key, t)
default:
panic("no we can't do that yet")
}
}
func hasStringValue(key string, expected string) configPredicate {
return func(c *Config) bool {
return c.GetString(key) == expected
}
} }
func TestParse(t *testing.T) { func TestParse(t *testing.T) {

Loading…
Cancel
Save