From c9872dbe534218ed1d1d4f3fd324f33d64fefd29 Mon Sep 17 00:00:00 2001 From: Jordan Orelli Date: Sat, 21 Mar 2015 21:08:50 -0400 Subject: [PATCH] parse ... differently --- lex.go | 33 +++++++++++- lex_test.go | 6 +++ node.go | 128 +++++++++++++++++++++++++++++++++++++++++++++ parse.go | 138 ++++++++---------------------------------------- parse_test.go | 141 ++++++++++++++++---------------------------------- 5 files changed, 233 insertions(+), 213 deletions(-) create mode 100644 node.go diff --git a/lex.go b/lex.go index f73a843..ee8c36f 100644 --- a/lex.go +++ b/lex.go @@ -1,6 +1,7 @@ package main import ( + "bufio" "errors" "fmt" "io" @@ -14,6 +15,8 @@ func (t tokenType) String() string { switch t { case t_error: return "t_error" + case t_eof: + return "t_eof" case t_string: return "t_string" case t_name: @@ -22,6 +25,8 @@ func (t tokenType) String() string { return "t_type" case t_equals: return "t_equals" + case t_comment: + return "t_comment" default: panic(fmt.Sprintf("unknown token type: %v", t)) } @@ -29,6 +34,7 @@ func (t tokenType) String() string { const ( t_error tokenType = iota // a stored lex error + t_eof // end of file token t_string // a string literal t_name // a name t_type // a type @@ -59,6 +65,7 @@ func (l *lexer) lex() { switch err { case nil: case io.EOF: + l.out <- token{t_eof, ""} return default: l.out <- token{t_error, err.Error()} @@ -98,9 +105,9 @@ func lexString(in string) chan token { return lex(r) } -func lex(r io.RuneReader) chan token { +func lex(r io.Reader) chan token { l := lexer{ - in: r, + in: bufio.NewReader(r), out: make(chan token), backup: make([]rune, 0, 4), } @@ -131,6 +138,8 @@ func lexRoot(l *lexer) (stateFn, error) { return lexRoot, nil case r == '"', r == '`': return lexStringLiteral(r), nil + case r == '#': + return lexComment, nil case unicode.IsSpace(r): return lexRoot, nil case unicode.IsLower(r): @@ -144,6 +153,26 @@ func lexRoot(l *lexer) (stateFn, error) { } } +func lexComment(l *lexer) (stateFn, error) { + r, err := l.next() + switch err { + case io.EOF: + l.emit(t_comment) + return nil, io.EOF + case nil: + default: + return nil, err + } + switch { + case r == '\n': + l.emit(t_comment) + return lexRoot, nil + default: + l.keep(r) + return lexComment, nil + } +} + func lexStringLiteral(delim rune) stateFn { return func(l *lexer) (stateFn, error) { r, err := l.next() diff --git a/lex_test.go b/lex_test.go index f891b1e..60e8baf 100644 --- a/lex_test.go +++ b/lex_test.go @@ -30,6 +30,11 @@ var primitivesTests = []struct { {t_equals, "="}, {t_string, "sam"}, }}, + {`# this is a comment`, []token{{t_comment, " this is a comment"}}}, + {` + # comment line one + # comment line two + `, []token{{t_comment, " comment line one"}, {t_comment, " comment line two"}}}, } func TestLexPrimities(t *testing.T) { @@ -39,6 +44,7 @@ func TestLexPrimities(t *testing.T) { t.Error(err) continue } + tokens = tokens[:len(tokens)-1] if len(tokens) != len(test.out) { t.Errorf("expected %d token, saw %d: %v", len(test.out), len(tokens), tokens) continue diff --git a/node.go b/node.go new file mode 100644 index 0000000..399dd4b --- /dev/null +++ b/node.go @@ -0,0 +1,128 @@ +package main + +import ( + "bytes" + "fmt" +) + +type nodeType int + +const ( + n_error nodeType = iota + n_root + n_comment + n_assignment +) + +type node interface { + Type() nodeType + parse(*parser) error +} + +type rootNode struct { + children []node +} + +func newRootNode() node { + return &rootNode{children: make([]node, 0, 8)} +} + +func (n *rootNode) Type() nodeType { + return n_root +} + +func (n *rootNode) parse(p *parser) error { + for { + t := p.next() + switch t.t { + case t_error: + return fmt.Errorf("parse error: saw lex error while parsing root node: %v", t.s) + case t_eof: + return nil + case t_comment: + shit := commentNode(t.s) + n.addChild(&shit) + case t_name: + nn := &assignmentNode{name: t.s} + if err := nn.parse(p); err != nil { + return err + } + n.addChild(nn) + default: + return fmt.Errorf("parse error: unexpected token type %v while parsing root node", t.t) + } + } +} + +func (n *rootNode) addChild(child node) { + if n.children == nil { + n.children = make([]node, 0, 8) + } + n.children = append(n.children, child) +} + +func (n *rootNode) String() string { + var buf bytes.Buffer + buf.WriteString("{") + for _, child := range n.children { + fmt.Fprintf(&buf, "%s, ", child) + } + if len(n.children) > 0 { + buf.Truncate(buf.Len() - 2) + } + buf.WriteString("}") + return buf.String() +} + +type commentNode string + +func (n commentNode) Type() nodeType { + return n_comment +} + +func (n commentNode) parse(p *parser) error { + return nil +} + +func (n commentNode) String() string { + return fmt.Sprintf("{comment: %s}", string(n)) +} + +type assignmentNode struct { + name string + value interface{} +} + +func (n assignmentNode) Type() nodeType { + return n_assignment +} + +func (n *assignmentNode) parse(p *parser) error { + t := p.next() + switch t.t { + case t_error: + return fmt.Errorf("parse error: saw lex error while parsing assignment node: %v", t.s) + case t_eof: + return fmt.Errorf("parse error: unexpected eof in assignment node") + case t_equals: + default: + return fmt.Errorf("parse error: unexpected %v token after name, expected =", t.t) + } + + t = p.next() + switch t.t { + case t_error: + return fmt.Errorf("parse error: saw lex error while parsing assignment node: %v", t.s) + case t_eof: + return fmt.Errorf("parse error: unexpected eof in assignment node") + case t_string: + n.value = t.s + return nil + default: + return fmt.Errorf("parse error: unexpected %v token after =, expected some kind of value", t.t) + } +} + +func (n *assignmentNode) String() string { + return fmt.Sprintf("{assign: name=%s, val=%s}", n.name, n.value) +} diff --git a/parse.go b/parse.go index a34e850..f6178c8 100644 --- a/parse.go +++ b/parse.go @@ -1,134 +1,42 @@ package main import ( - "fmt" "io" ) -const ( - e_no_error parseErrorType = iota - e_lex_error - e_unexpected_eof - e_unexpected_token -) - -type parseErrorType int - -type parseError struct { - t parseErrorType - m string -} - -func (p parseError) Error() string { - return fmt.Sprintf("parse error: %s", p.m) -} - -func parseErrorf(t parseErrorType, tpl string, args ...interface{}) error { - return parseError{t: t, m: fmt.Sprintf(tpl, args...)} -} +const () -type parseFn func(*parser) (parseFn, error) - -func parseRoot(p *parser) (parseFn, error) { - if err := p.next(); err != nil { - return nil, err - } - switch p.cur.t { - case t_name: - return parseAfterName(p.cur.s), nil - default: - return nil, parseErrorf(e_unexpected_token, "unexpected %s token in parseRoot", p.cur.t) - } -} - -func parseAfterName(name string) parseFn { - return func(p *parser) (parseFn, error) { - switch err := p.next(); err { - case io.EOF: - return nil, parseErrorf(e_unexpected_eof, "unexpected eof after name %s", name) - case nil: - default: - return nil, err - } - - switch p.cur.t { - case t_equals: - return parseAssign(name), nil - default: - return nil, parseErrorf(e_unexpected_token, "unexpected %s token in parseAfterName", p.cur.t) - } +func parse(r io.Reader) (node, error) { + p := &parser{ + root: newRootNode(), + input: lex(r), + backup: make([]token, 0, 8), } -} - -func parseAssign(name string) parseFn { - return func(p *parser) (parseFn, error) { - switch err := p.next(); err { - case io.EOF: - return nil, parseErrorf(e_unexpected_eof, "unexpected eof when trying to parse value for name %s", name) - case nil: - default: - return nil, err - } - - switch p.cur.t { - case t_string: - p.out.setUnique(name, p.cur.s) - return parseRoot, nil - default: - return nil, parseErrorf(e_unexpected_token, "unexpected %s token in parseAssign", p.cur.t) - } + if err := p.parse(); err != nil { + return nil, err } + return p.root, nil } type parser struct { - in chan token - cur token - out *Config + root node + input chan token + backup []token } -func (p *parser) next() error { - t, ok := <-p.in - if !ok { - return io.EOF +func (p *parser) parse() error { + if p.root == nil { + p.root = newRootNode() } - if t.t == t_error { - return parseError{e_lex_error, t.s} - } - p.cur = t - return nil + return p.root.parse(p) } -func (p *parser) run() error { - fn := parseRoot - var err error - for { - fn, err = fn(p) - switch err { - case io.EOF: - return nil - case nil: - default: - return err - } +// returns the next token and advances the input stream +func (p *parser) next() token { + if len(p.backup) > 0 { + t := p.backup[len(p.backup)-1] + p.backup = p.backup[:len(p.backup)-1] + return t } -} - -type assignment struct { - name string - value interface{} -} - -func parse(in chan token) (*Config, error) { - p := &parser{ - in: in, - out: new(Config), - } - if err := p.run(); err != nil { - return nil, err - } - return p.out, nil -} - -func parseString(in string) (*Config, error) { - return parse(lexString(in)) + return <-p.input } diff --git a/parse_test.go b/parse_test.go index 4f4df83..544ef09 100644 --- a/parse_test.go +++ b/parse_test.go @@ -1,127 +1,76 @@ package main import ( + "strings" "testing" ) var parseTests = []parseTest{ { - in: ``, - desc: "an empty string is a valid config", - configTests: []configTest{ - { - desc: "undefined name field should not exist", - pass: inv(hasKey("name")), + source: ``, + root: &rootNode{ + children: []node{}, + }, + }, + { + source: `# just a comment`, + root: &rootNode{ + children: []node{ + commentNode(" just a comment"), }, }, }, { - in: `name `, - desc: "eof after name", - errorType: e_unexpected_eof, + source: `name = "jordan"`, + root: &rootNode{ + children: []node{ + &assignmentNode{ + name: "name", + value: "jordan", + }, + }, + }, }, { - in: `firstname lastname`, - desc: "two names in a row", - errorType: e_unexpected_token, + source: ` + first_name = "jordan" + last_name = "orelli" + `, + root: &rootNode{}, }, { - in: `name = `, - desc: "eof after equals", - errorType: e_unexpected_eof, + source: ` + # personal info + first_name = "jordan" + last_name = "orelli" + `, + root: &rootNode{}, }, { - in: `name = "jordan"`, - desc: "assign a value", - configTests: []configTest{ - { - desc: "should have name", - pass: hasValue("name", "jordan"), - }, - }, + source: ` + first_name = "jordan" # yep, that's my name + last_name = "orelli" # comments should be able to follow other shit + `, + root: &rootNode{}, }, } -// a boolean statement about a config struct -type configPredicate func(*Config) bool - -// a suite of tests for parsing potm input type parseTest struct { - in string - desc string - configTests []configTest - errorType parseErrorType + source string + root *rootNode } func (p *parseTest) run(t *testing.T) { - c, err := parseString(p.in) + r := strings.NewReader(p.source) + n, err := parse(r) if err != nil { - t.Logf("test %s has error %v", p.desc, err) - e, ok := err.(parseError) - if !ok { - t.Errorf("unexpected error: %s", e) - return - } - if p.errorType == e.t { - t.Logf("OK: got expected error type %v for %s", e.t, p.desc) - } else { - t.Errorf("unexpected parse error: %s", e) - return - } - } - t.Logf("parsed config for %s", p.desc) - t.Log(c) - p.runConfigTests(t, c) -} - -func (p *parseTest) runConfigTests(t *testing.T, c *Config) { - ok := true - for _, test := range p.configTests { - if test.pass(c) { - t.Logf("OK: %s", test.desc) - } else { - t.Errorf("config predicate failed: %s", test.desc) - ok = false - } - } - if ok { - t.Logf("OK: %s", p.desc) + t.Errorf("parse error: %v", err) + return } -} - -// an individual test for confirming that a parsed config struct meets some -// predicate -type configTest struct { - desc string - pass configPredicate -} - -// inverts a given config predicate -func inv(fn configPredicate) configPredicate { - return func(c *Config) bool { - return !fn(c) - } -} - -func hasKey(s string) configPredicate { - return func(c *Config) bool { - return c.hasKey(s) - } -} - -func hasValue(key string, expected interface{}) configPredicate { - switch t := expected.(type) { - case string: - return hasStringValue(key, t) - default: - panic("no we can't do that yet") - } -} - -func hasStringValue(key string, expected string) configPredicate { - return func(c *Config) bool { - return c.GetString(key) == expected + if n.Type() != n_root { + t.Errorf("we expected a root node object, but instead we got: %s", n.Type()) } + t.Logf("output: %v", n) } func TestParse(t *testing.T) {