lexing numbers

master
Jordan Orelli 10 years ago
parent 3c6fd7910c
commit 636a30b963

139
lex.go

@ -41,8 +41,8 @@ func (t tokenType) String() string {
return "t_object_separator" return "t_object_separator"
case t_object_end: case t_object_end:
return "t_object_end" return "t_object_end"
case t_int: case t_number:
return "t_int" return "t_number"
default: default:
panic(fmt.Sprintf("unknown token type: %v", t)) panic(fmt.Sprintf("unknown token type: %v", t))
} }
@ -62,7 +62,7 @@ const (
t_object_start // { t_object_start // {
t_object_end // } t_object_end // }
t_object_separator // : t_object_separator // :
t_int // an integer t_number // a number
) )
type stateFn func(*lexer) stateFn type stateFn func(*lexer) stateFn
@ -72,6 +72,10 @@ type token struct {
s string s string
} }
// String formats the token as "{<type> <text>}", where <type> is the token's
// type field and <text> is its string field, both rendered with the %s verb.
func (t token) String() string {
	kind, text := t.t, t.s
	return fmt.Sprintf("{%s %s}", kind, text)
}
type lexer struct { type lexer struct {
in io.RuneReader in io.RuneReader
out chan token out chan token
@ -125,6 +129,25 @@ func (l *lexer) emit(t tokenType) {
l.buf = l.buf[0:0] l.buf = l.buf[0:0]
} }
// accept reads one rune with l.next and checks whether it occurs in chars.
//
// If it does, the rune is passed to l.keep and accept reports true. Otherwise
// the rune is handed back with l.unread and accept reports false.
func (l *lexer) accept(chars string) bool {
	r := l.next()
	if !strings.ContainsRune(chars, r) {
		// Not one of the wanted runes: put it back for the next reader.
		l.unread(r)
		return false
	}
	l.keep(r)
	return true
}
// acceptRun calls l.accept(chars) repeatedly until it reports false, and
// reports whether at least one of those calls succeeded.
func (l *lexer) acceptRun(chars string) bool {
	accepted := 0
	for l.accept(chars) {
		accepted++
	}
	return accepted > 0
}
func lexString(in string) chan token { func lexString(in string) chan token {
r := strings.NewReader(in) r := strings.NewReader(in)
return lex(r) return lex(r)
@ -195,9 +218,9 @@ func lexRoot(l *lexer) stateFn {
l.keep(r) l.keep(r)
l.emit(t_object_separator) l.emit(t_object_separator)
return lexRoot return lexRoot
// case strings.IndexRune("-0123456789", r) >= 0: case strings.IndexRune(".+-0123456789", r) >= 0:
// l.unread(r) l.unread(r)
// return lexNumber, nil return lexNumber
case unicode.IsSpace(r): case unicode.IsSpace(r):
return lexRoot return lexRoot
case unicode.IsLower(r): case unicode.IsLower(r):
@ -280,76 +303,34 @@ func lexType(l *lexer) stateFn {
} }
} }
// func lexNumber(l *lexer) (stateFn, error) { func lexNumber(l *lexer) stateFn {
// r, err := l.next() l.accept("+-")
// if err != nil { digits := "0123456789"
// return nil, err if l.accept("0") {
// } if l.accept("xX") {
// digits = "0123456789abcdefABCDEF"
// switch { } else {
// case r == '-', r == '+': digits = "01234567"
// l.keep(r) }
// return lexNumber, nil }
// case r == '0': l.acceptRun(digits)
// l.keep(r) if l.accept(".") {
// return lexHexOct, nil l.acceptRun(digits)
// case strings.IndexRune("123456789", r) >= 0: }
// l.keep(r) if l.accept("eE") {
// return lexDecimal, nil l.accept("+-")
// default: l.acceptRun("0123456789")
// return nil, fmt.Errorf("unexpected rune in lexNumber: %c", r) }
// } l.accept("i")
// } r := l.next()
// if isAlphaNumeric(r) {
// func lexHexOct(l *lexer) (stateFn, error) { return lexErrorf("unexpected alphanum in lexNumber: %c", r)
// r, err := l.next() }
// if err != nil { l.unread(r)
// return nil, err l.emit(t_number)
// } return lexRoot
// }
// switch {
// case r == 'x', r == 'X': func isAlphaNumeric(r rune) bool {
// l.keep(r) return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)
// return lexHex, nil }
// case r == 'e', r == 'E':
// l.keep(r)
// return lexExponent, nil
// case r == '8', r == '9':
// return nil, fmt.Errorf("unexpected 8 or 9 in lexHexOct. there's no 8 or 9 in octal!")
// case strings.IndexRune("01234567", r) >= 0:
// l.keep(r)
// return lexOct, nil
// default:
// // we get here for the literals -0, +0 and 0
// l.unread(r)
// l.emit(t_int)
// return lexRoot, nil
// }
// }
//
// func lexHex(l *lexer) (stateFn, error) {
// r, err := l.next()
// if err != nil {
// return nil, err
// }
//
// switch {
//
// }
// }
//
// func lexDecimal(l *lexer) (stateFn, error) {
// r, err := l.next()
// if err != nil {
// return nil, err
// }
//
// switch {
// case strings.IndexRune("0123456789", r) >= 0:
// l.keep(r)
// return lexDecimal, nil
// case r == '.':
// l.keep(r)
// return lexFloat, nil
// }
// }

@ -49,6 +49,53 @@ var primitivesTests = []struct {
{t_string, "orelli"}, {t_string, "orelli"},
{t_object_end, "}"}, {t_object_end, "}"},
}}, }},
{`{
first_name: "jordan",
last_name: "orelli",
}`, []token{
{t_object_start, "{"},
{t_name, "first_name"},
{t_object_separator, ":"},
{t_string, "jordan"},
{t_list_separator, ","},
{t_name, "last_name"},
{t_object_separator, ":"},
{t_string, "orelli"},
{t_list_separator, ","},
{t_object_end, "}"},
}},
{`0`, []token{{t_number, "0"}}},
{`-0`, []token{{t_number, "-0"}}},
{`+0`, []token{{t_number, "+0"}}},
{`+125`, []token{{t_number, "+125"}}},
{`-125`, []token{{t_number, "-125"}}},
{`.0`, []token{{t_number, ".0"}}},
{`15`, []token{{t_number, "15"}}},
{`0x0`, []token{{t_number, "0x0"}}},
{`0xa`, []token{{t_number, "0xa"}}},
{`0xc0dea5cf`, []token{{t_number, "0xc0dea5cf"}}},
{`12.345`, []token{{t_number, "12.345"}}},
{`12.345 name`, []token{{t_number, "12.345"}, {t_name, "name"}}},
{`[12.345]`, []token{
{t_list_start, "["},
{t_number, "12.345"},
{t_list_end, "]"},
}},
{`[1, 2, 3]`, []token{
{t_list_start, "["},
{t_number, "1"},
{t_list_separator, ","},
{t_number, "2"},
{t_list_separator, ","},
{t_number, "3"},
{t_list_end, "]"},
}},
// a complex number generates two lexemes; one for its real component,
// and one for its imaginary component.
{`1+2i`, []token{{t_number, "1"}, {t_number, "+2i"}}},
{`1e9`, []token{{t_number, "1e9"}}},
{`1e+9`, []token{{t_number, "1e+9"}}},
{`1E-9`, []token{{t_number, "1E-9"}}},
} }
func TestLexPrimities(t *testing.T) { func TestLexPrimities(t *testing.T) {

Loading…
Cancel
Save