lexing numbers

master
Jordan Orelli 10 years ago
parent 3c6fd7910c
commit 636a30b963

139
lex.go

@ -41,8 +41,8 @@ func (t tokenType) String() string {
return "t_object_separator"
case t_object_end:
return "t_object_end"
case t_int:
return "t_int"
case t_number:
return "t_number"
default:
panic(fmt.Sprintf("unknown token type: %v", t))
}
@ -62,7 +62,7 @@ const (
t_object_start // {
t_object_end // }
t_object_separator // :
t_int // an integer
t_number // a number
)
// stateFn is a single lexer state: it is handed the lexer and returns the
// state function to continue with (for example, lexRoot hands off to
// lexNumber, which returns lexRoot once the number has been emitted).
type stateFn func(*lexer) stateFn
@ -72,6 +72,10 @@ type token struct {
s string
}
// String renders the token as "{<type> <text>}" for debugging and test
// output. The token type is formatted with %s, so fmt is the one that invokes
// the type's String method.
func (t token) String() string {
	kind, text := t.t, t.s
	return fmt.Sprintf("{%s %s}", kind, text)
}
type lexer struct {
in io.RuneReader
out chan token
@ -125,6 +129,25 @@ func (l *lexer) emit(t tokenType) {
l.buf = l.buf[0:0]
}
// accept reads the next rune and consumes it only if it is one of chars.
//
// On a match the rune is kept via l.keep and accept reports true. Otherwise
// the rune is handed back via l.unread and accept reports false, leaving the
// input position unchanged.
func (l *lexer) accept(chars string) bool {
	c := l.next()
	if !strings.ContainsRune(chars, c) {
		// Not part of the accepted set: put it back for the next reader.
		l.unread(c)
		return false
	}
	l.keep(c)
	return true
}
// acceptRun consumes consecutive runes for as long as each one is in chars.
// It reports whether at least one rune was consumed.
func (l *lexer) acceptRun(chars string) bool {
	if !l.accept(chars) {
		return false
	}
	for l.accept(chars) {
		// Keep draining; accept does the keeping and the final unread.
	}
	return true
}
func lexString(in string) chan token {
r := strings.NewReader(in)
return lex(r)
@ -195,9 +218,9 @@ func lexRoot(l *lexer) stateFn {
l.keep(r)
l.emit(t_object_separator)
return lexRoot
// case strings.IndexRune("-0123456789", r) >= 0:
// l.unread(r)
// return lexNumber, nil
case strings.IndexRune(".+-0123456789", r) >= 0:
l.unread(r)
return lexNumber
case unicode.IsSpace(r):
return lexRoot
case unicode.IsLower(r):
@ -280,76 +303,34 @@ func lexType(l *lexer) stateFn {
}
}
// func lexNumber(l *lexer) (stateFn, error) {
// r, err := l.next()
// if err != nil {
// return nil, err
// }
//
// switch {
// case r == '-', r == '+':
// l.keep(r)
// return lexNumber, nil
// case r == '0':
// l.keep(r)
// return lexHexOct, nil
// case strings.IndexRune("123456789", r) >= 0:
// l.keep(r)
// return lexDecimal, nil
// default:
// return nil, fmt.Errorf("unexpected rune in lexNumber: %c", r)
// }
// }
//
// func lexHexOct(l *lexer) (stateFn, error) {
// r, err := l.next()
// if err != nil {
// return nil, err
// }
//
// switch {
// case r == 'x', r == 'X':
// l.keep(r)
// return lexHex, nil
// case r == 'e', r == 'E':
// l.keep(r)
// return lexExponent, nil
// case r == '8', r == '9':
// return nil, fmt.Errorf("unexpected 8 or 9 in lexHexOct. there's no 8 or 9 in octal!")
// case strings.IndexRune("01234567", r) >= 0:
// l.keep(r)
// return lexOct, nil
// default:
// // we get here for the literals -0, +0 and 0
// l.unread(r)
// l.emit(t_int)
// return lexRoot, nil
// }
// }
//
// func lexHex(l *lexer) (stateFn, error) {
// r, err := l.next()
// if err != nil {
// return nil, err
// }
//
// switch {
//
// }
// }
//
// func lexDecimal(l *lexer) (stateFn, error) {
// r, err := l.next()
// if err != nil {
// return nil, err
// }
//
// switch {
// case strings.IndexRune("0123456789", r) >= 0:
// l.keep(r)
// return lexDecimal, nil
// case r == '.':
// l.keep(r)
// return lexFloat, nil
// }
// }
// lexNumber scans one numeric literal and emits it as a single t_number
// token, then returns to lexRoot.
//
// The literal is consumed piece by piece, every piece being optional:
//
//	sign      "+" or "-"
//	integer   decimal digits, or octal digits after a leading "0",
//	          or hex digits after a leading "0x"/"0X"
//	fraction  "." followed by digits
//	exponent  "e"/"E", optional sign, decimal digits
//	suffix    "i" marking an imaginary literal
//
// If the rune right after the literal is alphanumeric (per isAlphaNumeric),
// the literal is considered malformed and the lexErrorf state is returned
// instead of emitting a token.
//
// NOTE(review): nothing here checks that at least one digit was consumed, so
// a lone "+", "-" or "." routed here by lexRoot still appears to be emitted
// as a t_number — confirm whether that is intended.
func lexNumber(l *lexer) stateFn {
	// Optional leading sign.
	l.accept("+-")

	const decimal = "0123456789"
	intDigits, fracDigits := decimal, decimal
	if l.accept("0") {
		if l.accept("xX") {
			// Hex literal: both the integer and fraction runs take hex digits.
			intDigits = "0123456789abcdefABCDEF"
			fracDigits = intDigits
		} else {
			// A leading zero restricts only the integer part to octal digits.
			// The fraction must stay decimal, otherwise plain values such as
			// 0.9 or 0.85 would be rejected at the 8/9.
			intDigits = "01234567"
		}
	}
	l.acceptRun(intDigits)

	// Optional fraction.
	if l.accept(".") {
		l.acceptRun(fracDigits)
	}

	// Optional exponent; exponent digits are always decimal.
	if l.accept("eE") {
		l.accept("+-")
		l.acceptRun(decimal)
	}

	// Optional imaginary suffix.
	l.accept("i")

	// The literal must not run straight into a letter, digit or underscore
	// (e.g. "12abc", or "09" where 9 is not an octal digit).
	r := l.next()
	if isAlphaNumeric(r) {
		return lexErrorf("unexpected alphanum in lexNumber: %c", r)
	}
	l.unread(r)
	l.emit(t_number)
	return lexRoot
}
// isAlphaNumeric reports whether r is an underscore, a Unicode letter, or a
// Unicode decimal digit.
func isAlphaNumeric(r rune) bool {
	if r == '_' {
		return true
	}
	return unicode.IsLetter(r) || unicode.IsDigit(r)
}

@ -49,6 +49,53 @@ var primitivesTests = []struct {
{t_string, "orelli"},
{t_object_end, "}"},
}},
{`{
first_name: "jordan",
last_name: "orelli",
}`, []token{
{t_object_start, "{"},
{t_name, "first_name"},
{t_object_separator, ":"},
{t_string, "jordan"},
{t_list_separator, ","},
{t_name, "last_name"},
{t_object_separator, ":"},
{t_string, "orelli"},
{t_list_separator, ","},
{t_object_end, "}"},
}},
{`0`, []token{{t_number, "0"}}},
{`-0`, []token{{t_number, "-0"}}},
{`+0`, []token{{t_number, "+0"}}},
{`+125`, []token{{t_number, "+125"}}},
{`-125`, []token{{t_number, "-125"}}},
{`.0`, []token{{t_number, ".0"}}},
{`15`, []token{{t_number, "15"}}},
{`0x0`, []token{{t_number, "0x0"}}},
{`0xa`, []token{{t_number, "0xa"}}},
{`0xc0dea5cf`, []token{{t_number, "0xc0dea5cf"}}},
{`12.345`, []token{{t_number, "12.345"}}},
{`12.345 name`, []token{{t_number, "12.345"}, {t_name, "name"}}},
{`[12.345]`, []token{
{t_list_start, "["},
{t_number, "12.345"},
{t_list_end, "]"},
}},
{`[1, 2, 3]`, []token{
{t_list_start, "["},
{t_number, "1"},
{t_list_separator, ","},
{t_number, "2"},
{t_list_separator, ","},
{t_number, "3"},
{t_list_end, "]"},
}},
// a complex number generates two lexemes: one for its real component,
// and one for its imaginary component.
{`1+2i`, []token{{t_number, "1"}, {t_number, "+2i"}}},
{`1e9`, []token{{t_number, "1e9"}}},
{`1e+9`, []token{{t_number, "1e+9"}}},
{`1E-9`, []token{{t_number, "1E-9"}}},
}
func TestLexPrimities(t *testing.T) {

Loading…
Cancel
Save