You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

172 lines
3.9 KiB
Go

// package lexnum provides an efficient lexicographic encoding of numbers as
// described here: http://www.zanopha.com/docs/elen.pdf
//
// lexnum allows a developer to encode integers as strings, such that the
// strings may be lexically sorted, preserving their numerical orderings.
//
// e.g., if one were to sort the numbers 0, 1, 2, 9, 10 lexically, we would
// wind up with 0, 1, 10, 2, 9. If we knew the maximum value, we could
// prescribe some zero-padding. Failing that, we would need a lexicographic
// encoding of the numbers. Lexnum attempts to provide this alternative.
package lexnum
import (
"fmt"
"strconv"
)
// an Encoder may be used to encode or decode an integer as a string. The
// produced strings will have the property that any set of numbers will have
// the same lexical sorting and numeric sorting.
type Encoder struct {
pos rune
neg rune
}
// NewEncoder creates a new lexnum Encoder. We achieve
func NewEncoder(pos rune, neg rune) *Encoder {
if pos < neg {
panic("positive lexnum rune must be of higher rank than negative lexnum rune")
}
if neg >= '0' {
panic("negative prefix must be lexically less than '0'")
}
if pos <= '9' {
panic("positive prefix must be lexically greather than '9'")
}
return &Encoder{pos: pos, neg: neg}
}
// Encodes an integer as a string.
func (l Encoder) EncodeInt(i int) string {
if i == 0 {
return "0"
}
if i > 0 {
return l.encodePos(i)
}
return l.encodeNeg(i)
}
func (l Encoder) encodePos(i int) string {
s := strconv.Itoa(i)
if len(s) == 1 {
return fmt.Sprintf("%c%s", l.pos, s)
}
return fmt.Sprintf("%c%s%s", l.pos, l.encodePos(len(s)), s)
}
func (l Encoder) encodeNeg(i int) string {
if i < 0 {
i = -i
}
runes := []rune(strconv.Itoa(i))
for i := range runes {
runes[i] = l.flip(runes[i])
}
if len(runes) == 1 {
return fmt.Sprintf("%c%s", l.neg, string(runes))
}
return fmt.Sprintf("%c%s%s", l.neg, l.encodeNeg(len(runes)), string(runes))
}
func (l Encoder) flip(r rune) rune {
switch r {
case '0':
return '9'
case '1':
return '8'
case '2':
return '7'
case '3':
return '6'
case '4':
return '5'
case '5':
return '4'
case '6':
return '3'
case '7':
return '2'
case '8':
return '1'
case '9':
return '0'
default:
panic(fmt.Sprintf("can't flip illegal rune %c", r))
}
}
func (l Encoder) flipInPlace(runes []rune) {
for i := range runes {
runes[i] = l.flip(runes[i])
}
}
func (l Encoder) isDigit(r rune) bool {
return r >= '0' && r <= '9'
}
func (l Encoder) prefixCount(runes []rune) int {
i := 0
for _, r := range runes {
if r == l.neg || r == l.pos {
i++
} else {
break
}
}
return i
}
// Decodes a lexnum string, returning its original integer representation.
func (l Encoder) DecodeInt(s string) (int, error) {
if s == "" {
return 0, fmt.Errorf("illegal Lexnum decode of empty string")
}
runes := []rune(s)
if len(runes) == 1 {
if runes[0] == '0' {
return 0, nil
}
return 0, fmt.Errorf("illegal Lexnum decode of non-zero unit string: %s", s)
}
switch runes[0] {
case l.neg:
return l.decodeNeg(runes)
case l.pos:
return l.decodePos(runes)
default:
return 0, fmt.Errorf("illegal Lexnum decode of string without %c or %c as initial rune: %s", l.neg, l.pos, s)
}
}
func (l Encoder) decodePos(runes []rune) (int, error) {
return l._decodePos(runes, 1, l.prefixCount(runes))
}
func (l Encoder) _decodePos(runes []rune, size int, index int) (int, error) {
n, err := strconv.ParseInt(string(runes[index:index+size]), 10, 64)
if err != nil {
return 0, err
}
if index+size > len(runes) {
return 0, fmt.Errorf("illegal Lexnum decode of abnormally long string %s", string(runes))
}
if index+size == len(runes) {
return int(n), nil
}
return l._decodePos(runes, int(n), index+size)
}
func (l Encoder) decodeNeg(runes []rune) (int, error) {
p := l.prefixCount(runes)
l.flipInPlace(runes[p:len(runes)])
n, err := l._decodePos(runes, 1, p)
if err != nil {
return 0, err
}
return -n, nil
}