From ffab114a0b56034dfea66343be5dbe8fe16638db Mon Sep 17 00:00:00 2001 From: Jordan Orelli Date: Sun, 11 Sep 2016 09:42:31 -0400 Subject: [PATCH] fix tree, start fieldpath stuff --- bit/decode.go | 47 +++++++++ ent/entity.go | 8 +- ent/fieldpath.go | 47 +++++++++ ent/huff.go | 245 +++++++++++++++++++++++++++++++---------------- ent/huff_test.go | 68 +++++++++++-- 5 files changed, 323 insertions(+), 92 deletions(-) create mode 100644 ent/fieldpath.go diff --git a/bit/decode.go b/bit/decode.go index da4e3b7..5c8795f 100644 --- a/bit/decode.go +++ b/bit/decode.go @@ -27,6 +27,24 @@ func ReadUBitVar(r Reader) uint64 { } } +// reads some sort of uint in a variable length encoding that appears in +// fieldpath. this encoding is deeply baffling. +func ReadUBitVarFP(r Reader) uint64 { + if ReadBool(r) { + return r.ReadBits(2) + } + if ReadBool(r) { + return r.ReadBits(4) + } + if ReadBool(r) { + return r.ReadBits(10) + } + if ReadBool(r) { + return r.ReadBits(17) + } + return r.ReadBits(31) +} + // ReadVarInt reads a variable length int value as a uint64. This is the binary // representation used by Protobuf. Each byte contributes 7 bits to the value // in little-endian order. The most-significant bit of each byte represents a @@ -50,6 +68,26 @@ func ReadVarInt(r Reader) uint64 { return x } +// reads a 32bit varint +func ReadVarInt32(r Reader) uint32 { + var ( + x uint64 + b uint64 + shift uint + ) + for ; shift < 32; shift += 7 { + b = r.ReadBits(8) + if r.Err() != nil { + return 0 + } + x |= b & 0x7f << shift + if b&0x80 == 0 { + return uint32(x) + } + } + return uint32(x) +} + func ReadBool(r Reader) bool { return r.ReadBits(1) != 0 } @@ -62,3 +100,12 @@ func ReadString(r Reader) string { } return buf.String() } + +// reads a ZigZag-encoded 32 bit signed integer +func ReadZigZag32(r Reader) int32 { + u := ReadVarInt32(r) + if u&1 > 0 { + return ^int32(u >> 1) + } + return int32(u >> 1) +} diff --git a/ent/entity.go b/ent/entity.go index 9fe9845..c648685 100644 --- a/ent/entity.go +++ b/ent/entity.go @@ -14,6 +14,12 @@ func (e *Entity) Read(br bit.Reader) error { if e.Class == nil { return fmt.Errorf("unable to read entity: entity has no class") } - Debug.Printf("Entity %v read", e) + Debug.Printf("entity %v read", e) + + fp := newFieldPath() + for fn := walk(htree, br); fn != nil; fn = walk(htree, br) { + fn(fp, br) + } + Debug.Printf("fieldpath %s", fp.pathString()) return nil } diff --git a/ent/fieldpath.go b/ent/fieldpath.go new file mode 100644 index 0000000..fc74a73 --- /dev/null +++ b/ent/fieldpath.go @@ -0,0 +1,47 @@ +package ent + +import ( + "bytes" + "fmt" +) + +// a fieldpath is a list of integers that is used to walk the type hierarchy to +// identify a given field on a given type. +type fieldPath struct { + vals []int + last int +} + +func newFieldPath() *fieldPath { + f := &fieldPath{vals: make([]int, 32)} + f.vals[f.last] = -1 + return f +} + +func (f *fieldPath) add(i int) { + f.vals[f.last] += i +} + +func (f *fieldPath) push(i int) { + f.last++ + f.vals[f.last] = i +} + +func (f *fieldPath) pop() int { + f.last-- + return f.vals[f.last+1] +} + +func (f *fieldPath) replaceAll(fn func(v int) int) { + for i := 0; i <= f.last; i++ { + f.vals[i] = fn(f.vals[i]) + } +} + +func (f *fieldPath) pathString() string { + var buf bytes.Buffer + for i := 0; i <= f.last; i++ { + fmt.Fprintf(&buf, "/%d", f.vals[i]) + } + return buf.String() +} diff --git a/ent/huff.go b/ent/huff.go index 2180d2a..70a1e65 100644 --- a/ent/huff.go +++ b/ent/huff.go @@ -10,58 +10,38 @@ import ( type node interface { weight() int - maxRank() int + rank() int } // intermediate node -type iNode struct{ left, right node } +type iNode struct { + left node + right node + _rank int +} func (n iNode) String() string { - return fmt.Sprintf("{%d %d *}", n.left.weight()+n.right.weight(), n.maxRank()) + return fmt.Sprintf("{%d %d *}", n.left.weight()+n.right.weight(), n.rank()) } func (n iNode) weight() int { return n.left.weight() + n.right.weight() } -func (n iNode) maxRank() int { - r := 0 - switch v := n.left.(type) { - case iNode: - r2 := v.maxRank() - if r2 > r { - r = r2 - } - case lNode: - if v.rank > r { - r = v.rank - } - } - switch v := n.right.(type) { - case iNode: - r2 := v.maxRank() - if r2 > r { - r = r2 - } - case lNode: - if v.rank > r { - r = v.rank - } - } - return r -} + +func (n iNode) rank() int { return n._rank } // leaf node type lNode struct { - name string - rank int - freq int - fn func() + name string + _rank int + freq int + fn func(*fieldPath, bit.Reader) } func (n lNode) String() string { - return fmt.Sprintf("{%d %d %s}", n.freq, n.rank, n.name) + return fmt.Sprintf("{%d %d %s}", n.freq, n._rank, n.name) } -func (n lNode) weight() int { return n.freq } -func (n lNode) maxRank() int { return n.rank } +func (n lNode) weight() int { return n.freq } +func (n lNode) rank() int { return n._rank } // three-way comparison for nodes, used in sorting func n_compare(n1, n2 node) int { @@ -70,22 +50,22 @@ func n_compare(n1, n2 node) int { return -1 case n1.weight() > n2.weight(): return 1 - case n1.maxRank() < n2.maxRank(): - return -1 - case n1.maxRank() > n2.maxRank(): + case n1.rank() < n2.rank(): return 1 + case n1.rank() > n2.rank(): + return -1 default: return 0 } } // joins two nodes, creating and returning their parent node -func n_join(n1, n2 node) node { +func n_join(n1, n2 node, rank int) node { switch n_compare(n1, n2) { case -1: - return iNode{n1, n2} + return iNode{n1, n2, rank} case 0, 1: - return iNode{n2, n1} + return iNode{n2, n1, rank} default: panic("not reached") } @@ -104,16 +84,17 @@ func makeTree(l nodeList) node { // remove the last two nodes, joining them to create their parent node. // append this parent node to the list of nodes. // repeat until only one node remains, which is the root node. - for len(l) > 1 { + for rank := len(l); len(l) > 1; rank++ { sort.Sort(sort.Reverse(l)) - l = append(l[:len(l)-2], n_join(l[len(l)-2], l[len(l)-1])) + l = append(l[:len(l)-2], n_join(l[len(l)-2], l[len(l)-1], rank)) } return l[0] } -func walk(n node, br bit.Reader) func() { +func walk(n node, br bit.Reader) func(*fieldPath, bit.Reader) { switch v := n.(type) { case lNode: + // Debug.Printf("fieldpath fn: %s", v.name) return v.fn case iNode: if bit.ReadBool(br) { @@ -137,49 +118,143 @@ func dump(n node, prefix string, w io.Writer) { } var hlist = nodeList{ - lNode{"PlusOne", 0, 36271, func() { panic("not implemented: PlusOne") }}, - lNode{"FieldPathEncodeFinish", 39, 25474, func() { panic("not implemented: FieldPathEncodeFinish") }}, - lNode{"PushOneLeftDeltaNRightNonZeroPack6Bits", 11, 10530, func() { panic("not implemented: PushOneLeftDeltaNRightNonZeroPack6Bits") }}, - lNode{"PlusTwo", 1, 10334, func() { panic("not implemented: PlusTwo") }}, - lNode{"PlusN", 4, 4128, func() { panic("not implemented: PlusN") }}, - lNode{"PushOneLeftDeltaOneRightNonZero", 8, 2942, func() { panic("not implemented: PushOneLeftDeltaOneRightNonZero") }}, - lNode{"PopAllButOnePlusOne", 29, 1837, func() { panic("not implemented: PopAllButOnePlusOne") }}, - lNode{"PlusThree", 2, 1375, func() { panic("not implemented: PlusThree") }}, - lNode{"PlusFour", 3, 646, func() { panic("not implemented: PlusFour") }}, - lNode{"PopAllButOnePlusNPack6Bits", 32, 634, func() { panic("not implemented: PopAllButOnePlusNPack6Bits") }}, - lNode{"PushOneLeftDeltaNRightZero", 9, 560, func() { panic("not implemented: PushOneLeftDeltaNRightZero") }}, - lNode{"PushOneLeftDeltaOneRightZero", 7, 521, func() { panic("not implemented: PushOneLeftDeltaOneRightZero") }}, - lNode{"PushOneLeftDeltaNRightNonZero", 10, 471, func() { panic("not implemented: PushOneLeftDeltaNRightNonZero") }}, - lNode{"PushNAndNonTopological", 26, 310, func() { panic("not implemented: PushNAndNonTopological") }}, - lNode{"PopAllButOnePlusNPack3Bits", 31, 300, func() { panic("not implemented: PopAllButOnePlusNPack3Bits") }}, - lNode{"NonTopoPenultimatePlusOne", 37, 271, func() { panic("not implemented: NonTopoPenultimatePlusOne") }}, - lNode{"PushOneLeftDeltaNRightNonZeroPack8Bits", 12, 251, func() { panic("not implemented: PushOneLeftDeltaNRightNonZeroPack8Bits") }}, - lNode{"PopAllButOnePlusN", 30, 149, func() { panic("not implemented: PopAllButOnePlusN") }}, - lNode{"NonTopoComplexPack4Bits", 38, 99, func() { panic("not implemented: NonTopoComplexPack4Bits") }}, - lNode{"NonTopoComplex", 36, 76, func() { panic("not implemented: NonTopoComplex") }}, - lNode{"PushOneLeftDeltaZeroRightZero", 5, 35, func() { panic("not implemented: PushOneLeftDeltaZeroRightZero") }}, - lNode{"PushOneLeftDeltaZeroRightNonZero", 6, 3, func() { panic("not implemented: PushOneLeftDeltaZeroRightNonZero") }}, - lNode{"PopOnePlusOne", 27, 2, func() { panic("not implemented: PopOnePlusOne") }}, - lNode{"PopNAndNonTopographical", 35, 1, func() { panic("not implemented: PopNAndNonTopographical") }}, + lNode{"PlusOne", 0, 36271, func(fp *fieldPath, br bit.Reader) { + fp.add(1) + }}, + lNode{"FieldPathEncodeFinish", 39, 25474, nil}, + lNode{"PushOneLeftDeltaNRightNonZeroPack6Bits", 11, 10530, func(fp *fieldPath, br bit.Reader) { + panic("not implemented: PushOneLeftDeltaNRightNonZeroPack6Bits") + }}, + lNode{"PlusTwo", 1, 10334, func(fp *fieldPath, br bit.Reader) { + fp.add(2) + }}, + lNode{"PlusN", 4, 4128, func(fp *fieldPath, br bit.Reader) { + fp.add(int(bit.ReadUBitVarFP(br)) + 5) + }}, + lNode{"PushOneLeftDeltaOneRightNonZero", 8, 2942, func(fp *fieldPath, br bit.Reader) { + panic("not implemented: PushOneLeftDeltaOneRightNonZero") + }}, + lNode{"PopAllButOnePlusOne", 29, 1837, func(fp *fieldPath, br bit.Reader) { + fp.last = 0 + fp.add(1) + }}, + lNode{"PlusThree", 2, 1375, func(fp *fieldPath, br bit.Reader) { + fp.add(3) + }}, + lNode{"PlusFour", 3, 646, func(fp *fieldPath, br bit.Reader) { + panic("not implemented: PlusFour") + }}, + lNode{"PopAllButOnePlusNPack6Bits", 32, 634, func(fp *fieldPath, br bit.Reader) { + panic("not implemented: PopAllButOnePlusNPack6Bits") + }}, + lNode{"PushOneLeftDeltaNRightZero", 9, 560, func(fp *fieldPath, br bit.Reader) { + panic("not implemented: PushOneLeftDeltaNRightZero") + }}, + lNode{"PushOneLeftDeltaOneRightZero", 7, 521, func(fp *fieldPath, br bit.Reader) { + fp.add(1) + fp.push(0) + }}, + lNode{"PushOneLeftDeltaNRightNonZero", 10, 471, func(fp *fieldPath, br bit.Reader) { + panic("not implemented: PushOneLeftDeltaNRightNonZero") + }}, + lNode{"PushNAndNonTopological", 26, 310, func(fp *fieldPath, br bit.Reader) { + panic("not implemented: PushNAndNonTopological") + }}, + lNode{"PopAllButOnePlusNPack3Bits", 31, 300, func(fp *fieldPath, br bit.Reader) { + panic("not implemented: PopAllButOnePlusNPack3Bits") + }}, + lNode{"NonTopoPenultimatePlusOne", 37, 271, func(fp *fieldPath, br bit.Reader) { + panic("not implemented: NonTopoPenultimatePlusOne") + }}, + lNode{"PushOneLeftDeltaNRightNonZeroPack8Bits", 12, 251, func(fp *fieldPath, br bit.Reader) { + panic("not implemented: PushOneLeftDeltaNRightNonZeroPack8Bits") + }}, + lNode{"PopAllButOnePlusN", 30, 149, func(fp *fieldPath, br bit.Reader) { + panic("not implemented: PopAllButOnePlusN") + }}, + lNode{"NonTopoComplexPack4Bits", 38, 99, func(fp *fieldPath, br bit.Reader) { + fp.replaceAll(func(i int) int { + if bit.ReadBool(br) { + return i + int(br.ReadBits(4)) - 7 // ?! + } + return i + }) + }}, + lNode{"NonTopoComplex", 36, 76, func(fp *fieldPath, br bit.Reader) { + panic("not implemented: NonTopoComplex") + // for i := 0; i < len(fp.index); i++ { + // fp.replaceAll(func(i int) int { + // if bit.ReadBool(br) { + // return i + bit.ReadVarInt(br) + // } + // return i + // }) + // } + }}, + lNode{"PushOneLeftDeltaZeroRightZero", 5, 35, func(fp *fieldPath, br bit.Reader) { + fp.push(0) + }}, + lNode{"PushOneLeftDeltaZeroRightNonZero", 6, 3, func(fp *fieldPath, br bit.Reader) { + fp.push(int(bit.ReadUBitVarFP(br))) + }}, + lNode{"PopOnePlusOne", 27, 2, func(fp *fieldPath, br bit.Reader) { + fp.pop() + fp.add(1) + }}, + lNode{"PopNAndNonTopographical", 35, 1, func(fp *fieldPath, br bit.Reader) { + panic("not implemented: PopNAndNonTopographical") + }}, // all the other operations have weights of 0 in clarity, which makes no // sense. - lNode{"PopNPlusN", 34, 1, func() { panic("not implemented: PopNPlusN") }}, - lNode{"PopNPlusOne", 33, 1, func() { panic("not implemented: PopNPlusOne") }}, - lNode{"PopOnePlusN", 28, 1, func() { panic("not implemented: PopOnePlusN") }}, - lNode{"PushN", 25, 1, func() { panic("not implemented: PushN") }}, - lNode{"PushThreePack5LeftDeltaN", 24, 1, func() { panic("not implemented: PushThreePack5LeftDeltaN") }}, - lNode{"PushThreeLeftDeltaN", 23, 1, func() { panic("not implemented: PushThreeLeftDeltaN") }}, - lNode{"PushTwoPack5LeftDeltaN", 22, 1, func() { panic("not implemented: PushTwoPack5LeftDeltaN") }}, - lNode{"PushTwoLeftDeltaN", 21, 1, func() { panic("not implemented: PushTwoLeftDeltaN") }}, - lNode{"PushThreePack5LeftDeltaOne", 20, 1, func() { panic("not implemented: PushThreePack5LeftDeltaOne") }}, - lNode{"PushThreeLeftDeltaOne", 19, 1, func() { panic("not implemented: PushThreeLeftDeltaOne") }}, - lNode{"PushTwoPack5LeftDeltaOne", 18, 1, func() { panic("not implemented: PushTwoPack5LeftDeltaOne") }}, - lNode{"PushTwoLeftDeltaOne", 17, 1, func() { panic("not implemented: PushTwoLeftDeltaOne") }}, - lNode{"PushThreePack5LeftDeltaZero", 16, 1, func() { panic("not implemented: PushThreePack5LeftDeltaZero") }}, - lNode{"PushThreeLeftDeltaZero", 15, 1, func() { panic("not implemented: PushThreeLeftDeltaZero") }}, - lNode{"PushTwoPack5LeftDeltaZero", 14, 1, func() { panic("not implemented: PushTwoPack5LeftDeltaZero") }}, - lNode{"PushTwoLeftDeltaZero", 13, 1, func() { panic("not implemented: PushTwoLeftDeltaZero") }}, + lNode{"PopNPlusN", 34, 1, func(fp *fieldPath, br bit.Reader) { + panic("not implemented: PopNPlusN") + }}, + lNode{"PopNPlusOne", 33, 1, func(fp *fieldPath, br bit.Reader) { + panic("not implemented: PopNPlusOne") + }}, + lNode{"PopOnePlusN", 28, 1, func(fp *fieldPath, br bit.Reader) { + panic("not implemented: PopOnePlusN") + }}, + lNode{"PushN", 25, 1, func(fp *fieldPath, br bit.Reader) { + panic("not implemented: PushN") + }}, + lNode{"PushThreePack5LeftDeltaN", 24, 1, func(fp *fieldPath, br bit.Reader) { + panic("not implemented: PushThreePack5LeftDeltaN") + }}, + lNode{"PushThreeLeftDeltaN", 23, 1, func(fp *fieldPath, br bit.Reader) { + panic("not implemented: PushThreeLeftDeltaN") + }}, + lNode{"PushTwoPack5LeftDeltaN", 22, 1, func(fp *fieldPath, br bit.Reader) { + panic("not implemented: PushTwoPack5LeftDeltaN") + }}, + lNode{"PushTwoLeftDeltaN", 21, 1, func(fp *fieldPath, br bit.Reader) { + panic("not implemented: PushTwoLeftDeltaN") + }}, + lNode{"PushThreePack5LeftDeltaOne", 20, 1, func(fp *fieldPath, br bit.Reader) { + panic("not implemented: PushThreePack5LeftDeltaOne") + }}, + lNode{"PushThreeLeftDeltaOne", 19, 1, func(fp *fieldPath, br bit.Reader) { + panic("not implemented: PushThreeLeftDeltaOne") + }}, + lNode{"PushTwoPack5LeftDeltaOne", 18, 1, func(fp *fieldPath, br bit.Reader) { + panic("not implemented: PushTwoPack5LeftDeltaOne") + }}, + lNode{"PushTwoLeftDeltaOne", 17, 1, func(fp *fieldPath, br bit.Reader) { + panic("not implemented: PushTwoLeftDeltaOne") + }}, + lNode{"PushThreePack5LeftDeltaZero", 16, 1, func(fp *fieldPath, br bit.Reader) { + panic("not implemented: PushThreePack5LeftDeltaZero") + }}, + lNode{"PushThreeLeftDeltaZero", 15, 1, func(fp *fieldPath, br bit.Reader) { + panic("not implemented: PushThreeLeftDeltaZero") + }}, + lNode{"PushTwoPack5LeftDeltaZero", 14, 1, func(fp *fieldPath, br bit.Reader) { + panic("not implemented: PushTwoPack5LeftDeltaZero") + }}, + lNode{"PushTwoLeftDeltaZero", 13, 1, func(fp *fieldPath, br bit.Reader) { + panic("not implemented: PushTwoLeftDeltaZero") + }}, } var htree = makeTree(hlist) diff --git a/ent/huff_test.go b/ent/huff_test.go index 13a7556..e40ce5d 100644 --- a/ent/huff_test.go +++ b/ent/huff_test.go @@ -1,14 +1,70 @@ package ent import ( - "bytes" "testing" ) -func TestDump(t *testing.T) { - t.Log(hlist) +// thanks to @spheenik and @invokr for the expected huffman codes. these are +// ripped from the huffman trees that are known to be working in clarity and +// manta. +var expected_codes = map[string]string{ + "PlusOne": "0", + "FieldPathEncodeFinish": "10", + "PlusTwo": "1110", + "PushOneLeftDeltaNRightNonZeroPack6Bits": "1111", + "PushOneLeftDeltaOneRightNonZero": "11000", + "PlusN": "11010", + "PlusThree": "110010", + "PopAllButOnePlusOne": "110011", + "PushOneLeftDeltaNRightNonZero": "11011001", + "PushOneLeftDeltaOneRightZero": "11011010", + "PushOneLeftDeltaNRightZero": "11011100", + "PopAllButOnePlusNPack6Bits": "11011110", + "PlusFour": "11011111", + "PopAllButOnePlusN": "110110000", + "PushOneLeftDeltaNRightNonZeroPack8Bits": "110110110", + "NonTopoPenultimatePlusOne": "110110111", + "PopAllButOnePlusNPack3Bits": "110111010", + "PushNAndNonTopological": "110111011", + "NonTopoComplexPack4Bits": "1101100010", + "NonTopoComplex": "11011000111", + "PushOneLeftDeltaZeroRightZero": "110110001101", + "PopOnePlusOne": "110110001100001", + "PushOneLeftDeltaZeroRightNonZero": "110110001100101", + "PopNAndNonTopographical": "1101100011000000", + "PopNPlusN": "1101100011000001", + "PushN": "1101100011000100", + "PushThreePack5LeftDeltaN": "1101100011000101", + "PopNPlusOne": "1101100011000110", + "PopOnePlusN": "1101100011000111", + "PushTwoLeftDeltaZero": "1101100011001000", + "PushThreeLeftDeltaZero": "11011000110010010", + "PushTwoPack5LeftDeltaZero": "11011000110010011", + "PushTwoLeftDeltaN": "11011000110011000", + "PushThreePack5LeftDeltaOne": "11011000110011001", + "PushThreeLeftDeltaN": "11011000110011010", + "PushTwoPack5LeftDeltaN": "11011000110011011", + "PushTwoLeftDeltaOne": "11011000110011100", + "PushThreePack5LeftDeltaZero": "11011000110011101", + "PushThreeLeftDeltaOne": "11011000110011110", + "PushTwoPack5LeftDeltaOne": "11011000110011111", +} + +func TestTree(t *testing.T) { + var testWalk func(node, string) + testWalk = func(n node, code string) { + switch v := n.(type) { + case lNode: + if expected_codes[v.name] != code { + t.Errorf("op %s has code %s, expected %s", v.name, code, expected_codes[v.name]) + } else { + t.Logf("op %s has expected code %s", v.name, code) + } + case iNode: + testWalk(v.left, code+"0") + testWalk(v.right, code+"1") + } + } - var buf bytes.Buffer - dump(htree, "", &buf) - t.Logf("%s", buf.String()) + testWalk(htree, "") }