murmur3_32.gno
3.84 Kb · 166 lines
1package murmur3
2
3import (
4 "hash"
5 "math/bits"
6)
7
8const (
9 c1 = 0xcc9e2d51
10 c2 = 0x1b873593
11 r1 = 15
12 r2 = 13
13 blockSize32 = 4
14 hashSize32 = 4
15)
16
17type digest32 struct {
18 seed uint32
19 h1 uint32
20 tail [4]byte
21 tailBytesCount int
22 writtenLen int
23}
24
25// New32 returns a new hash.Hash32 computing the MurmurHash3 hash.
26// It can generate 2 to the power of 32 unique 32-bit values ranging
27// from 0 to 4_294_967_296.
28// Hash is not collision safe, so it's recommended to handle potential
29// collisions when dealing with more than 65_536 values which is when
30// probability raises.
31func New32() hash.Hash32 {
32 return &digest32{}
33}
34
35// NewWithSeed32 returns a new hash.Hash32 computing the MurmurHash3
36// hash with the given seed.
37// It can generate 2 to the power of 32 unique 32-bit values ranging
38// from 0 to 4_294_967_296.
39// Hash is not collision safe, so it's recommended to handle potential
40// collisions when dealing with more than 65_536 values which is when
41// provability raises.
42func NewWithSeed32(seed uint32) hash.Hash32 {
43 return &digest32{seed: seed, h1: seed}
44}
45
46// Sum32 returns the MurmurHash3 hash of the specified data.
47func Sum32(data []byte) uint32 {
48 return Sum32WithSeed(data, 0)
49}
50
51// Sum32WithSeed returns the MurmurHash3 hash of the specified data
52// using the given seed.
53func Sum32WithSeed(data []byte, seed uint32) uint32 {
54 h := NewWithSeed32(seed)
55 h.Write(data)
56 return h.Sum32()
57}
58
59func (digest32) BlockSize() int {
60 return blockSize32
61}
62
63func (digest32) Size() int {
64 return hashSize32
65}
66
67func (d *digest32) Reset() {
68 d.h1 = d.seed
69 d.tailBytesCount = 0
70 d.writtenLen = 0
71}
72
73func (d *digest32) Write(bz []byte) (int, error) {
74 // Keep track of all written bytes
75 total := len(bz)
76 d.writtenLen += total
77
78 // If there are leftover bytes in the tail,
79 // try to fill a block with the new data.
80 if d.tailBytesCount > 0 {
81 need := blockSize32 - d.tailBytesCount
82 if len(bz) < need {
83 // Append more data to the block
84 copy(d.tail[d.tailBytesCount:], bz)
85 d.tailBytesCount += len(bz)
86
87 // Tail block is still not filled
88 return total, nil
89 }
90
91 // Append remaining block data and process current block
92 copy(d.tail[d.tailBytesCount:], bz[:need])
93 d.processBlock(d.tail[:])
94
95 // Update data to exclude the appended processed data
96 bz = bz[need:]
97 d.tailBytesCount = 0
98 }
99
100 // Process full 4-byte blocks
101 for len(bz) >= blockSize32 {
102 d.processBlock(bz[:blockSize32])
103 bz = bz[blockSize32:]
104 }
105
106 // Save remaining bytes in the tail. Remaining data can be left
107 // out of the last block when data size is less than 4 bytes.
108 if len(bz) > 0 {
109 d.tailBytesCount = copy(d.tail[:], bz)
110 }
111
112 return total, nil
113}
114
115func (d digest32) Sum32() uint32 {
116 h1 := d.h1
117
118 // Process tail bytes when they exist. Blocks have 4 bytes, so depending
119 // on the size of the data some bytes might not be processed after the last
120 // write if the last block has less than 4 bytes.
121 var k1 uint32
122 switch d.tailBytesCount {
123 case 3:
124 k1 ^= uint32(d.tail[2]) << 16
125 fallthrough
126 case 2:
127 k1 ^= uint32(d.tail[1]) << 8
128 fallthrough
129 case 1:
130 k1 ^= uint32(d.tail[0])
131 k1 *= c1
132 k1 = bits.RotateLeft32(k1, r1)
133 k1 *= c2
134 h1 ^= k1
135 }
136
137 // Finalization mix
138 h1 ^= uint32(d.writtenLen)
139
140 h1 ^= h1 >> 16
141 h1 *= 0x85ebca6b
142 h1 ^= h1 >> r2
143 h1 *= 0xc2b2ae35
144 h1 ^= h1 >> 16
145
146 return h1
147}
148
149func (d digest32) Sum(bz []byte) []byte {
150 // Append hash in little-endian order
151 s := d.Sum32()
152 return append(bz, byte(s), byte(s>>8), byte(s>>16), byte(s>>24))
153}
154
155// processBlock reads a 4-byte little-endian block
156// and mixes it into the hash state.
157func (d *digest32) processBlock(bz []byte) {
158 k1 := uint32(bz[0]) | uint32(bz[1])<<8 | uint32(bz[2])<<16 | uint32(bz[3])<<24
159 k1 *= c1
160 k1 = bits.RotateLeft32(k1, r1)
161 k1 *= c2
162
163 d.h1 ^= k1
164 d.h1 = bits.RotateLeft32(d.h1, r2)
165 d.h1 = d.h1*5 + 0xe6546b64 // MurmurHash3 "m" = 5, "n" = 0xe6546b64
166}