Source file
src/hash/maphash/smhasher_test.go
1
2
3
4
5
6
7 package maphash
8
9 import (
10 "fmt"
11 "internal/testenv"
12 "math"
13 "math/rand"
14 "runtime"
15 "slices"
16 "strings"
17 "testing"
18 "unsafe"
19 )
20
21
22
23
24
25
26
27
28 var fixedSeed = MakeSeed()
29
30
31
32
33 func TestSmhasherSanity(t *testing.T) {
34 t.Parallel()
35 r := rand.New(rand.NewSource(1234))
36 const REP = 10
37 const KEYMAX = 128
38 const PAD = 16
39 const OFFMAX = 16
40 for k := 0; k < REP; k++ {
41 for n := 0; n < KEYMAX; n++ {
42 for i := 0; i < OFFMAX; i++ {
43 var b [KEYMAX + OFFMAX + 2*PAD]byte
44 var c [KEYMAX + OFFMAX + 2*PAD]byte
45 randBytes(r, b[:])
46 randBytes(r, c[:])
47 copy(c[PAD+i:PAD+i+n], b[PAD:PAD+n])
48 if bytesHash(b[PAD:PAD+n]) != bytesHash(c[PAD+i:PAD+i+n]) {
49 t.Errorf("hash depends on bytes outside key")
50 }
51 }
52 }
53 }
54 }
55
56 func bytesHash(b []byte) uint64 {
57 var h Hash
58 h.SetSeed(fixedSeed)
59 h.Write(b)
60 return h.Sum64()
61 }
62 func stringHash(s string) uint64 {
63 var h Hash
64 h.SetSeed(fixedSeed)
65 h.WriteString(s)
66 return h.Sum64()
67 }
68
69 const hashSize = 64
70
71 func randBytes(r *rand.Rand, b []byte) {
72 r.Read(b)
73 }
74
75
76 type hashSet struct {
77 list []uint64
78 }
79
80 func newHashSet() *hashSet {
81 return &hashSet{list: make([]uint64, 0, 1024)}
82 }
83 func (s *hashSet) add(h uint64) {
84 s.list = append(s.list, h)
85 }
86 func (s *hashSet) addS(x string) {
87 s.add(stringHash(x))
88 }
89 func (s *hashSet) addB(x []byte) {
90 s.add(bytesHash(x))
91 }
92 func (s *hashSet) addS_seed(x string, seed Seed) {
93 var h Hash
94 h.SetSeed(seed)
95 h.WriteString(x)
96 s.add(h.Sum64())
97 }
98 func (s *hashSet) check(t *testing.T) {
99 t.Helper()
100 list := s.list
101 slices.Sort(list)
102
103 collisions := 0
104 for i := 1; i < len(list); i++ {
105 if list[i] == list[i-1] {
106 collisions++
107 }
108 }
109 n := len(list)
110
111 const SLOP = 10.0
112 pairs := int64(n) * int64(n-1) / 2
113 expected := float64(pairs) / math.Pow(2.0, float64(hashSize))
114 stddev := math.Sqrt(expected)
115 if float64(collisions) > expected+SLOP*(3*stddev+1) {
116 t.Errorf("unexpected number of collisions: got=%d mean=%f stddev=%f", collisions, expected, stddev)
117 }
118
119 s.list = s.list[:0]
120 }
121
122
123 func TestSmhasherAppendedZeros(t *testing.T) {
124 t.Parallel()
125 s := "hello" + strings.Repeat("\x00", 256)
126 h := newHashSet()
127 for i := 0; i <= len(s); i++ {
128 h.addS(s[:i])
129 }
130 h.check(t)
131 }
132
133
134 func TestSmhasherSmallKeys(t *testing.T) {
135 testenv.ParallelOn64Bit(t)
136 h := newHashSet()
137 var b [3]byte
138 for i := 0; i < 256; i++ {
139 b[0] = byte(i)
140 h.addB(b[:1])
141 for j := 0; j < 256; j++ {
142 b[1] = byte(j)
143 h.addB(b[:2])
144 if !testing.Short() {
145 for k := 0; k < 256; k++ {
146 b[2] = byte(k)
147 h.addB(b[:3])
148 }
149 }
150 }
151 }
152 h.check(t)
153 }
154
155
156 func TestSmhasherZeros(t *testing.T) {
157 t.Parallel()
158 N := 256 * 1024
159 if testing.Short() {
160 N = 1024
161 }
162 h := newHashSet()
163 b := make([]byte, N)
164 for i := 0; i <= N; i++ {
165 h.addB(b[:i])
166 }
167 h.check(t)
168 }
169
170
171 func TestSmhasherTwoNonzero(t *testing.T) {
172 if runtime.GOARCH == "wasm" {
173 t.Skip("Too slow on wasm")
174 }
175 if testing.Short() {
176 t.Skip("Skipping in short mode")
177 }
178 testenv.ParallelOn64Bit(t)
179 h := newHashSet()
180 for n := 2; n <= 16; n++ {
181 twoNonZero(h, n)
182 }
183 h.check(t)
184 }
185 func twoNonZero(h *hashSet, n int) {
186 b := make([]byte, n)
187
188
189 h.addB(b)
190
191
192 for i := 0; i < n; i++ {
193 for x := 1; x < 256; x++ {
194 b[i] = byte(x)
195 h.addB(b)
196 b[i] = 0
197 }
198 }
199
200
201 for i := 0; i < n; i++ {
202 for x := 1; x < 256; x++ {
203 b[i] = byte(x)
204 for j := i + 1; j < n; j++ {
205 for y := 1; y < 256; y++ {
206 b[j] = byte(y)
207 h.addB(b)
208 b[j] = 0
209 }
210 }
211 b[i] = 0
212 }
213 }
214 }
215
216
217 func TestSmhasherCyclic(t *testing.T) {
218 if testing.Short() {
219 t.Skip("Skipping in short mode")
220 }
221 t.Parallel()
222 r := rand.New(rand.NewSource(1234))
223 const REPEAT = 8
224 const N = 1000000
225 h := newHashSet()
226 for n := 4; n <= 12; n++ {
227 b := make([]byte, REPEAT*n)
228 for i := 0; i < N; i++ {
229 b[0] = byte(i * 79 % 97)
230 b[1] = byte(i * 43 % 137)
231 b[2] = byte(i * 151 % 197)
232 b[3] = byte(i * 199 % 251)
233 randBytes(r, b[4:n])
234 for j := n; j < n*REPEAT; j++ {
235 b[j] = b[j-n]
236 }
237 h.addB(b)
238 }
239 h.check(t)
240 }
241 }
242
243
244 func TestSmhasherSparse(t *testing.T) {
245 if runtime.GOARCH == "wasm" {
246 t.Skip("Too slow on wasm")
247 }
248 if testing.Short() {
249 t.Skip("Skipping in short mode")
250 }
251 t.Parallel()
252 h := newHashSet()
253 sparse(t, h, 32, 6)
254 sparse(t, h, 40, 6)
255 sparse(t, h, 48, 5)
256 sparse(t, h, 56, 5)
257 sparse(t, h, 64, 5)
258 sparse(t, h, 96, 4)
259 sparse(t, h, 256, 3)
260 sparse(t, h, 2048, 2)
261 }
262 func sparse(t *testing.T, h *hashSet, n int, k int) {
263 t.Helper()
264 b := make([]byte, n/8)
265 setbits(h, b, 0, k)
266 h.check(t)
267 }
268
269
270 func setbits(h *hashSet, b []byte, i int, k int) {
271 h.addB(b)
272 if k == 0 {
273 return
274 }
275 for j := i; j < len(b)*8; j++ {
276 b[j/8] |= byte(1 << uint(j&7))
277 setbits(h, b, j+1, k-1)
278 b[j/8] &= byte(^(1 << uint(j&7)))
279 }
280 }
281
282
283
284 func TestSmhasherPermutation(t *testing.T) {
285 if runtime.GOARCH == "wasm" {
286 t.Skip("Too slow on wasm")
287 }
288 if testing.Short() {
289 t.Skip("Skipping in short mode")
290 }
291 testenv.ParallelOn64Bit(t)
292 h := newHashSet()
293 permutation(t, h, []uint32{0, 1, 2, 3, 4, 5, 6, 7}, 8)
294 permutation(t, h, []uint32{0, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 8)
295 permutation(t, h, []uint32{0, 1}, 20)
296 permutation(t, h, []uint32{0, 1 << 31}, 20)
297 permutation(t, h, []uint32{0, 1, 2, 3, 4, 5, 6, 7, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 6)
298 }
299 func permutation(t *testing.T, h *hashSet, s []uint32, n int) {
300 t.Helper()
301 b := make([]byte, n*4)
302 genPerm(h, b, s, 0)
303 h.check(t)
304 }
305 func genPerm(h *hashSet, b []byte, s []uint32, n int) {
306 h.addB(b[:n])
307 if n == len(b) {
308 return
309 }
310 for _, v := range s {
311 b[n] = byte(v)
312 b[n+1] = byte(v >> 8)
313 b[n+2] = byte(v >> 16)
314 b[n+3] = byte(v >> 24)
315 genPerm(h, b, s, n+4)
316 }
317 }
318
319 type key interface {
320 clear()
321 random(r *rand.Rand)
322 bits() int
323 flipBit(i int)
324 hash() uint64
325 name() string
326 }
327
328 type bytesKey struct {
329 b []byte
330 }
331
332 func (k *bytesKey) clear() {
333 clear(k.b)
334 }
335 func (k *bytesKey) random(r *rand.Rand) {
336 randBytes(r, k.b)
337 }
338 func (k *bytesKey) bits() int {
339 return len(k.b) * 8
340 }
341 func (k *bytesKey) flipBit(i int) {
342 k.b[i>>3] ^= byte(1 << uint(i&7))
343 }
344 func (k *bytesKey) hash() uint64 {
345 return bytesHash(k.b)
346 }
347 func (k *bytesKey) name() string {
348 return fmt.Sprintf("bytes%d", len(k.b))
349 }
350
351
352 func TestSmhasherAvalanche(t *testing.T) {
353 if runtime.GOARCH == "wasm" {
354 t.Skip("Too slow on wasm")
355 }
356 if testing.Short() {
357 t.Skip("Skipping in short mode")
358 }
359 t.Parallel()
360 avalancheTest1(t, &bytesKey{make([]byte, 2)})
361 avalancheTest1(t, &bytesKey{make([]byte, 4)})
362 avalancheTest1(t, &bytesKey{make([]byte, 8)})
363 avalancheTest1(t, &bytesKey{make([]byte, 16)})
364 avalancheTest1(t, &bytesKey{make([]byte, 32)})
365 avalancheTest1(t, &bytesKey{make([]byte, 200)})
366 }
367 func avalancheTest1(t *testing.T, k key) {
368 t.Helper()
369 const REP = 100000
370 r := rand.New(rand.NewSource(1234))
371 n := k.bits()
372
373
374
375 grid := make([][hashSize]int, n)
376
377 for z := 0; z < REP; z++ {
378
379 k.random(r)
380 h := k.hash()
381
382
383 for i := 0; i < n; i++ {
384 k.flipBit(i)
385 d := h ^ k.hash()
386 k.flipBit(i)
387
388
389 g := &grid[i]
390 for j := 0; j < hashSize; j++ {
391 g[j] += int(d & 1)
392 d >>= 1
393 }
394 }
395 }
396
397
398
399
400
401
402 N := n * hashSize
403 var c float64
404
405 for c = 0.0; math.Pow(math.Erf(c/math.Sqrt(2)), float64(N)) < .9999; c += .1 {
406 }
407 c *= 11.0
408 mean := .5 * REP
409 stddev := .5 * math.Sqrt(REP)
410 low := int(mean - c*stddev)
411 high := int(mean + c*stddev)
412 for i := 0; i < n; i++ {
413 for j := 0; j < hashSize; j++ {
414 x := grid[i][j]
415 if x < low || x > high {
416 t.Errorf("bad bias for %s bit %d -> bit %d: %d/%d\n", k.name(), i, j, x, REP)
417 }
418 }
419 }
420 }
421
422
423 func TestSmhasherWindowed(t *testing.T) {
424 t.Parallel()
425 windowed(t, &bytesKey{make([]byte, 128)})
426 }
427 func windowed(t *testing.T, k key) {
428 if runtime.GOARCH == "wasm" {
429 t.Skip("Too slow on wasm")
430 }
431 if testing.Short() {
432 t.Skip("Skipping in short mode")
433 }
434 const BITS = 16
435
436 h := newHashSet()
437 for r := 0; r < k.bits(); r++ {
438 for i := 0; i < 1<<BITS; i++ {
439 k.clear()
440 for j := 0; j < BITS; j++ {
441 if i>>uint(j)&1 != 0 {
442 k.flipBit((j + r) % k.bits())
443 }
444 }
445 h.add(k.hash())
446 }
447 h.check(t)
448 }
449 }
450
451
452 func TestSmhasherText(t *testing.T) {
453 if testing.Short() {
454 t.Skip("Skipping in short mode")
455 }
456 t.Parallel()
457 h := newHashSet()
458 text(t, h, "Foo", "Bar")
459 text(t, h, "FooBar", "")
460 text(t, h, "", "FooBar")
461 }
462 func text(t *testing.T, h *hashSet, prefix, suffix string) {
463 t.Helper()
464 const N = 4
465 const S = "ABCDEFGHIJKLMNOPQRSTabcdefghijklmnopqrst0123456789"
466 const L = len(S)
467 b := make([]byte, len(prefix)+N+len(suffix))
468 copy(b, prefix)
469 copy(b[len(prefix)+N:], suffix)
470 c := b[len(prefix):]
471 for i := 0; i < L; i++ {
472 c[0] = S[i]
473 for j := 0; j < L; j++ {
474 c[1] = S[j]
475 for k := 0; k < L; k++ {
476 c[2] = S[k]
477 for x := 0; x < L; x++ {
478 c[3] = S[x]
479 h.addB(b)
480 }
481 }
482 }
483 }
484 h.check(t)
485 }
486
487
488 func TestSmhasherSeed(t *testing.T) {
489 if unsafe.Sizeof(uintptr(0)) == 4 {
490 t.Skip("32-bit platforms don't have ideal seed-input distributions (see issue 33988)")
491 }
492 t.Parallel()
493 h := newHashSet()
494 const N = 100000
495 s := "hello"
496 for i := 0; i < N; i++ {
497 h.addS_seed(s, Seed{s: uint64(i + 1)})
498 h.addS_seed(s, Seed{s: uint64(i+1) << 32})
499 }
500 h.check(t)
501 }
502
View as plain text