hash_test.go

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package runtime_test
     6  
     7  import (
     8  	"encoding/binary"
     9  	"fmt"
    10  	"internal/race"
    11  	"internal/testenv"
    12  	"math"
    13  	"math/rand"
    14  	"os"
    15  	. "runtime"
    16  	"slices"
    17  	"strings"
    18  	"testing"
    19  	"unsafe"
    20  )
    21  
    22  func TestMemHash32Equality(t *testing.T) {
    23  	if *UseAeshash {
    24  		t.Skip("skipping since AES hash implementation is used")
    25  	}
    26  	var b [4]byte
    27  	r := rand.New(rand.NewSource(1234))
    28  	seed := uintptr(r.Uint64())
    29  	for i := 0; i < 100; i++ {
    30  		randBytes(r, b[:])
    31  		got := MemHash32(unsafe.Pointer(&b), seed)
    32  		want := MemHash(unsafe.Pointer(&b), seed, 4)
    33  		if got != want {
    34  			t.Errorf("MemHash32(%x, %v) = %v; want %v", b, seed, got, want)
    35  		}
    36  	}
    37  }
    38  
    39  func TestMemHash64Equality(t *testing.T) {
    40  	if *UseAeshash {
    41  		t.Skip("skipping since AES hash implementation is used")
    42  	}
    43  	var b [8]byte
    44  	r := rand.New(rand.NewSource(1234))
    45  	seed := uintptr(r.Uint64())
    46  	for i := 0; i < 100; i++ {
    47  		randBytes(r, b[:])
    48  		got := MemHash64(unsafe.Pointer(&b), seed)
    49  		want := MemHash(unsafe.Pointer(&b), seed, 8)
    50  		if got != want {
    51  			t.Errorf("MemHash64(%x, %v) = %v; want %v", b, seed, got, want)
    52  		}
    53  	}
    54  }
    55  
    56  // Smhasher is a torture test for hash functions.
    57  // https://code.google.com/p/smhasher/
    58  // This code is a port of some of the Smhasher tests to Go.
    59  //
    60  // The current AES hash function passes Smhasher. Our fallback
    61  // hash functions don't, so we only enable the difficult tests when
    62  // we know the AES implementation is available.
    63  
    64  // Sanity checks.
    65  // hash should not depend on values outside key.
    66  // hash should not depend on alignment.
    67  func TestSmhasherSanity(t *testing.T) {
    68  	r := rand.New(rand.NewSource(1234))
    69  	const REP = 10
    70  	const KEYMAX = 128
    71  	const PAD = 16
    72  	const OFFMAX = 16
    73  	for k := 0; k < REP; k++ {
    74  		for n := 0; n < KEYMAX; n++ {
    75  			for i := 0; i < OFFMAX; i++ {
    76  				var b [KEYMAX + OFFMAX + 2*PAD]byte
    77  				var c [KEYMAX + OFFMAX + 2*PAD]byte
    78  				randBytes(r, b[:])
    79  				randBytes(r, c[:])
    80  				copy(c[PAD+i:PAD+i+n], b[PAD:PAD+n])
    81  				if BytesHash(b[PAD:PAD+n], 0) != BytesHash(c[PAD+i:PAD+i+n], 0) {
    82  					t.Errorf("hash depends on bytes outside key")
    83  				}
    84  			}
    85  		}
    86  	}
    87  }
    88  
    89  type HashSet struct {
    90  	list []uintptr // list of hashes added
    91  }
    92  
    93  func newHashSet() *HashSet {
    94  	return &HashSet{list: make([]uintptr, 0, 1024)}
    95  }
    96  func (s *HashSet) add(h uintptr) {
    97  	s.list = append(s.list, h)
    98  }
    99  func (s *HashSet) addS(x string) {
   100  	s.add(StringHash(x, 0))
   101  }
   102  func (s *HashSet) addB(x []byte) {
   103  	s.add(BytesHash(x, 0))
   104  }
   105  func (s *HashSet) addS_seed(x string, seed uintptr) {
   106  	s.add(StringHash(x, seed))
   107  }
   108  func (s *HashSet) check(t *testing.T) {
   109  	list := s.list
   110  	slices.Sort(list)
   111  
   112  	collisions := 0
   113  	for i := 1; i < len(list); i++ {
   114  		if list[i] == list[i-1] {
   115  			collisions++
   116  		}
   117  	}
   118  	n := len(list)
   119  
   120  	const SLOP = 50.0
   121  	pairs := int64(n) * int64(n-1) / 2
   122  	expected := float64(pairs) / math.Pow(2.0, float64(hashSize))
   123  	stddev := math.Sqrt(expected)
   124  	if float64(collisions) > expected+SLOP*(3*stddev+1) {
   125  		t.Errorf("unexpected number of collisions: got=%d mean=%f stddev=%f threshold=%f", collisions, expected, stddev, expected+SLOP*(3*stddev+1))
   126  	}
   127  	// Reset for reuse
   128  	s.list = s.list[:0]
   129  }
   130  
   131  // a string plus adding zeros must make distinct hashes
   132  func TestSmhasherAppendedZeros(t *testing.T) {
   133  	s := "hello" + strings.Repeat("\x00", 256)
   134  	h := newHashSet()
   135  	for i := 0; i <= len(s); i++ {
   136  		h.addS(s[:i])
   137  	}
   138  	h.check(t)
   139  }
   140  
   141  // All 0-3 byte strings have distinct hashes.
   142  func TestSmhasherSmallKeys(t *testing.T) {
   143  	if race.Enabled {
   144  		t.Skip("Too long for race mode")
   145  	}
   146  	testenv.ParallelOn64Bit(t)
   147  	h := newHashSet()
   148  	var b [3]byte
   149  	for i := 0; i < 256; i++ {
   150  		b[0] = byte(i)
   151  		h.addB(b[:1])
   152  		for j := 0; j < 256; j++ {
   153  			b[1] = byte(j)
   154  			h.addB(b[:2])
   155  			if !testing.Short() {
   156  				for k := 0; k < 256; k++ {
   157  					b[2] = byte(k)
   158  					h.addB(b[:3])
   159  				}
   160  			}
   161  		}
   162  	}
   163  	h.check(t)
   164  }
   165  
   166  // Different length strings of all zeros have distinct hashes.
   167  func TestSmhasherZeros(t *testing.T) {
   168  	t.Parallel()
   169  	N := 256 * 1024
   170  	if testing.Short() {
   171  		N = 1024
   172  	}
   173  	h := newHashSet()
   174  	b := make([]byte, N)
   175  	for i := 0; i <= N; i++ {
   176  		h.addB(b[:i])
   177  	}
   178  	h.check(t)
   179  }
   180  
   181  // Strings with up to two nonzero bytes all have distinct hashes.
   182  func TestSmhasherTwoNonzero(t *testing.T) {
   183  	if GOARCH == "wasm" {
   184  		t.Skip("Too slow on wasm")
   185  	}
   186  	if testing.Short() {
   187  		t.Skip("Skipping in short mode")
   188  	}
   189  	if race.Enabled {
   190  		t.Skip("Too long for race mode")
   191  	}
   192  	testenv.ParallelOn64Bit(t)
   193  	h := newHashSet()
   194  	for n := 2; n <= 16; n++ {
   195  		twoNonZero(h, n)
   196  	}
   197  	h.check(t)
   198  }
   199  func twoNonZero(h *HashSet, n int) {
   200  	b := make([]byte, n)
   201  
   202  	// all zero
   203  	h.addB(b)
   204  
   205  	// one non-zero byte
   206  	for i := 0; i < n; i++ {
   207  		for x := 1; x < 256; x++ {
   208  			b[i] = byte(x)
   209  			h.addB(b)
   210  			b[i] = 0
   211  		}
   212  	}
   213  
   214  	// two non-zero bytes
   215  	for i := 0; i < n; i++ {
   216  		for x := 1; x < 256; x++ {
   217  			b[i] = byte(x)
   218  			for j := i + 1; j < n; j++ {
   219  				for y := 1; y < 256; y++ {
   220  					b[j] = byte(y)
   221  					h.addB(b)
   222  					b[j] = 0
   223  				}
   224  			}
   225  			b[i] = 0
   226  		}
   227  	}
   228  }
   229  
   230  // Test strings with repeats, like "abcdabcdabcdabcd..."
   231  func TestSmhasherCyclic(t *testing.T) {
   232  	if testing.Short() {
   233  		t.Skip("Skipping in short mode")
   234  	}
   235  	if race.Enabled {
   236  		t.Skip("Too long for race mode")
   237  	}
   238  	t.Parallel()
   239  	r := rand.New(rand.NewSource(1234))
   240  	const REPEAT = 8
   241  	const N = 1000000
   242  	h := newHashSet()
   243  	for n := 4; n <= 12; n++ {
   244  		b := make([]byte, REPEAT*n)
   245  		for i := 0; i < N; i++ {
   246  			b[0] = byte(i * 79 % 97)
   247  			b[1] = byte(i * 43 % 137)
   248  			b[2] = byte(i * 151 % 197)
   249  			b[3] = byte(i * 199 % 251)
   250  			randBytes(r, b[4:n])
   251  			for j := n; j < n*REPEAT; j++ {
   252  				b[j] = b[j-n]
   253  			}
   254  			h.addB(b)
   255  		}
   256  		h.check(t)
   257  	}
   258  }
   259  
   260  // Test strings with only a few bits set
   261  func TestSmhasherSparse(t *testing.T) {
   262  	if GOARCH == "wasm" {
   263  		t.Skip("Too slow on wasm")
   264  	}
   265  	if testing.Short() {
   266  		t.Skip("Skipping in short mode")
   267  	}
   268  	t.Parallel()
   269  	h := newHashSet()
   270  	sparse(t, h, 32, 6)
   271  	sparse(t, h, 40, 6)
   272  	sparse(t, h, 48, 5)
   273  	sparse(t, h, 56, 5)
   274  	sparse(t, h, 64, 5)
   275  	sparse(t, h, 96, 4)
   276  	sparse(t, h, 256, 3)
   277  	sparse(t, h, 2048, 2)
   278  }
   279  func sparse(t *testing.T, h *HashSet, n int, k int) {
   280  	b := make([]byte, n/8)
   281  	setbits(h, b, 0, k)
   282  	h.check(t)
   283  }
   284  
   285  // set up to k bits at index i and greater
   286  func setbits(h *HashSet, b []byte, i int, k int) {
   287  	h.addB(b)
   288  	if k == 0 {
   289  		return
   290  	}
   291  	for j := i; j < len(b)*8; j++ {
   292  		b[j/8] |= byte(1 << uint(j&7))
   293  		setbits(h, b, j+1, k-1)
   294  		b[j/8] &= byte(^(1 << uint(j&7)))
   295  	}
   296  }
   297  
   298  // Test all possible combinations of n blocks from the set s.
   299  // "permutation" is a bad name here, but it is what Smhasher uses.
   300  func TestSmhasherPermutation(t *testing.T) {
   301  	if GOARCH == "wasm" {
   302  		t.Skip("Too slow on wasm")
   303  	}
   304  	if testing.Short() {
   305  		t.Skip("Skipping in short mode")
   306  	}
   307  	if race.Enabled {
   308  		t.Skip("Too long for race mode")
   309  	}
   310  	testenv.ParallelOn64Bit(t)
   311  	h := newHashSet()
   312  	permutation(t, h, []uint32{0, 1, 2, 3, 4, 5, 6, 7}, 8)
   313  	permutation(t, h, []uint32{0, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 8)
   314  	permutation(t, h, []uint32{0, 1}, 20)
   315  	permutation(t, h, []uint32{0, 1 << 31}, 20)
   316  	permutation(t, h, []uint32{0, 1, 2, 3, 4, 5, 6, 7, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 6)
   317  }
   318  func permutation(t *testing.T, h *HashSet, s []uint32, n int) {
   319  	b := make([]byte, n*4)
   320  	genPerm(h, b, s, 0)
   321  	h.check(t)
   322  }
   323  func genPerm(h *HashSet, b []byte, s []uint32, n int) {
   324  	h.addB(b[:n])
   325  	if n == len(b) {
   326  		return
   327  	}
   328  	for _, v := range s {
   329  		b[n] = byte(v)
   330  		b[n+1] = byte(v >> 8)
   331  		b[n+2] = byte(v >> 16)
   332  		b[n+3] = byte(v >> 24)
   333  		genPerm(h, b, s, n+4)
   334  	}
   335  }
   336  
   337  type Key interface {
   338  	clear()              // set bits all to 0
   339  	random(r *rand.Rand) // set key to something random
   340  	bits() int           // how many bits key has
   341  	flipBit(i int)       // flip bit i of the key
   342  	hash() uintptr       // hash the key
   343  	name() string        // for error reporting
   344  }
   345  
   346  type BytesKey struct {
   347  	b []byte
   348  }
   349  
   350  func (k *BytesKey) clear() {
   351  	clear(k.b)
   352  }
   353  func (k *BytesKey) random(r *rand.Rand) {
   354  	randBytes(r, k.b)
   355  }
   356  func (k *BytesKey) bits() int {
   357  	return len(k.b) * 8
   358  }
   359  func (k *BytesKey) flipBit(i int) {
   360  	k.b[i>>3] ^= byte(1 << uint(i&7))
   361  }
   362  func (k *BytesKey) hash() uintptr {
   363  	return BytesHash(k.b, 0)
   364  }
   365  func (k *BytesKey) name() string {
   366  	return fmt.Sprintf("bytes%d", len(k.b))
   367  }
   368  
   369  type Int32Key struct {
   370  	i uint32
   371  }
   372  
   373  func (k *Int32Key) clear() {
   374  	k.i = 0
   375  }
   376  func (k *Int32Key) random(r *rand.Rand) {
   377  	k.i = r.Uint32()
   378  }
   379  func (k *Int32Key) bits() int {
   380  	return 32
   381  }
   382  func (k *Int32Key) flipBit(i int) {
   383  	k.i ^= 1 << uint(i)
   384  }
   385  func (k *Int32Key) hash() uintptr {
   386  	return Int32Hash(k.i, 0)
   387  }
   388  func (k *Int32Key) name() string {
   389  	return "int32"
   390  }
   391  
   392  type Int64Key struct {
   393  	i uint64
   394  }
   395  
   396  func (k *Int64Key) clear() {
   397  	k.i = 0
   398  }
   399  func (k *Int64Key) random(r *rand.Rand) {
   400  	k.i = uint64(r.Uint32()) + uint64(r.Uint32())<<32
   401  }
   402  func (k *Int64Key) bits() int {
   403  	return 64
   404  }
   405  func (k *Int64Key) flipBit(i int) {
   406  	k.i ^= 1 << uint(i)
   407  }
   408  func (k *Int64Key) hash() uintptr {
   409  	return Int64Hash(k.i, 0)
   410  }
   411  func (k *Int64Key) name() string {
   412  	return "int64"
   413  }
   414  
   415  type EfaceKey struct {
   416  	i any
   417  }
   418  
   419  func (k *EfaceKey) clear() {
   420  	k.i = nil
   421  }
   422  func (k *EfaceKey) random(r *rand.Rand) {
   423  	k.i = uint64(r.Int63())
   424  }
   425  func (k *EfaceKey) bits() int {
   426  	// use 64 bits. This tests inlined interfaces
   427  	// on 64-bit targets and indirect interfaces on
   428  	// 32-bit targets.
   429  	return 64
   430  }
   431  func (k *EfaceKey) flipBit(i int) {
   432  	k.i = k.i.(uint64) ^ uint64(1)<<uint(i)
   433  }
   434  func (k *EfaceKey) hash() uintptr {
   435  	return EfaceHash(k.i, 0)
   436  }
   437  func (k *EfaceKey) name() string {
   438  	return "Eface"
   439  }
   440  
   441  type IfaceKey struct {
   442  	i interface {
   443  		F()
   444  	}
   445  }
   446  type fInter uint64
   447  
   448  func (x fInter) F() {
   449  }
   450  
   451  func (k *IfaceKey) clear() {
   452  	k.i = nil
   453  }
   454  func (k *IfaceKey) random(r *rand.Rand) {
   455  	k.i = fInter(r.Int63())
   456  }
   457  func (k *IfaceKey) bits() int {
   458  	// use 64 bits. This tests inlined interfaces
   459  	// on 64-bit targets and indirect interfaces on
   460  	// 32-bit targets.
   461  	return 64
   462  }
   463  func (k *IfaceKey) flipBit(i int) {
   464  	k.i = k.i.(fInter) ^ fInter(1)<<uint(i)
   465  }
   466  func (k *IfaceKey) hash() uintptr {
   467  	return IfaceHash(k.i, 0)
   468  }
   469  func (k *IfaceKey) name() string {
   470  	return "Iface"
   471  }
   472  
   473  // Flipping a single bit of a key should flip each output bit with 50% probability.
   474  func TestSmhasherAvalanche(t *testing.T) {
   475  	if GOARCH == "wasm" {
   476  		t.Skip("Too slow on wasm")
   477  	}
   478  	if testing.Short() {
   479  		t.Skip("Skipping in short mode")
   480  	}
   481  	if race.Enabled {
   482  		t.Skip("Too long for race mode")
   483  	}
   484  	t.Parallel()
   485  	avalancheTest1(t, &BytesKey{make([]byte, 2)})
   486  	avalancheTest1(t, &BytesKey{make([]byte, 4)})
   487  	avalancheTest1(t, &BytesKey{make([]byte, 8)})
   488  	avalancheTest1(t, &BytesKey{make([]byte, 16)})
   489  	avalancheTest1(t, &BytesKey{make([]byte, 32)})
   490  	avalancheTest1(t, &BytesKey{make([]byte, 200)})
   491  	avalancheTest1(t, &Int32Key{})
   492  	avalancheTest1(t, &Int64Key{})
   493  	avalancheTest1(t, &EfaceKey{})
   494  	avalancheTest1(t, &IfaceKey{})
   495  }
   496  func avalancheTest1(t *testing.T, k Key) {
   497  	const REP = 100000
   498  	r := rand.New(rand.NewSource(1234))
   499  	n := k.bits()
   500  
   501  	// grid[i][j] is a count of whether flipping
   502  	// input bit i affects output bit j.
   503  	grid := make([][hashSize]int, n)
   504  
   505  	for z := 0; z < REP; z++ {
   506  		// pick a random key, hash it
   507  		k.random(r)
   508  		h := k.hash()
   509  
   510  		// flip each bit, hash & compare the results
   511  		for i := 0; i < n; i++ {
   512  			k.flipBit(i)
   513  			d := h ^ k.hash()
   514  			k.flipBit(i)
   515  
   516  			// record the effects of that bit flip
   517  			g := &grid[i]
   518  			for j := 0; j < hashSize; j++ {
   519  				g[j] += int(d & 1)
   520  				d >>= 1
   521  			}
   522  		}
   523  	}
   524  
   525  	// Each entry in the grid should be about REP/2.
   526  	// More precisely, we did N = k.bits() * hashSize experiments where
   527  	// each is the sum of REP coin flips. We want to find bounds on the
   528  	// sum of coin flips such that a truly random experiment would have
   529  	// all sums inside those bounds with 99% probability.
   530  	N := n * hashSize
   531  	var c float64
   532  	// find c such that Prob(mean-c*stddev < x < mean+c*stddev)^N > .9999
   533  	for c = 0.0; math.Pow(math.Erf(c/math.Sqrt(2)), float64(N)) < .9999; c += .1 {
   534  	}
   535  	c *= 11.0 // allowed slack: 40% to 60% - we don't need to be perfectly random
   536  	mean := .5 * REP
   537  	stddev := .5 * math.Sqrt(REP)
   538  	low := int(mean - c*stddev)
   539  	high := int(mean + c*stddev)
   540  	for i := 0; i < n; i++ {
   541  		for j := 0; j < hashSize; j++ {
   542  			x := grid[i][j]
   543  			if x < low || x > high {
   544  				t.Errorf("bad bias for %s bit %d -> bit %d: %d/%d\n", k.name(), i, j, x, REP)
   545  			}
   546  		}
   547  	}
   548  }
   549  
   550  // All bit rotations of a set of distinct keys
   551  func TestSmhasherWindowed(t *testing.T) {
   552  	if race.Enabled {
   553  		t.Skip("Too long for race mode")
   554  	}
   555  	t.Parallel()
   556  	h := newHashSet()
   557  	t.Logf("32 bit keys")
   558  	windowed(t, h, &Int32Key{})
   559  	t.Logf("64 bit keys")
   560  	windowed(t, h, &Int64Key{})
   561  	t.Logf("string keys")
   562  	windowed(t, h, &BytesKey{make([]byte, 128)})
   563  }
   564  func windowed(t *testing.T, h *HashSet, k Key) {
   565  	if GOARCH == "wasm" {
   566  		t.Skip("Too slow on wasm")
   567  	}
   568  	if PtrSize == 4 {
   569  		// This test tends to be flaky on 32-bit systems.
   570  		// There's not enough bits in the hash output, so we
   571  		// expect a nontrivial number of collisions, and it is
   572  		// often quite a bit higher than expected. See issue 43130.
   573  		t.Skip("Flaky on 32-bit systems")
   574  	}
   575  	if testing.Short() {
   576  		t.Skip("Skipping in short mode")
   577  	}
   578  	const BITS = 16
   579  
   580  	for r := 0; r < k.bits(); r++ {
   581  		for i := 0; i < 1<<BITS; i++ {
   582  			k.clear()
   583  			for j := 0; j < BITS; j++ {
   584  				if i>>uint(j)&1 != 0 {
   585  					k.flipBit((j + r) % k.bits())
   586  				}
   587  			}
   588  			h.add(k.hash())
   589  		}
   590  		h.check(t)
   591  	}
   592  }
   593  
   594  // All keys of the form prefix + [A-Za-z0-9]*N + suffix.
   595  func TestSmhasherText(t *testing.T) {
   596  	if testing.Short() {
   597  		t.Skip("Skipping in short mode")
   598  	}
   599  	t.Parallel()
   600  	h := newHashSet()
   601  	text(t, h, "Foo", "Bar")
   602  	text(t, h, "FooBar", "")
   603  	text(t, h, "", "FooBar")
   604  }
   605  func text(t *testing.T, h *HashSet, prefix, suffix string) {
   606  	const N = 4
   607  	const S = "ABCDEFGHIJKLMNOPQRSTabcdefghijklmnopqrst0123456789"
   608  	const L = len(S)
   609  	b := make([]byte, len(prefix)+N+len(suffix))
   610  	copy(b, prefix)
   611  	copy(b[len(prefix)+N:], suffix)
   612  	c := b[len(prefix):]
   613  	for i := 0; i < L; i++ {
   614  		c[0] = S[i]
   615  		for j := 0; j < L; j++ {
   616  			c[1] = S[j]
   617  			for k := 0; k < L; k++ {
   618  				c[2] = S[k]
   619  				for x := 0; x < L; x++ {
   620  					c[3] = S[x]
   621  					h.addB(b)
   622  				}
   623  			}
   624  		}
   625  	}
   626  	h.check(t)
   627  }
   628  
   629  // Make sure different seed values generate different hashes.
   630  func TestSmhasherSeed(t *testing.T) {
   631  	h := newHashSet()
   632  	const N = 100000
   633  	s := "hello"
   634  	for i := 0; i < N; i++ {
   635  		h.addS_seed(s, uintptr(i))
   636  	}
   637  	h.check(t)
   638  }
   639  
   640  func TestIssue66841(t *testing.T) {
   641  	testenv.MustHaveExec(t)
   642  	if *UseAeshash && os.Getenv("TEST_ISSUE_66841") == "" {
   643  		// We want to test the backup hash, so if we're running on a machine
   644  		// that uses aeshash, exec ourselves while turning aes off.
   645  		cmd := testenv.CleanCmdEnv(testenv.Command(t, os.Args[0], "-test.run=^TestIssue66841$"))
   646  		cmd.Env = append(cmd.Env, "GODEBUG=cpu.aes=off", "TEST_ISSUE_66841=1")
   647  		out, err := cmd.CombinedOutput()
   648  		if err != nil {
   649  			t.Errorf("%s", string(out))
   650  		}
   651  		// Fall through. Might as well run this test when aeshash is on also.
   652  	}
   653  	h := newHashSet()
   654  	var b [16]byte
   655  	binary.LittleEndian.PutUint64(b[:8], 0xe7037ed1a0b428db) // runtime.m2
   656  	for i := 0; i < 1000; i++ {
   657  		binary.LittleEndian.PutUint64(b[8:], uint64(i))
   658  		h.addB(b[:])
   659  	}
   660  	h.check(t)
   661  }
   662  
   663  // size of the hash output (32 or 64 bits)
   664  const hashSize = 32 + int(^uintptr(0)>>63<<5)
   665  
   666  func randBytes(r *rand.Rand, b []byte) {
   667  	for i := range b {
   668  		b[i] = byte(r.Uint32())
   669  	}
   670  }
   671  
   672  func benchmarkHash(b *testing.B, n int) {
   673  	s := strings.Repeat("A", n)
   674  
   675  	for i := 0; i < b.N; i++ {
   676  		StringHash(s, 0)
   677  	}
   678  	b.SetBytes(int64(n))
   679  }
   680  
   681  func BenchmarkHash5(b *testing.B)     { benchmarkHash(b, 5) }
   682  func BenchmarkHash16(b *testing.B)    { benchmarkHash(b, 16) }
   683  func BenchmarkHash64(b *testing.B)    { benchmarkHash(b, 64) }
   684  func BenchmarkHash1024(b *testing.B)  { benchmarkHash(b, 1024) }
   685  func BenchmarkHash65536(b *testing.B) { benchmarkHash(b, 65536) }
   686  
   687  func TestArrayHash(t *testing.T) {
   688  	// Make sure that "" in arrays hash correctly. The hash
   689  	// should at least scramble the input seed so that, e.g.,
   690  	// {"","foo"} and {"foo",""} have different hashes.
   691  
   692  	// If the hash is bad, then all (8 choose 4) = 70 keys
   693  	// have the same hash. If so, we allocate 70/8 = 8
   694  	// overflow buckets. If the hash is good we don't
   695  	// normally allocate any overflow buckets, and the
   696  	// probability of even one or two overflows goes down rapidly.
   697  	// (There is always 1 allocation of the bucket array. The map
   698  	// header is allocated on the stack.)
   699  	f := func() {
   700  		// Make the key type at most 128 bytes. Otherwise,
   701  		// we get an allocation per key.
   702  		type key [8]string
   703  		m := make(map[key]bool, 70)
   704  
   705  		// fill m with keys that have 4 "foo"s and 4 ""s.
   706  		for i := 0; i < 256; i++ {
   707  			var k key
   708  			cnt := 0
   709  			for j := uint(0); j < 8; j++ {
   710  				if i>>j&1 != 0 {
   711  					k[j] = "foo"
   712  					cnt++
   713  				}
   714  			}
   715  			if cnt == 4 {
   716  				m[k] = true
   717  			}
   718  		}
   719  		if len(m) != 70 {
   720  			t.Errorf("bad test: (8 choose 4) should be 70, not %d", len(m))
   721  		}
   722  	}
   723  	if n := testing.AllocsPerRun(10, f); n > 6 {
   724  		t.Errorf("too many allocs %f - hash not balanced", n)
   725  	}
   726  }
   727  func TestStructHash(t *testing.T) {
   728  	// See the comment in TestArrayHash.
   729  	f := func() {
   730  		type key struct {
   731  			a, b, c, d, e, f, g, h string
   732  		}
   733  		m := make(map[key]bool, 70)
   734  
   735  		// fill m with keys that have 4 "foo"s and 4 ""s.
   736  		for i := 0; i < 256; i++ {
   737  			var k key
   738  			cnt := 0
   739  			if i&1 != 0 {
   740  				k.a = "foo"
   741  				cnt++
   742  			}
   743  			if i&2 != 0 {
   744  				k.b = "foo"
   745  				cnt++
   746  			}
   747  			if i&4 != 0 {
   748  				k.c = "foo"
   749  				cnt++
   750  			}
   751  			if i&8 != 0 {
   752  				k.d = "foo"
   753  				cnt++
   754  			}
   755  			if i&16 != 0 {
   756  				k.e = "foo"
   757  				cnt++
   758  			}
   759  			if i&32 != 0 {
   760  				k.f = "foo"
   761  				cnt++
   762  			}
   763  			if i&64 != 0 {
   764  				k.g = "foo"
   765  				cnt++
   766  			}
   767  			if i&128 != 0 {
   768  				k.h = "foo"
   769  				cnt++
   770  			}
   771  			if cnt == 4 {
   772  				m[k] = true
   773  			}
   774  		}
   775  		if len(m) != 70 {
   776  			t.Errorf("bad test: (8 choose 4) should be 70, not %d", len(m))
   777  		}
   778  	}
   779  	if n := testing.AllocsPerRun(10, f); n > 6 {
   780  		t.Errorf("too many allocs %f - hash not balanced", n)
   781  	}
   782  }
   783  
   784  var sink uint64
   785  
   786  func BenchmarkAlignedLoad(b *testing.B) {
   787  	var buf [16]byte
   788  	p := unsafe.Pointer(&buf[0])
   789  	var s uint64
   790  	for i := 0; i < b.N; i++ {
   791  		s += ReadUnaligned64(p)
   792  	}
   793  	sink = s
   794  }
   795  
   796  func BenchmarkUnalignedLoad(b *testing.B) {
   797  	var buf [16]byte
   798  	p := unsafe.Pointer(&buf[1])
   799  	var s uint64
   800  	for i := 0; i < b.N; i++ {
   801  		s += ReadUnaligned64(p)
   802  	}
   803  	sink = s
   804  }
   805  
   806  func TestCollisions(t *testing.T) {
   807  	if testing.Short() {
   808  		t.Skip("Skipping in short mode")
   809  	}
   810  	t.Parallel()
   811  	for i := 0; i < 16; i++ {
   812  		for j := 0; j < 16; j++ {
   813  			if j == i {
   814  				continue
   815  			}
   816  			var a [16]byte
   817  			m := make(map[uint16]struct{}, 1<<16)
   818  			for n := 0; n < 1<<16; n++ {
   819  				a[i] = byte(n)
   820  				a[j] = byte(n >> 8)
   821  				m[uint16(BytesHash(a[:], 0))] = struct{}{}
   822  			}
   823  			// N balls in N bins, for N=65536
   824  			avg := 41427
   825  			stdDev := 123
   826  			if len(m) < avg-40*stdDev || len(m) > avg+40*stdDev {
   827  				t.Errorf("bad number of collisions i=%d j=%d outputs=%d out of 65536\n", i, j, len(m))
   828  			}
   829  		}
   830  	}
   831  }
   832
View as plain text