Source file src/hash/maphash/maphash.go

     1  // Copyright 2019 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package maphash provides hash functions on byte sequences and comparable values.
     6  // These hash functions are intended to be used to implement hash tables or
     7  // other data structures that need to map arbitrary strings or byte
     8  // sequences to a uniform distribution on unsigned 64-bit integers.
     9  // Each different instance of a hash table or data structure should use its own [Seed].
    10  //
    11  // The hash functions are not cryptographically secure.
    12  // (See crypto/sha256 and crypto/sha512 for cryptographic use.)
    13  package maphash
    14  
    15  import (
    16  	"internal/abi"
    17  	"internal/byteorder"
    18  	"math"
    19  	"reflect"
    20  )
    21  
    22  // A Seed is a random value that selects the specific hash function
    23  // computed by a [Hash]. If two Hashes use the same Seeds, they
    24  // will compute the same hash values for any given input.
    25  // If two Hashes use different Seeds, they are very likely to compute
    26  // distinct hash values for any given input.
    27  //
    28  // A Seed must be initialized by calling [MakeSeed].
    29  // The zero seed is uninitialized and not valid for use with [Hash]'s SetSeed method.
    30  //
    31  // Each Seed value is local to a single process and cannot be serialized
    32  // or otherwise recreated in a different process.
    33  type Seed struct {
    34  	s uint64
    35  }
    36  
    37  // Bytes returns the hash of b with the given seed.
    38  //
    39  // Bytes is equivalent to, but more convenient and efficient than:
    40  //
    41  //	var h Hash
    42  //	h.SetSeed(seed)
    43  //	h.Write(b)
    44  //	return h.Sum64()
    45  func Bytes(seed Seed, b []byte) uint64 {
    46  	state := seed.s
    47  	if state == 0 {
    48  		panic("maphash: use of uninitialized Seed")
    49  	}
    50  
    51  	if len(b) > bufSize {
    52  		b = b[:len(b):len(b)] // merge len and cap calculations when reslicing
    53  		for len(b) > bufSize {
    54  			state = rthash(b[:bufSize], state)
    55  			b = b[bufSize:]
    56  		}
    57  	}
    58  	return rthash(b, state)
    59  }
    60  
    61  // String returns the hash of s with the given seed.
    62  //
    63  // String is equivalent to, but more convenient and efficient than:
    64  //
    65  //	var h Hash
    66  //	h.SetSeed(seed)
    67  //	h.WriteString(s)
    68  //	return h.Sum64()
    69  func String(seed Seed, s string) uint64 {
    70  	state := seed.s
    71  	if state == 0 {
    72  		panic("maphash: use of uninitialized Seed")
    73  	}
    74  	for len(s) > bufSize {
    75  		state = rthashString(s[:bufSize], state)
    76  		s = s[bufSize:]
    77  	}
    78  	return rthashString(s, state)
    79  }
    80  
    81  // A Hash computes a seeded hash of a byte sequence.
    82  //
    83  // The zero Hash is a valid Hash ready to use.
    84  // A zero Hash chooses a random seed for itself during
    85  // the first call to a Reset, Write, Seed, or Sum64 method.
    86  // For control over the seed, use SetSeed.
    87  //
    88  // The computed hash values depend only on the initial seed and
    89  // the sequence of bytes provided to the Hash object, not on the way
    90  // in which the bytes are provided. For example, the three sequences
    91  //
    92  //	h.Write([]byte{'f','o','o'})
    93  //	h.WriteByte('f'); h.WriteByte('o'); h.WriteByte('o')
    94  //	h.WriteString("foo")
    95  //
    96  // all have the same effect.
    97  //
    98  // Hashes are intended to be collision-resistant, even for situations
    99  // where an adversary controls the byte sequences being hashed.
   100  //
   101  // A Hash is not safe for concurrent use by multiple goroutines, but a Seed is.
   102  // If multiple goroutines must compute the same seeded hash,
   103  // each can declare its own Hash and call SetSeed with a common Seed.
   104  type Hash struct {
   105  	_     [0]func()     // not comparable
   106  	seed  Seed          // initial seed used for this hash
   107  	state Seed          // current hash of all flushed bytes
   108  	buf   [bufSize]byte // unflushed byte buffer
   109  	n     int           // number of unflushed bytes
   110  }
   111  
   112  // bufSize is the size of the Hash write buffer.
   113  // The buffer ensures that writes depend only on the sequence of bytes,
   114  // not the sequence of WriteByte/Write/WriteString calls,
   115  // by always calling rthash with a full buffer (except for the tail).
   116  const bufSize = 128
   117  
   118  // initSeed seeds the hash if necessary.
   119  // initSeed is called lazily before any operation that actually uses h.seed/h.state.
   120  // Note that this does not include Write/WriteByte/WriteString in the case
   121  // where they only add to h.buf. (If they write too much, they call h.flush,
   122  // which does call h.initSeed.)
   123  func (h *Hash) initSeed() {
   124  	if h.seed.s == 0 {
   125  		seed := MakeSeed()
   126  		h.seed = seed
   127  		h.state = seed
   128  	}
   129  }
   130  
   131  // WriteByte adds b to the sequence of bytes hashed by h.
   132  // It never fails; the error result is for implementing [io.ByteWriter].
   133  func (h *Hash) WriteByte(b byte) error {
   134  	if h.n == len(h.buf) {
   135  		h.flush()
   136  	}
   137  	h.buf[h.n] = b
   138  	h.n++
   139  	return nil
   140  }
   141  
   142  // Write adds b to the sequence of bytes hashed by h.
   143  // It always writes all of b and never fails; the count and error result are for implementing [io.Writer].
   144  func (h *Hash) Write(b []byte) (int, error) {
   145  	size := len(b)
   146  	// Deal with bytes left over in h.buf.
   147  	// h.n <= bufSize is always true.
   148  	// Checking it is ~free and it lets the compiler eliminate a bounds check.
   149  	if h.n > 0 && h.n <= bufSize {
   150  		k := copy(h.buf[h.n:], b)
   151  		h.n += k
   152  		if h.n < bufSize {
   153  			// Copied the entirety of b to h.buf.
   154  			return size, nil
   155  		}
   156  		b = b[k:]
   157  		h.flush()
   158  		// No need to set h.n = 0 here; it happens just before exit.
   159  	}
   160  	// Process as many full buffers as possible, without copying, and calling initSeed only once.
   161  	if len(b) > bufSize {
   162  		h.initSeed()
   163  		for len(b) > bufSize {
   164  			h.state.s = rthash(b[:bufSize], h.state.s)
   165  			b = b[bufSize:]
   166  		}
   167  	}
   168  	// Copy the tail.
   169  	copy(h.buf[:], b)
   170  	h.n = len(b)
   171  	return size, nil
   172  }
   173  
   174  // WriteString adds the bytes of s to the sequence of bytes hashed by h.
   175  // It always writes all of s and never fails; the count and error result are for implementing [io.StringWriter].
   176  func (h *Hash) WriteString(s string) (int, error) {
   177  	// WriteString mirrors Write. See Write for comments.
   178  	size := len(s)
   179  	if h.n > 0 && h.n <= bufSize {
   180  		k := copy(h.buf[h.n:], s)
   181  		h.n += k
   182  		if h.n < bufSize {
   183  			return size, nil
   184  		}
   185  		s = s[k:]
   186  		h.flush()
   187  	}
   188  	if len(s) > bufSize {
   189  		h.initSeed()
   190  		for len(s) > bufSize {
   191  			h.state.s = rthashString(s[:bufSize], h.state.s)
   192  			s = s[bufSize:]
   193  		}
   194  	}
   195  	copy(h.buf[:], s)
   196  	h.n = len(s)
   197  	return size, nil
   198  }
   199  
   200  // Seed returns h's seed value.
   201  func (h *Hash) Seed() Seed {
   202  	h.initSeed()
   203  	return h.seed
   204  }
   205  
   206  // SetSeed sets h to use seed, which must have been returned by [MakeSeed]
   207  // or by another [Hash.Seed] method.
   208  // Two [Hash] objects with the same seed behave identically.
   209  // Two [Hash] objects with different seeds will very likely behave differently.
   210  // Any bytes added to h before this call will be discarded.
   211  func (h *Hash) SetSeed(seed Seed) {
   212  	if seed.s == 0 {
   213  		panic("maphash: use of uninitialized Seed")
   214  	}
   215  	h.seed = seed
   216  	h.state = seed
   217  	h.n = 0
   218  }
   219  
   220  // Reset discards all bytes added to h.
   221  // (The seed remains the same.)
   222  func (h *Hash) Reset() {
   223  	h.initSeed()
   224  	h.state = h.seed
   225  	h.n = 0
   226  }
   227  
   228  // precondition: buffer is full.
   229  func (h *Hash) flush() {
   230  	if h.n != len(h.buf) {
   231  		panic("maphash: flush of partially full buffer")
   232  	}
   233  	h.initSeed()
   234  	h.state.s = rthash(h.buf[:h.n], h.state.s)
   235  	h.n = 0
   236  }
   237  
   238  // Sum64 returns h's current 64-bit value, which depends on
   239  // h's seed and the sequence of bytes added to h since the
   240  // last call to [Hash.Reset] or [Hash.SetSeed].
   241  //
   242  // All bits of the Sum64 result are close to uniformly and
   243  // independently distributed, so it can be safely reduced
   244  // by using bit masking, shifting, or modular arithmetic.
   245  func (h *Hash) Sum64() uint64 {
   246  	h.initSeed()
   247  	return rthash(h.buf[:h.n], h.state.s)
   248  }
   249  
   250  // MakeSeed returns a new random seed.
   251  func MakeSeed() Seed {
   252  	var s uint64
   253  	for {
   254  		s = randUint64()
   255  		// We use seed 0 to indicate an uninitialized seed/hash,
   256  		// so keep trying until we get a non-zero seed.
   257  		if s != 0 {
   258  			break
   259  		}
   260  	}
   261  	return Seed{s: s}
   262  }
   263  
   264  // Sum appends the hash's current 64-bit value to b.
   265  // It exists for implementing [hash.Hash].
   266  // For direct calls, it is more efficient to use [Hash.Sum64].
   267  func (h *Hash) Sum(b []byte) []byte {
   268  	x := h.Sum64()
   269  	return append(b,
   270  		byte(x>>0),
   271  		byte(x>>8),
   272  		byte(x>>16),
   273  		byte(x>>24),
   274  		byte(x>>32),
   275  		byte(x>>40),
   276  		byte(x>>48),
   277  		byte(x>>56))
   278  }
   279  
   280  // Size returns h's hash value size, 8 bytes.
   281  func (h *Hash) Size() int { return 8 }
   282  
   283  // BlockSize returns h's block size.
   284  func (h *Hash) BlockSize() int { return len(h.buf) }
   285  
   286  // Comparable returns the hash of comparable value v with the given seed
   287  // such that Comparable(s, v1) == Comparable(s, v2) if v1 == v2.
   288  // If v != v, then the resulting hash is randomly distributed.
   289  func Comparable[T comparable](seed Seed, v T) uint64 {
   290  	comparableReady(v)
   291  	var h Hash
   292  	h.SetSeed(seed)
   293  	comparableF(&h, v)
   294  	return h.Sum64()
   295  }
   296  
   297  func comparableReady[T comparable](v T) {
   298  	// Force v to be on the heap.
   299  	// We cannot hash pointers to local variables,
   300  	// as the address of the local variable
   301  	// might change on stack growth.
   302  	abi.Escape(v)
   303  }
   304  
   305  // WriteComparable adds x to the data hashed by h.
   306  func WriteComparable[T comparable](h *Hash, x T) {
   307  	comparableReady(x)
   308  	comparableF(h, x)
   309  }
   310  
   311  // appendT hash a value,
   312  // when the value cannot be directly hash raw memory,
   313  // or when purego is used.
   314  func appendT(h *Hash, v reflect.Value) {
   315  	h.WriteString(v.Type().String())
   316  	switch v.Kind() {
   317  	case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Int:
   318  		var buf [8]byte
   319  		byteorder.LePutUint64(buf[:], uint64(v.Int()))
   320  		h.Write(buf[:])
   321  		return
   322  	case reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uint, reflect.Uintptr:
   323  		var buf [8]byte
   324  		byteorder.LePutUint64(buf[:], v.Uint())
   325  		h.Write(buf[:])
   326  		return
   327  	case reflect.Array:
   328  		var buf [8]byte
   329  		for i := range uint64(v.Len()) {
   330  			byteorder.LePutUint64(buf[:], i)
   331  			// do not want to hash to the same value,
   332  			// [2]string{"foo", ""} and [2]string{"", "foo"}.
   333  			h.Write(buf[:])
   334  			appendT(h, v.Index(int(i)))
   335  		}
   336  		return
   337  	case reflect.String:
   338  		h.WriteString(v.String())
   339  		return
   340  	case reflect.Struct:
   341  		var buf [8]byte
   342  		for i := range v.NumField() {
   343  			f := v.Field(i)
   344  			byteorder.LePutUint64(buf[:], uint64(i))
   345  			// do not want to hash to the same value,
   346  			// struct{a,b string}{"foo",""} and
   347  			// struct{a,b string}{"","foo"}.
   348  			h.Write(buf[:])
   349  			appendT(h, f)
   350  		}
   351  		return
   352  	case reflect.Complex64, reflect.Complex128:
   353  		c := v.Complex()
   354  		h.float64(real(c))
   355  		h.float64(imag(c))
   356  		return
   357  	case reflect.Float32, reflect.Float64:
   358  		h.float64(v.Float())
   359  		return
   360  	case reflect.Bool:
   361  		h.WriteByte(btoi(v.Bool()))
   362  		return
   363  	case reflect.UnsafePointer, reflect.Pointer:
   364  		var buf [8]byte
   365  		// because pointing to the abi.Escape call in comparableReady,
   366  		// So this is ok to hash pointer,
   367  		// this way because we know their target won't be moved.
   368  		byteorder.LePutUint64(buf[:], uint64(v.Pointer()))
   369  		h.Write(buf[:])
   370  		return
   371  	case reflect.Interface:
   372  		appendT(h, v.Elem())
   373  		return
   374  	}
   375  	panic("maphash: " + v.Type().String() + " not comparable")
   376  }
   377  
   378  func (h *Hash) float64(f float64) {
   379  	if f == 0 {
   380  		h.WriteByte(0)
   381  		return
   382  	}
   383  	var buf [8]byte
   384  	if f != f {
   385  		byteorder.LePutUint64(buf[:], randUint64())
   386  		h.Write(buf[:])
   387  		return
   388  	}
   389  	byteorder.LePutUint64(buf[:], math.Float64bits(f))
   390  	h.Write(buf[:])
   391  }
   392  
   393  func btoi(b bool) byte {
   394  	if b {
   395  		return 1
   396  	}
   397  	return 0
   398  }
   399  

View as plain text