Source file src/hash/maphash/maphash.go
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package maphash provides hash functions on byte sequences and comparable values.
// These hash functions are intended to be used to implement hash tables or
// other data structures that need to map arbitrary strings or byte
// sequences to a uniform distribution on unsigned 64-bit integers.
// Each different instance of a hash table or data structure should use its own [Seed].
//
// The hash functions are not cryptographically secure.
// (See crypto/sha256 and crypto/sha512 for cryptographic use.)
package maphash

import (
	"internal/abi"
	"internal/byteorder"
	"math"
	"reflect"
)

// A Seed is a random value that selects the specific hash function
// computed by a [Hash]. If two Hashes use the same Seeds, they
// will compute the same hash values for any given input.
// If two Hashes use different Seeds, they are very likely to compute
// distinct hash values for any given input.
//
// A Seed must be initialized by calling [MakeSeed].
// The zero seed is uninitialized and not valid for use with [Hash]'s SetSeed method.
//
// Each Seed value is local to a single process and cannot be serialized
// or otherwise recreated in a different process.
type Seed struct {
	s uint64
}

// Bytes returns the hash of b with the given seed.
//
// Bytes is equivalent to, but more convenient and efficient than:
//
//	var h Hash
//	h.SetSeed(seed)
//	h.Write(b)
//	return h.Sum64()
func Bytes(seed Seed, b []byte) uint64 {
	state := seed.s
	if state == 0 {
		panic("maphash: use of uninitialized Seed")
	}

	if len(b) > bufSize {
		b = b[:len(b):len(b)] // merge len and cap calculations when reslicing
		for len(b) > bufSize {
			state = rthash(b[:bufSize], state)
			b = b[bufSize:]
		}
	}
	return rthash(b, state)
}

// String returns the hash of s with the given seed.
//
// String is equivalent to, but more convenient and efficient than:
//
//	var h Hash
//	h.SetSeed(seed)
//	h.WriteString(s)
//	return h.Sum64()
func String(seed Seed, s string) uint64 {
	state := seed.s
	if state == 0 {
		panic("maphash: use of uninitialized Seed")
	}
	for len(s) > bufSize {
		state = rthashString(s[:bufSize], state)
		s = s[bufSize:]
	}
	return rthashString(s, state)
}
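
// The following is an illustrative sketch, not part of the original file:
// it shows the intended usage pattern of creating one Seed per hash table
// and reusing it for every lookup, reducing the uniform 64-bit output of
// String with a modulus to pick a bucket. The exampleBucketIndex name and
// its parameters are hypothetical.
func exampleBucketIndex(seed Seed, key string, bucketCount int) int {
	// All bits of the hash are close to uniformly distributed, so reducing
	// it with a modulus (or a mask) is safe. bucketCount is assumed to be
	// positive.
	return int(String(seed, key) % uint64(bucketCount))
}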

// A Hash computes a seeded hash of a byte sequence.
//
// The zero Hash is a valid Hash ready to use.
// A zero Hash chooses a random seed for itself during
// the first call to a Reset, Write, Seed, or Sum64 method.
// For control over the seed, use SetSeed.
//
// The computed hash values depend only on the initial seed and
// the sequence of bytes provided to the Hash object, not on the way
// in which the bytes are provided. For example, the three sequences
//
//	h.Write([]byte{'f','o','o'})
//	h.WriteByte('f'); h.WriteByte('o'); h.WriteByte('o')
//	h.WriteString("foo")
//
// all have the same effect.
//
// Hashes are intended to be collision-resistant, even for situations
// where an adversary controls the byte sequences being hashed.
//
// A Hash is not safe for concurrent use by multiple goroutines, but a Seed is.
// If multiple goroutines must compute the same seeded hash,
// each can declare its own Hash and call SetSeed with a common Seed.
type Hash struct {
	_     [0]func()     // not comparable
	seed  Seed          // initial seed used for this hash
	state Seed          // current hash of all flushed bytes
	buf   [bufSize]byte // unflushed byte buffer
	n     int           // number of unflushed bytes
}

// bufSize is the size of the Hash write buffer.
// The buffer ensures that writes depend only on the sequence of bytes,
// not the sequence of WriteByte/Write/WriteString calls,
// by always calling rthash with a full buffer (except for the tail).
const bufSize = 128

// initSeed seeds the hash if necessary.
// initSeed is called lazily before any operation that actually uses h.seed/h.state.
// Note that this does not include Write/WriteByte/WriteString in the case
// where they only add to h.buf. (If they write too much, they call h.flush,
// which does call h.initSeed.)
func (h *Hash) initSeed() {
	if h.seed.s == 0 {
		seed := MakeSeed()
		h.seed = seed
		h.state = seed
	}
}

// WriteByte adds b to the sequence of bytes hashed by h.
// It never fails; the error result is for implementing [io.ByteWriter].
func (h *Hash) WriteByte(b byte) error {
	if h.n == len(h.buf) {
		h.flush()
	}
	h.buf[h.n] = b
	h.n++
	return nil
}

// Write adds b to the sequence of bytes hashed by h.
// It always writes all of b and never fails; the count and error result are for implementing [io.Writer].
func (h *Hash) Write(b []byte) (int, error) {
	size := len(b)
	// Deal with bytes left over in h.buf.
	// h.n <= bufSize is always true.
	// Checking it is ~free and it lets the compiler eliminate a bounds check.
	if h.n > 0 && h.n <= bufSize {
		k := copy(h.buf[h.n:], b)
		h.n += k
		if h.n < bufSize {
			// Copied the entirety of b to h.buf.
			return size, nil
		}
		b = b[k:]
		h.flush()
		// No need to set h.n = 0 here; it happens just before exit.
	}
	// Process as many full buffers as possible, without copying, and calling initSeed only once.
	if len(b) > bufSize {
		h.initSeed()
		for len(b) > bufSize {
			h.state.s = rthash(b[:bufSize], h.state.s)
			b = b[bufSize:]
		}
	}
	// Copy the tail.
	copy(h.buf[:], b)
	h.n = len(b)
	return size, nil
}

// WriteString adds the bytes of s to the sequence of bytes hashed by h.
// It always writes all of s and never fails; the count and error result are for implementing [io.StringWriter].
func (h *Hash) WriteString(s string) (int, error) {
	// WriteString mirrors Write. See Write for comments.
	size := len(s)
	if h.n > 0 && h.n <= bufSize {
		k := copy(h.buf[h.n:], s)
		h.n += k
		if h.n < bufSize {
			return size, nil
		}
		s = s[k:]
		h.flush()
	}
	if len(s) > bufSize {
		h.initSeed()
		for len(s) > bufSize {
			h.state.s = rthashString(s[:bufSize], h.state.s)
			s = s[bufSize:]
		}
	}
	copy(h.buf[:], s)
	h.n = len(s)
	return size, nil
}

// Seed returns h's seed value.
func (h *Hash) Seed() Seed {
	h.initSeed()
	return h.seed
}
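
// The following is an illustrative sketch, not part of the original file:
// it demonstrates the property documented on Hash that the computed value
// depends only on the byte sequence, not on how the bytes are split across
// Write, WriteByte, and WriteString calls. The function name is hypothetical;
// it always reports true for a seed returned by MakeSeed.
func sameHashRegardlessOfSplit(seed Seed) bool {
	var h1, h2 Hash
	h1.SetSeed(seed)
	h2.SetSeed(seed)

	// One call writing "foo"...
	h1.WriteString("foo")

	// ...versus the same bytes split across WriteByte and Write.
	h2.WriteByte('f')
	h2.Write([]byte{'o', 'o'})

	return h1.Sum64() == h2.Sum64()
}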

// SetSeed sets h to use seed, which must have been returned by [MakeSeed]
// or by another [Hash.Seed] method.
// Two [Hash] objects with the same seed behave identically.
// Two [Hash] objects with different seeds will very likely behave differently.
// Any bytes added to h before this call will be discarded.
func (h *Hash) SetSeed(seed Seed) {
	if seed.s == 0 {
		panic("maphash: use of uninitialized Seed")
	}
	h.seed = seed
	h.state = seed
	h.n = 0
}

// Reset discards all bytes added to h.
// (The seed remains the same.)
func (h *Hash) Reset() {
	h.initSeed()
	h.state = h.seed
	h.n = 0
}

// precondition: buffer is full.
func (h *Hash) flush() {
	if h.n != len(h.buf) {
		panic("maphash: flush of partially full buffer")
	}
	h.initSeed()
	h.state.s = rthash(h.buf[:h.n], h.state.s)
	h.n = 0
}

// Sum64 returns h's current 64-bit value, which depends on
// h's seed and the sequence of bytes added to h since the
// last call to [Hash.Reset] or [Hash.SetSeed].
//
// All bits of the Sum64 result are close to uniformly and
// independently distributed, so it can be safely reduced
// by using bit masking, shifting, or modular arithmetic.
func (h *Hash) Sum64() uint64 {
	h.initSeed()
	return rthash(h.buf[:h.n], h.state.s)
}

// MakeSeed returns a new random seed.
func MakeSeed() Seed {
	var s uint64
	for {
		s = randUint64()
		// We use seed 0 to indicate an uninitialized seed/hash,
		// so keep trying until we get a non-zero seed.
		if s != 0 {
			break
		}
	}
	return Seed{s: s}
}

// Sum appends the hash's current 64-bit value to b.
// It exists for implementing [hash.Hash].
// For direct calls, it is more efficient to use [Hash.Sum64].
func (h *Hash) Sum(b []byte) []byte {
	x := h.Sum64()
	return append(b,
		byte(x>>0),
		byte(x>>8),
		byte(x>>16),
		byte(x>>24),
		byte(x>>32),
		byte(x>>40),
		byte(x>>48),
		byte(x>>56))
}

// Size returns h's hash value size, 8 bytes.
func (h *Hash) Size() int { return 8 }

// BlockSize returns h's block size.
func (h *Hash) BlockSize() int { return len(h.buf) }

// Comparable returns the hash of comparable value v with the given seed
// such that Comparable(s, v1) == Comparable(s, v2) if v1 == v2.
// If v != v, then the resulting hash is randomly distributed.
func Comparable[T comparable](seed Seed, v T) uint64 {
	comparableReady(v)
	var h Hash
	h.SetSeed(seed)
	comparableF(&h, v)
	return h.Sum64()
}

func comparableReady[T comparable](v T) {
	// Force v to be on the heap.
	// We cannot hash pointers to local variables,
	// as the address of the local variable
	// might change on stack growth.
	abi.Escape(v)
}

// WriteComparable adds x to the data hashed by h.
func WriteComparable[T comparable](h *Hash, x T) {
	comparableReady(x)
	comparableF(h, x)
}
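
// The following is an illustrative sketch, not part of the original file:
// it shows Comparable hashing a comparable struct key, as a hash table keyed
// by a small value type might do. The point2D type and function name are
// hypothetical; equal values always produce equal hashes for the same seed,
// per the Comparable contract above.
type point2D struct{ x, y int }

func examplePointHash(seed Seed, p point2D) uint64 {
	return Comparable(seed, p)
}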

// appendT hashes a value using reflection. It is used when the value
// cannot be hashed directly from its raw memory, or when purego is in use.
func appendT(h *Hash, v reflect.Value) {
	h.WriteString(v.Type().String())
	switch v.Kind() {
	case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Int:
		var buf [8]byte
		byteorder.LePutUint64(buf[:], uint64(v.Int()))
		h.Write(buf[:])
		return
	case reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uint, reflect.Uintptr:
		var buf [8]byte
		byteorder.LePutUint64(buf[:], v.Uint())
		h.Write(buf[:])
		return
	case reflect.Array:
		var buf [8]byte
		for i := range uint64(v.Len()) {
			byteorder.LePutUint64(buf[:], i)
			// Mix in the index so that, for example,
			// [2]string{"foo", ""} and [2]string{"", "foo"}
			// do not hash to the same value.
			h.Write(buf[:])
			appendT(h, v.Index(int(i)))
		}
		return
	case reflect.String:
		h.WriteString(v.String())
		return
	case reflect.Struct:
		var buf [8]byte
		for i := range v.NumField() {
			f := v.Field(i)
			byteorder.LePutUint64(buf[:], uint64(i))
			// Mix in the field index so that, for example,
			// struct{a, b string}{"foo", ""} and
			// struct{a, b string}{"", "foo"}
			// do not hash to the same value.
			h.Write(buf[:])
			appendT(h, f)
		}
		return
	case reflect.Complex64, reflect.Complex128:
		c := v.Complex()
		h.float64(real(c))
		h.float64(imag(c))
		return
	case reflect.Float32, reflect.Float64:
		h.float64(v.Float())
		return
	case reflect.Bool:
		h.WriteByte(btoi(v.Bool()))
		return
	case reflect.UnsafePointer, reflect.Pointer:
		var buf [8]byte
		// Because of the abi.Escape call in comparableReady, it is OK to
		// hash the pointer value: we know its target will not be moved.
		byteorder.LePutUint64(buf[:], uint64(v.Pointer()))
		h.Write(buf[:])
		return
	case reflect.Interface:
		appendT(h, v.Elem())
		return
	}
	panic("maphash: " + v.Type().String() + " not comparable")
}

func (h *Hash) float64(f float64) {
	if f == 0 {
		// +0.0 and -0.0 compare equal, so hash them identically.
		h.WriteByte(0)
		return
	}
	var buf [8]byte
	if f != f {
		// NaN is not equal to itself, so its hash may be anything;
		// use a random value.
		byteorder.LePutUint64(buf[:], randUint64())
		h.Write(buf[:])
		return
	}
	byteorder.LePutUint64(buf[:], math.Float64bits(f))
	h.Write(buf[:])
}

func btoi(b bool) byte {
	if b {
		return 1
	}
	return 0
}
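
// The following is an illustrative sketch, not part of the original file:
// it exercises the floating-point behavior documented for Comparable and
// reflected in the float64 helper above. +0.0 and -0.0 compare equal in Go,
// so they must hash identically, while a NaN (which is not equal to itself)
// hashes to a randomly distributed value. The function name is hypothetical.
func exampleSignedZeroHash(seed Seed) bool {
	posZero := 0.0
	negZero := math.Copysign(0, -1)
	return Comparable(seed, posZero) == Comparable(seed, negZero)
}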