1 // Copyright 2026 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "textflag.h"
6
// func memHashAES(p unsafe.Pointer, h, s uintptr) uintptr
// Hash of an arbitrary memory region using AES hardware instructions.
// Under ABIInternal the three arguments arrive in AX/BX/CX, which is
// exactly the register contract aeshashbody expects, so this is a pure
// tail-call trampoline: no register shuffling is needed, and the hash
// result comes back to the caller in AX directly from aeshashbody.
TEXT ·memHashAES<ABIInternal>(SB),NOSPLIT,$0-32
	// AX = ptr to data
	// BX = seed
	// CX = size
	JMP	·aeshashbody<>(SB)
14
// func strHashAES(p unsafe.Pointer, h uintptr) uintptr
// Hash of a string using AES hardware instructions.
// p points at a string header (data pointer at offset 0, length at
// offset 8). The header is unpacked into the AX/BX/CX register contract
// of aeshashbody, then control tail-calls into the shared body, which
// returns the hash in AX.
TEXT ·strHashAES<ABIInternal>(SB),NOSPLIT,$0-24
	// AX = ptr to string struct
	// BX = seed
	MOVQ	8(AX), CX	// length of string
	MOVQ	(AX), AX	// string data
	JMP	·aeshashbody<>(SB)
22
// Shared AES hash core. Dispatches on length to size-specialized paths.
// Register contract (callers jump here with these already set):
//   AX: data
//   BX: hash seed
//   CX: length
// At return: AX = return value
// Clobbers X0-X15; paths that dirty X15 re-zero it before returning
// (the Go register ABI treats X15 as the fixed zero register).
TEXT ·aeshashbody<>(SB),NOSPLIT,$0-0
	// Fill an SSE register with our seeds: mix the per-table seed with
	// the length so that inputs of different sizes hash differently.
	MOVQ	BX, X0			// 64 bits of per-table hash seed
	PINSRW	$4, CX, X0		// 16 bits of length
	PSHUFHW	$0, X0, X0		// repeat length 4 times total
	MOVO	X0, X1			// save unscrambled seed
	PXOR	·aeskeysched(SB), X0	// xor in per-process seed
	AESENC	X0, X0			// scramble seed

	// Dispatch on length. Each path loads the data with (possibly
	// overlapping) 16-byte chunks so no byte is read twice per round
	// of XOR+AESENC mixing.
	CMPQ	CX, $16
	JB	aes0to15
	JE	aes16
	CMPQ	CX, $32
	JBE	aes17to32
	CMPQ	CX, $64
	JBE	aes33to64
	CMPQ	CX, $128
	JBE	aes65to128
	JMP	aes129plus

aes0to15:
	TESTQ	CX, CX
	JE	aes0

	// We want one 16-byte load at AX and to mask off the CX valid
	// bytes, but a 16-byte load may run past the data into an
	// unmapped page. (AX+16) & 0xff0 == 0 means AX+16 lies within
	// the first 16 bytes of a 4KB page, i.e. the load could cross a
	// page boundary — take the slow path in that case.
	ADDQ	$16, AX
	TESTW	$0xff0, AX
	JE	endofpage

	// 16 bytes loaded at this address won't cross
	// a page boundary, so we can load it directly.
	MOVOU	-16(AX), X1
	// Index masks<> by 16*CX: entries are 16 bytes, and the CX*8 scale
	// below supplies the other factor of 2 after CX is doubled here.
	ADDQ	CX, CX
	MOVQ	$masks<>(SB), AX
	PAND	(AX)(CX*8), X1		// keep only the CX valid data bytes
final1:
	PXOR	X0, X1			// xor data with seed
	AESENC	X1, X1			// scramble combo 3 times
	AESENC	X1, X1
	AESENC	X1, X1
	MOVQ	X1, AX			// return X1
	RET

endofpage:
	// address ends in 1111xxxx. Might be up against
	// a page boundary, so load ending at last byte.
	// Then shift bytes down using pshufb.
	// (AX was advanced by 16 above, so -32(AX)(CX*1) is the 16 bytes
	// ending at the last valid data byte.)
	MOVOU	-32(AX)(CX*1), X1
	ADDQ	CX, CX			// scale index: shifts<> entries are 16 bytes
	MOVQ	$shifts<>(SB), AX
	PSHUFB	(AX)(CX*8), X1		// move the CX data bytes down to the bottom
	JMP	final1

aes0:
	// Length 0: return scrambled input seed, no data to read.
	AESENC	X0, X0
	MOVQ	X0, AX			// return X0
	RET

aes16:
	// Exactly one block: load it and share the tail with aes0to15.
	MOVOU	(AX), X1
	JMP	final1

aes17to32:
	// make second starting seed
	PXOR	·aeskeysched+16(SB), X1
	AESENC	X1, X1

	// load data to be hashed: first 16 bytes and last 16 bytes
	// (these overlap when CX < 32, which is fine — every byte is
	// still covered).
	MOVOU	(AX), X2
	MOVOU	-16(AX)(CX*1), X3

	// xor with seed
	PXOR	X0, X2
	PXOR	X1, X3

	// scramble 3 times
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3

	// combine results
	PXOR	X3, X2
	MOVQ	X2, AX			// return X2
	RET

aes33to64:
	// make 3 more starting seeds, one per 16-byte lane
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	·aeskeysched+16(SB), X1
	PXOR	·aeskeysched+32(SB), X2
	PXOR	·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	// Load first 32 and last 32 bytes (overlapping when CX < 64).
	MOVOU	(AX), X4
	MOVOU	16(AX), X5
	MOVOU	-32(AX)(CX*1), X6
	MOVOU	-16(AX)(CX*1), X7

	// xor each lane with its seed
	PXOR	X0, X4
	PXOR	X1, X5
	PXOR	X2, X6
	PXOR	X3, X7

	// scramble 3 times
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// combine results
	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVQ	X4, AX			// return X4
	RET

aes65to128:
	// make 7 more starting seeds, one per 16-byte lane
	MOVO	X1, X2
	MOVO	X1, X3
	MOVO	X1, X4
	MOVO	X1, X5
	MOVO	X1, X6
	MOVO	X1, X7
	PXOR	·aeskeysched+16(SB), X1
	PXOR	·aeskeysched+32(SB), X2
	PXOR	·aeskeysched+48(SB), X3
	PXOR	·aeskeysched+64(SB), X4
	PXOR	·aeskeysched+80(SB), X5
	PXOR	·aeskeysched+96(SB), X6
	PXOR	·aeskeysched+112(SB), X7
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// load data: first 64 and last 64 bytes (overlapping when CX < 128)
	MOVOU	(AX), X8
	MOVOU	16(AX), X9
	MOVOU	32(AX), X10
	MOVOU	48(AX), X11
	MOVOU	-64(AX)(CX*1), X12
	MOVOU	-48(AX)(CX*1), X13
	MOVOU	-32(AX)(CX*1), X14
	MOVOU	-16(AX)(CX*1), X15

	// xor with seed
	PXOR	X0, X8
	PXOR	X1, X9
	PXOR	X2, X10
	PXOR	X3, X11
	PXOR	X4, X12
	PXOR	X5, X13
	PXOR	X6, X14
	PXOR	X7, X15

	// scramble 3 times
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// combine results (tree reduction of the 8 lanes into X8)
	PXOR	X12, X8
	PXOR	X13, X9
	PXOR	X14, X10
	PXOR	X15, X11
	PXOR	X10, X8
	PXOR	X11, X9
	PXOR	X9, X8
	// X15 must be zero on return
	PXOR	X15, X15
	MOVQ	X8, AX			// return X8
	RET

aes129plus:
	// make 7 more starting seeds, one per 16-byte lane
	MOVO	X1, X2
	MOVO	X1, X3
	MOVO	X1, X4
	MOVO	X1, X5
	MOVO	X1, X6
	MOVO	X1, X7
	PXOR	·aeskeysched+16(SB), X1
	PXOR	·aeskeysched+32(SB), X2
	PXOR	·aeskeysched+48(SB), X3
	PXOR	·aeskeysched+64(SB), X4
	PXOR	·aeskeysched+80(SB), X5
	PXOR	·aeskeysched+96(SB), X6
	PXOR	·aeskeysched+112(SB), X7
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// start with last (possibly overlapping) block
	MOVOU	-128(AX)(CX*1), X8
	MOVOU	-112(AX)(CX*1), X9
	MOVOU	-96(AX)(CX*1), X10
	MOVOU	-80(AX)(CX*1), X11
	MOVOU	-64(AX)(CX*1), X12
	MOVOU	-48(AX)(CX*1), X13
	MOVOU	-32(AX)(CX*1), X14
	MOVOU	-16(AX)(CX*1), X15

	// xor in seed
	PXOR	X0, X8
	PXOR	X1, X9
	PXOR	X2, X10
	PXOR	X3, X11
	PXOR	X4, X12
	PXOR	X5, X13
	PXOR	X6, X14
	PXOR	X7, X15

	// compute number of remaining 128-byte blocks: (CX-1)/128.
	// The final (possibly partial) block was consumed above, so the
	// loop walks the remaining full blocks from the front.
	DECQ	CX
	SHRQ	$7, CX

	PCALIGN	$16			// align the hot loop entry
aesloop:
	// scramble state
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// scramble state, xor in a block
	// (AESENC's first operand acts as the round key, so each data
	// chunk is folded into a lane while that lane is scrambled)
	MOVOU	(AX), X0
	MOVOU	16(AX), X1
	MOVOU	32(AX), X2
	MOVOU	48(AX), X3
	AESENC	X0, X8
	AESENC	X1, X9
	AESENC	X2, X10
	AESENC	X3, X11
	MOVOU	64(AX), X4
	MOVOU	80(AX), X5
	MOVOU	96(AX), X6
	MOVOU	112(AX), X7
	AESENC	X4, X12
	AESENC	X5, X13
	AESENC	X6, X14
	AESENC	X7, X15

	ADDQ	$128, AX
	DECQ	CX
	JNE	aesloop

	// 3 more scrambles to finish
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// combine results (tree reduction of the 8 lanes into X8)
	PXOR	X12, X8
	PXOR	X13, X9
	PXOR	X14, X10
	PXOR	X15, X11
	PXOR	X10, X8
	PXOR	X11, X9
	PXOR	X9, X8
	// X15 must be zero on return
	PXOR	X15, X15
	MOVQ	X8, AX			// return X8
	RET
359
// simple mask to get rid of data in the high part of the register.
// 16 entries of 16 bytes each; entry i (at offset 16*i) has its low
// i bytes set to 0xff and the rest zero, so PAND with entry i keeps
// only the first i data bytes. Indexed in aes0to15 via 16*length.
// Must stay 16-byte aligned (PAND takes it as an aligned memory
// operand); checkMasksAndShiftsAlignment verifies this.
DATA masks<>+0x00(SB)/8, $0x0000000000000000
DATA masks<>+0x08(SB)/8, $0x0000000000000000
DATA masks<>+0x10(SB)/8, $0x00000000000000ff
DATA masks<>+0x18(SB)/8, $0x0000000000000000
DATA masks<>+0x20(SB)/8, $0x000000000000ffff
DATA masks<>+0x28(SB)/8, $0x0000000000000000
DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
DATA masks<>+0x38(SB)/8, $0x0000000000000000
DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
DATA masks<>+0x48(SB)/8, $0x0000000000000000
DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
DATA masks<>+0x58(SB)/8, $0x0000000000000000
DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
DATA masks<>+0x68(SB)/8, $0x0000000000000000
DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
DATA masks<>+0x78(SB)/8, $0x0000000000000000
DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
DATA masks<>+0x88(SB)/8, $0x0000000000000000
DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
DATA masks<>+0x98(SB)/8, $0x00000000000000ff
DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
GLOBL masks<>(SB),RODATA,$256
394
// these are arguments to pshufb. They move data down from
// the high bytes of the register to the low bytes of the register.
// index is how many bytes to move.
// 16 entries of 16 bytes each; entry i (at offset 16*i) is a PSHUFB
// control that selects the top i bytes of the source into the bottom
// i lanes. Selector bytes with the high bit set (0xff) zero their
// destination lane, so the remaining bytes are cleared — no separate
// masking pass is needed on the endofpage path. Indexed via 16*length.
// Must stay 16-byte aligned (PSHUFB takes it as an aligned memory
// operand); checkMasksAndShiftsAlignment verifies this.
DATA shifts<>+0x00(SB)/8, $0x0000000000000000
DATA shifts<>+0x08(SB)/8, $0x0000000000000000
DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
GLOBL shifts<>(SB),RODATA,$256
431
// func checkMasksAndShiftsAlignment() bool
// Reports whether both the masks<> and shifts<> tables are 16-byte
// aligned, as required for their use as aligned PAND/PSHUFB memory
// operands in aeshashbody. Result is returned in AX (ABIInternal).
TEXT ·checkMasksAndShiftsAlignment<ABIInternal>(SB),NOSPLIT,$0-1
	// OR the two table addresses together: any set low bit in either
	// address survives into the combined value, so one misaligned
	// table is enough to fail the check.
	MOVQ	$shifts<>(SB), AX
	MOVQ	$masks<>(SB), BX
	ORQ	BX, AX
	// Both tables are aligned iff the low 4 bits of the OR are zero.
	TESTQ	$15, AX
	SETEQ	AX			// AX = 1 if 16-byte aligned, else 0
	RET
440
// NOTE(review): removed non-source page footer ("View as plain text") left over from a web copy-paste.