1 // Copyright 2026 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "textflag.h"
6
// memHash32AES hashes the 4 bytes addressed by p together with the seed h,
// using AES hardware instructions. The low 32 bits of the scrambled
// register are returned.
TEXT ·memHash32AES(SB),NOSPLIT,$0-12
	MOVL	h+4(FP), X0			// X0 dword 0 = seed
	MOVL	p+0(FP), AX			// AX = pointer to the data
	PINSRD	$1, (AX), X0			// X0 dword 1 = the 4 data bytes
	AESENC	·aeskeysched(SB), X0		// three rounds, one key-schedule block each
	AESENC	·aeskeysched+16(SB), X0
	AESENC	·aeskeysched+32(SB), X0
	MOVL	X0, ret+8(FP)			// result = low 32 bits of X0
	RET
17
// memHash64AES hashes the 8 bytes addressed by p together with the seed h,
// using AES hardware instructions. The low 32 bits of the scrambled
// register are returned.
TEXT ·memHash64AES(SB),NOSPLIT,$0-12
	MOVL	p+0(FP), DX			// DX = pointer to the data
	MOVQ	(DX), X0			// X0 low 64 bits = the 8 data bytes
	PINSRD	$2, h+4(FP), X0			// X0 dword 2 = seed
	AESENC	·aeskeysched(SB), X0		// three rounds, one key-schedule block each
	AESENC	·aeskeysched+16(SB), X0
	AESENC	·aeskeysched+32(SB), X0
	MOVL	X0, ret+8(FP)			// result = low 32 bits of X0
	RET
27
// memHashAES hashes s bytes starting at p. It only loads the registers
// that aeshashbody expects (AX = data, BX = length, DX = result address)
// and then jumps to it; aeshashbody reads the seed from h+4(FP) itself.
TEXT ·memHashAES(SB),NOSPLIT,$0-16
	LEAL	ret+12(FP), DX			// DX = address of the result slot
	MOVL	s+8(FP), BX			// BX = size in bytes
	MOVL	p+0(FP), AX			// AX = pointer to the data
	JMP	·aeshashbody<>(SB)
33
// strHashAES hashes the string whose header is addressed by p. The header
// is read as a data pointer at offset 0 followed by a length at offset 4.
// It loads the registers that aeshashbody expects (AX = data, BX = length,
// DX = result address) and then jumps to it.
TEXT ·strHashAES(SB),NOSPLIT,$0-12
	LEAL	ret+8(FP), DX			// DX = address of the result slot
	MOVL	p+0(FP), BX			// BX = pointer to the string object
	MOVL	(BX), AX			// AX = string data
	MOVL	4(BX), BX			// BX = length of string
	JMP	·aeshashbody<>(SB)
40
// aeshashbody is the common body shared by memHashAES and strHashAES,
// which load the registers below and JMP here. Because it is reached by
// JMP from a zero-size frame, h+4(FP) still names the seed argument of
// the function that jumped here.
//
// AX: data
// BX: length
// DX: address to put return value
//
// On every path the low 32 bits of the final XMM state are stored to (DX).
TEXT ·aeshashbody<>(SB),NOSPLIT,$0-0
	MOVL	h+4(FP), X0	// 32 bits of per-table hash seed
	PINSRW	$4, BX, X0	// 16 bits of length
	PSHUFHW	$0, X0, X0	// replace size with its low 2 bytes repeated 4 times
	MOVO	X0, X1	// save unscrambled seed
	PXOR	·aeskeysched(SB), X0	// xor in per-process seed
	AESENC	X0, X0	// scramble seed

	// Dispatch on length: 0-15, exactly 16, 17-32, 33-64, 65 and up.
	CMPL	BX, $16
	JB	aes0to15
	JE	aes16
	CMPL	BX, $32
	JBE	aes17to32
	CMPL	BX, $64
	JBE	aes33to64
	JMP	aes65plus

aes0to15:
	TESTL	BX, BX
	JE	aes0

	// Advance AX by 16 and test bits 4-11 of the result. They are all
	// zero exactly when the original pointer lies in the last 16 bytes
	// before a 4096-byte boundary, where a 16-byte load starting at the
	// pointer could run across that boundary.
	ADDL	$16, AX
	TESTW	$0xff0, AX
	JE	endofpage

	// 16 bytes loaded at this address won't cross
	// a page boundary, so we can load it directly.
	MOVOU	-16(AX), X1	// -16 undoes the ADDL above: load starts at the data
	ADDL	BX, BX	// BX = 2*length, so BX*8 below is 16*length
	PAND	masks<>(SB)(BX*8), X1	// keep only the low `length` bytes

final1:
	// X0 = scrambled seed, X1 = up to 16 bytes of data.
	PXOR	X0, X1	// xor data with seed
	AESENC	X1, X1	// scramble combo 3 times
	AESENC	X1, X1
	AESENC	X1, X1
	MOVL	X1, (DX)
	RET

endofpage:
	// address ends in 1111xxxx. Might be up against
	// a page boundary, so load ending at last byte.
	// Then shift bytes down using pshufb.
	MOVOU	-32(AX)(BX*1), X1	// AX was advanced by 16: this is data+length-16
	ADDL	BX, BX	// BX = 2*length, so BX*8 below is 16*length
	PSHUFB	shifts<>(SB)(BX*8), X1	// move the top `length` bytes to the bottom
	JMP	final1

aes0:
	// Return scrambled input seed
	AESENC	X0, X0
	MOVL	X0, (DX)
	RET

aes16:
	// Exactly one full 16-byte block: no masking needed.
	MOVOU	(AX), X1
	JMP	final1

aes17to32:
	// make second starting seed
	// (X1 still holds the unscrambled seed saved at entry)
	PXOR	·aeskeysched+16(SB), X1
	AESENC	X1, X1

	// load data to be hashed
	// (first 16 and last 16 bytes; they overlap when length < 32)
	MOVOU	(AX), X2
	MOVOU	-16(AX)(BX*1), X3

	// xor with seed
	PXOR	X0, X2
	PXOR	X1, X3

	// scramble 3 times
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3

	// combine results
	PXOR	X3, X2
	MOVL	X2, (DX)
	RET

aes33to64:
	// make 3 more starting seeds
	// (each is the unscrambled seed xored with a different 16-byte
	// slice of the key schedule, then scrambled once)
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	·aeskeysched+16(SB), X1
	PXOR	·aeskeysched+32(SB), X2
	PXOR	·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	// load the first 32 and the last 32 bytes
	// (they overlap when length < 64)
	MOVOU	(AX), X4
	MOVOU	16(AX), X5
	MOVOU	-32(AX)(BX*1), X6
	MOVOU	-16(AX)(BX*1), X7

	// xor each data block with its own seed
	PXOR	X0, X4
	PXOR	X1, X5
	PXOR	X2, X6
	PXOR	X3, X7

	// scramble 3 times
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// combine the four lanes into X4
	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVL	X4, (DX)
	RET

aes65plus:
	// make 3 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	·aeskeysched+16(SB), X1
	PXOR	·aeskeysched+32(SB), X2
	PXOR	·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	// start with last (possibly overlapping) block
	MOVOU	-64(AX)(BX*1), X4
	MOVOU	-48(AX)(BX*1), X5
	MOVOU	-32(AX)(BX*1), X6
	MOVOU	-16(AX)(BX*1), X7

	// scramble state once
	// (X4-X7 are the running state; the seeds X0-X3 are the round keys)
	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	// compute number of remaining 64-byte blocks
	// BX = (length-1)/64, which is at least 1 here because length >= 65.
	DECL	BX
	SHRL	$6, BX

aesloop:
	// scramble state, xor in a block
	// (X0-X3 are reused as load temporaries; the seeds were already
	// folded into X4-X7 above)
	MOVOU	(AX), X0
	MOVOU	16(AX), X1
	MOVOU	32(AX), X2
	MOVOU	48(AX), X3
	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	// scramble state
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	ADDL	$64, AX	// advance to the next 64-byte block
	DECL	BX
	JNE	aesloop

	// 3 more scrambles to finish
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// combine the four lanes into X4
	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVL	X4, (DX)
	RET
239
// simple mask to get rid of data in the high part of the register.
//
// The table holds sixteen 16-byte entries. Entry n (at byte offset 16*n)
// has its low n bytes set to 0xff and all remaining bytes zero, so a PAND
// with entry n keeps the low n bytes of a register and clears the rest.
// aeshashbody indexes the table with the data length (1 to 15) and uses
// it as a PAND memory operand; checkMasksAndShiftsAlignment reports
// whether the table is 16-byte aligned.

// entry 0: keep nothing
DATA masks<>+0x00(SB)/4, $0x00000000
DATA masks<>+0x04(SB)/4, $0x00000000
DATA masks<>+0x08(SB)/4, $0x00000000
DATA masks<>+0x0c(SB)/4, $0x00000000

// entry 1: keep the low 1 byte
DATA masks<>+0x10(SB)/4, $0x000000ff
DATA masks<>+0x14(SB)/4, $0x00000000
DATA masks<>+0x18(SB)/4, $0x00000000
DATA masks<>+0x1c(SB)/4, $0x00000000

// entry 2: keep the low 2 bytes
DATA masks<>+0x20(SB)/4, $0x0000ffff
DATA masks<>+0x24(SB)/4, $0x00000000
DATA masks<>+0x28(SB)/4, $0x00000000
DATA masks<>+0x2c(SB)/4, $0x00000000

// entry 3: keep the low 3 bytes
DATA masks<>+0x30(SB)/4, $0x00ffffff
DATA masks<>+0x34(SB)/4, $0x00000000
DATA masks<>+0x38(SB)/4, $0x00000000
DATA masks<>+0x3c(SB)/4, $0x00000000

// entry 4: keep the low 4 bytes
DATA masks<>+0x40(SB)/4, $0xffffffff
DATA masks<>+0x44(SB)/4, $0x00000000
DATA masks<>+0x48(SB)/4, $0x00000000
DATA masks<>+0x4c(SB)/4, $0x00000000

// entry 5: keep the low 5 bytes
DATA masks<>+0x50(SB)/4, $0xffffffff
DATA masks<>+0x54(SB)/4, $0x000000ff
DATA masks<>+0x58(SB)/4, $0x00000000
DATA masks<>+0x5c(SB)/4, $0x00000000

// entry 6: keep the low 6 bytes
DATA masks<>+0x60(SB)/4, $0xffffffff
DATA masks<>+0x64(SB)/4, $0x0000ffff
DATA masks<>+0x68(SB)/4, $0x00000000
DATA masks<>+0x6c(SB)/4, $0x00000000

// entry 7: keep the low 7 bytes
DATA masks<>+0x70(SB)/4, $0xffffffff
DATA masks<>+0x74(SB)/4, $0x00ffffff
DATA masks<>+0x78(SB)/4, $0x00000000
DATA masks<>+0x7c(SB)/4, $0x00000000

// entry 8: keep the low 8 bytes
DATA masks<>+0x80(SB)/4, $0xffffffff
DATA masks<>+0x84(SB)/4, $0xffffffff
DATA masks<>+0x88(SB)/4, $0x00000000
DATA masks<>+0x8c(SB)/4, $0x00000000

// entry 9: keep the low 9 bytes
DATA masks<>+0x90(SB)/4, $0xffffffff
DATA masks<>+0x94(SB)/4, $0xffffffff
DATA masks<>+0x98(SB)/4, $0x000000ff
DATA masks<>+0x9c(SB)/4, $0x00000000

// entry 10: keep the low 10 bytes
DATA masks<>+0xa0(SB)/4, $0xffffffff
DATA masks<>+0xa4(SB)/4, $0xffffffff
DATA masks<>+0xa8(SB)/4, $0x0000ffff
DATA masks<>+0xac(SB)/4, $0x00000000

// entry 11: keep the low 11 bytes
DATA masks<>+0xb0(SB)/4, $0xffffffff
DATA masks<>+0xb4(SB)/4, $0xffffffff
DATA masks<>+0xb8(SB)/4, $0x00ffffff
DATA masks<>+0xbc(SB)/4, $0x00000000

// entry 12: keep the low 12 bytes
DATA masks<>+0xc0(SB)/4, $0xffffffff
DATA masks<>+0xc4(SB)/4, $0xffffffff
DATA masks<>+0xc8(SB)/4, $0xffffffff
DATA masks<>+0xcc(SB)/4, $0x00000000

// entry 13: keep the low 13 bytes
DATA masks<>+0xd0(SB)/4, $0xffffffff
DATA masks<>+0xd4(SB)/4, $0xffffffff
DATA masks<>+0xd8(SB)/4, $0xffffffff
DATA masks<>+0xdc(SB)/4, $0x000000ff

// entry 14: keep the low 14 bytes
DATA masks<>+0xe0(SB)/4, $0xffffffff
DATA masks<>+0xe4(SB)/4, $0xffffffff
DATA masks<>+0xe8(SB)/4, $0xffffffff
DATA masks<>+0xec(SB)/4, $0x0000ffff

// entry 15: keep the low 15 bytes
DATA masks<>+0xf0(SB)/4, $0xffffffff
DATA masks<>+0xf4(SB)/4, $0xffffffff
DATA masks<>+0xf8(SB)/4, $0xffffffff
DATA masks<>+0xfc(SB)/4, $0x00ffffff

// 16 entries * 16 bytes, read-only.
GLOBL masks<>(SB),RODATA,$256
322
// these are arguments to pshufb. They move data down from
// the high bytes of the register to the low bytes of the register.
// index is how many bytes to move.
//
// The table holds sixteen 16-byte entries. In entry n (at byte offset
// 16*n) the low n control bytes are 16-n, ..., 15, selecting the top n
// source bytes in order; every other control byte is 0xff, which makes
// PSHUFB write zero to that destination byte. aeshashbody indexes the
// table with the data length (1 to 15); it handles length 0 separately,
// so entry 0 is not reached from there. checkMasksAndShiftsAlignment
// reports whether the table is 16-byte aligned.

// entry 0
DATA shifts<>+0x00(SB)/4, $0x00000000
DATA shifts<>+0x04(SB)/4, $0x00000000
DATA shifts<>+0x08(SB)/4, $0x00000000
DATA shifts<>+0x0c(SB)/4, $0x00000000

// entry 1: byte 15 -> byte 0
DATA shifts<>+0x10(SB)/4, $0xffffff0f
DATA shifts<>+0x14(SB)/4, $0xffffffff
DATA shifts<>+0x18(SB)/4, $0xffffffff
DATA shifts<>+0x1c(SB)/4, $0xffffffff

// entry 2: bytes 14-15 -> bytes 0-1
DATA shifts<>+0x20(SB)/4, $0xffff0f0e
DATA shifts<>+0x24(SB)/4, $0xffffffff
DATA shifts<>+0x28(SB)/4, $0xffffffff
DATA shifts<>+0x2c(SB)/4, $0xffffffff

// entry 3: bytes 13-15 -> bytes 0-2
DATA shifts<>+0x30(SB)/4, $0xff0f0e0d
DATA shifts<>+0x34(SB)/4, $0xffffffff
DATA shifts<>+0x38(SB)/4, $0xffffffff
DATA shifts<>+0x3c(SB)/4, $0xffffffff

// entry 4: bytes 12-15 -> bytes 0-3
DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c
DATA shifts<>+0x44(SB)/4, $0xffffffff
DATA shifts<>+0x48(SB)/4, $0xffffffff
DATA shifts<>+0x4c(SB)/4, $0xffffffff

// entry 5: bytes 11-15 -> bytes 0-4
DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b
DATA shifts<>+0x54(SB)/4, $0xffffff0f
DATA shifts<>+0x58(SB)/4, $0xffffffff
DATA shifts<>+0x5c(SB)/4, $0xffffffff

// entry 6: bytes 10-15 -> bytes 0-5
DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a
DATA shifts<>+0x64(SB)/4, $0xffff0f0e
DATA shifts<>+0x68(SB)/4, $0xffffffff
DATA shifts<>+0x6c(SB)/4, $0xffffffff

// entry 7: bytes 9-15 -> bytes 0-6
DATA shifts<>+0x70(SB)/4, $0x0c0b0a09
DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
DATA shifts<>+0x78(SB)/4, $0xffffffff
DATA shifts<>+0x7c(SB)/4, $0xffffffff

// entry 8: bytes 8-15 -> bytes 0-7
DATA shifts<>+0x80(SB)/4, $0x0b0a0908
DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
DATA shifts<>+0x88(SB)/4, $0xffffffff
DATA shifts<>+0x8c(SB)/4, $0xffffffff

// entry 9: bytes 7-15 -> bytes 0-8
DATA shifts<>+0x90(SB)/4, $0x0a090807
DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
DATA shifts<>+0x98(SB)/4, $0xffffff0f
DATA shifts<>+0x9c(SB)/4, $0xffffffff

// entry 10: bytes 6-15 -> bytes 0-9
DATA shifts<>+0xa0(SB)/4, $0x09080706
DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
DATA shifts<>+0xac(SB)/4, $0xffffffff

// entry 11: bytes 5-15 -> bytes 0-10
DATA shifts<>+0xb0(SB)/4, $0x08070605
DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
DATA shifts<>+0xbc(SB)/4, $0xffffffff

// entry 12: bytes 4-15 -> bytes 0-11
DATA shifts<>+0xc0(SB)/4, $0x07060504
DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
DATA shifts<>+0xcc(SB)/4, $0xffffffff

// entry 13: bytes 3-15 -> bytes 0-12
DATA shifts<>+0xd0(SB)/4, $0x06050403
DATA shifts<>+0xd4(SB)/4, $0x0a090807
DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
DATA shifts<>+0xdc(SB)/4, $0xffffff0f

// entry 14: bytes 2-15 -> bytes 0-13
DATA shifts<>+0xe0(SB)/4, $0x05040302
DATA shifts<>+0xe4(SB)/4, $0x09080706
DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
DATA shifts<>+0xec(SB)/4, $0xffff0f0e

// entry 15: bytes 1-15 -> bytes 0-14
DATA shifts<>+0xf0(SB)/4, $0x04030201
DATA shifts<>+0xf4(SB)/4, $0x08070605
DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d

// 16 entries * 16 bytes, read-only.
GLOBL shifts<>(SB),RODATA,$256
407
// checkMasksAndShiftsAlignment reports whether both masks<>(SB) and
// shifts<>(SB) start on a 16-byte boundary. The two addresses are OR-ed
// together, so the low four bits of the result are zero only when they
// are zero in both addresses.
TEXT ·checkMasksAndShiftsAlignment(SB),NOSPLIT,$0-1
	MOVL	$shifts<>(SB), AX		// AX = address of shifts
	MOVL	$masks<>(SB), BX		// BX = address of masks
	ORL	BX, AX				// AX = union of the set bits of both addresses
	TESTL	$15, AX				// any of the low 4 bits set?
	SETEQ	ret+0(FP)			// ret = true when none are set
	RET
416
View as plain text