// Text file src/internal/runtime/maps/memhash_amd64.s

     1  // Copyright 2026 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "textflag.h"
     6  
     7  // func memHashAES(p unsafe.Pointer, h, s uintptr) uintptr
     8  // hash function using AES hardware instructions
     9  TEXT ·memHashAES<ABIInternal>(SB),NOSPLIT,$0-32
    10  	// AX = ptr to data
    11  	// BX = seed
    12  	// CX = size
    13  	JMP	·aeshashbody<>(SB)
    14  
    15  // func strhashAES(p unsafe.Pointer, h uintptr) uintptr
    16  TEXT ·strHashAES<ABIInternal>(SB),NOSPLIT,$0-24
    17  	// AX = ptr to string struct
    18  	// BX = seed
    19  	MOVQ	8(AX), CX	// length of string
    20  	MOVQ	(AX), AX	// string data
    21  	JMP	·aeshashbody<>(SB)
    22  
    23  // AX: data
    24  // BX: hash seed
    25  // CX: length
    26  // At return: AX = return value
    27  TEXT ·aeshashbody<>(SB),NOSPLIT,$0-0
    28  	// Fill an SSE register with our seeds.
    29  	MOVQ	BX, X0				// 64 bits of per-table hash seed
    30  	PINSRW	$4, CX, X0			// 16 bits of length
    31  	PSHUFHW $0, X0, X0			// repeat length 4 times total
    32  	MOVO	X0, X1				// save unscrambled seed
    33  	PXOR	·aeskeysched(SB), X0	// xor in per-process seed
    34  	AESENC	X0, X0				// scramble seed
    35  
    36  	CMPQ	CX, $16
    37  	JB	aes0to15
    38  	JE	aes16
    39  	CMPQ	CX, $32
    40  	JBE	aes17to32
    41  	CMPQ	CX, $64
    42  	JBE	aes33to64
    43  	CMPQ	CX, $128
    44  	JBE	aes65to128
    45  	JMP	aes129plus
    46  
    47  aes0to15:
    48  	TESTQ	CX, CX
    49  	JE	aes0
    50  
    51  	ADDQ	$16, AX
    52  	TESTW	$0xff0, AX
    53  	JE	endofpage
    54  
    55  	// 16 bytes loaded at this address won't cross
    56  	// a page boundary, so we can load it directly.
    57  	MOVOU	-16(AX), X1
    58  	ADDQ	CX, CX
    59  	MOVQ	$masks<>(SB), AX
    60  	PAND	(AX)(CX*8), X1
    61  final1:
    62  	PXOR	X0, X1	// xor data with seed
    63  	AESENC	X1, X1	// scramble combo 3 times
    64  	AESENC	X1, X1
    65  	AESENC	X1, X1
    66  	MOVQ	X1, AX	// return X1
    67  	RET
    68  
    69  endofpage:
    70  	// address ends in 1111xxxx. Might be up against
    71  	// a page boundary, so load ending at last byte.
    72  	// Then shift bytes down using pshufb.
    73  	MOVOU	-32(AX)(CX*1), X1
    74  	ADDQ	CX, CX
    75  	MOVQ	$shifts<>(SB), AX
    76  	PSHUFB	(AX)(CX*8), X1
    77  	JMP	final1
    78  
    79  aes0:
    80  	// Return scrambled input seed
    81  	AESENC	X0, X0
    82  	MOVQ	X0, AX	// return X0
    83  	RET
    84  
    85  aes16:
    86  	MOVOU	(AX), X1
    87  	JMP	final1
    88  
    89  aes17to32:
    90  	// make second starting seed
    91  	PXOR	·aeskeysched+16(SB), X1
    92  	AESENC	X1, X1
    93  
    94  	// load data to be hashed
    95  	MOVOU	(AX), X2
    96  	MOVOU	-16(AX)(CX*1), X3
    97  
    98  	// xor with seed
    99  	PXOR	X0, X2
   100  	PXOR	X1, X3
   101  
   102  	// scramble 3 times
   103  	AESENC	X2, X2
   104  	AESENC	X3, X3
   105  	AESENC	X2, X2
   106  	AESENC	X3, X3
   107  	AESENC	X2, X2
   108  	AESENC	X3, X3
   109  
   110  	// combine results
   111  	PXOR	X3, X2
   112  	MOVQ	X2, AX	// return X2
   113  	RET
   114  
   115  aes33to64:
   116  	// make 3 more starting seeds
   117  	MOVO	X1, X2
   118  	MOVO	X1, X3
   119  	PXOR	·aeskeysched+16(SB), X1
   120  	PXOR	·aeskeysched+32(SB), X2
   121  	PXOR	·aeskeysched+48(SB), X3
   122  	AESENC	X1, X1
   123  	AESENC	X2, X2
   124  	AESENC	X3, X3
   125  
   126  	MOVOU	(AX), X4
   127  	MOVOU	16(AX), X5
   128  	MOVOU	-32(AX)(CX*1), X6
   129  	MOVOU	-16(AX)(CX*1), X7
   130  
   131  	PXOR	X0, X4
   132  	PXOR	X1, X5
   133  	PXOR	X2, X6
   134  	PXOR	X3, X7
   135  
   136  	AESENC	X4, X4
   137  	AESENC	X5, X5
   138  	AESENC	X6, X6
   139  	AESENC	X7, X7
   140  
   141  	AESENC	X4, X4
   142  	AESENC	X5, X5
   143  	AESENC	X6, X6
   144  	AESENC	X7, X7
   145  
   146  	AESENC	X4, X4
   147  	AESENC	X5, X5
   148  	AESENC	X6, X6
   149  	AESENC	X7, X7
   150  
   151  	PXOR	X6, X4
   152  	PXOR	X7, X5
   153  	PXOR	X5, X4
   154  	MOVQ	X4, AX	// return X4
   155  	RET
   156  
   157  aes65to128:
   158  	// make 7 more starting seeds
   159  	MOVO	X1, X2
   160  	MOVO	X1, X3
   161  	MOVO	X1, X4
   162  	MOVO	X1, X5
   163  	MOVO	X1, X6
   164  	MOVO	X1, X7
   165  	PXOR	·aeskeysched+16(SB), X1
   166  	PXOR	·aeskeysched+32(SB), X2
   167  	PXOR	·aeskeysched+48(SB), X3
   168  	PXOR	·aeskeysched+64(SB), X4
   169  	PXOR	·aeskeysched+80(SB), X5
   170  	PXOR	·aeskeysched+96(SB), X6
   171  	PXOR	·aeskeysched+112(SB), X7
   172  	AESENC	X1, X1
   173  	AESENC	X2, X2
   174  	AESENC	X3, X3
   175  	AESENC	X4, X4
   176  	AESENC	X5, X5
   177  	AESENC	X6, X6
   178  	AESENC	X7, X7
   179  
   180  	// load data
   181  	MOVOU	(AX), X8
   182  	MOVOU	16(AX), X9
   183  	MOVOU	32(AX), X10
   184  	MOVOU	48(AX), X11
   185  	MOVOU	-64(AX)(CX*1), X12
   186  	MOVOU	-48(AX)(CX*1), X13
   187  	MOVOU	-32(AX)(CX*1), X14
   188  	MOVOU	-16(AX)(CX*1), X15
   189  
   190  	// xor with seed
   191  	PXOR	X0, X8
   192  	PXOR	X1, X9
   193  	PXOR	X2, X10
   194  	PXOR	X3, X11
   195  	PXOR	X4, X12
   196  	PXOR	X5, X13
   197  	PXOR	X6, X14
   198  	PXOR	X7, X15
   199  
   200  	// scramble 3 times
   201  	AESENC	X8, X8
   202  	AESENC	X9, X9
   203  	AESENC	X10, X10
   204  	AESENC	X11, X11
   205  	AESENC	X12, X12
   206  	AESENC	X13, X13
   207  	AESENC	X14, X14
   208  	AESENC	X15, X15
   209  
   210  	AESENC	X8, X8
   211  	AESENC	X9, X9
   212  	AESENC	X10, X10
   213  	AESENC	X11, X11
   214  	AESENC	X12, X12
   215  	AESENC	X13, X13
   216  	AESENC	X14, X14
   217  	AESENC	X15, X15
   218  
   219  	AESENC	X8, X8
   220  	AESENC	X9, X9
   221  	AESENC	X10, X10
   222  	AESENC	X11, X11
   223  	AESENC	X12, X12
   224  	AESENC	X13, X13
   225  	AESENC	X14, X14
   226  	AESENC	X15, X15
   227  
   228  	// combine results
   229  	PXOR	X12, X8
   230  	PXOR	X13, X9
   231  	PXOR	X14, X10
   232  	PXOR	X15, X11
   233  	PXOR	X10, X8
   234  	PXOR	X11, X9
   235  	PXOR	X9, X8
   236  	// X15 must be zero on return
   237  	PXOR	X15, X15
   238  	MOVQ	X8, AX	// return X8
   239  	RET
   240  
   241  aes129plus:
   242  	// make 7 more starting seeds
   243  	MOVO	X1, X2
   244  	MOVO	X1, X3
   245  	MOVO	X1, X4
   246  	MOVO	X1, X5
   247  	MOVO	X1, X6
   248  	MOVO	X1, X7
   249  	PXOR	·aeskeysched+16(SB), X1
   250  	PXOR	·aeskeysched+32(SB), X2
   251  	PXOR	·aeskeysched+48(SB), X3
   252  	PXOR	·aeskeysched+64(SB), X4
   253  	PXOR	·aeskeysched+80(SB), X5
   254  	PXOR	·aeskeysched+96(SB), X6
   255  	PXOR	·aeskeysched+112(SB), X7
   256  	AESENC	X1, X1
   257  	AESENC	X2, X2
   258  	AESENC	X3, X3
   259  	AESENC	X4, X4
   260  	AESENC	X5, X5
   261  	AESENC	X6, X6
   262  	AESENC	X7, X7
   263  
   264  	// start with last (possibly overlapping) block
   265  	MOVOU	-128(AX)(CX*1), X8
   266  	MOVOU	-112(AX)(CX*1), X9
   267  	MOVOU	-96(AX)(CX*1), X10
   268  	MOVOU	-80(AX)(CX*1), X11
   269  	MOVOU	-64(AX)(CX*1), X12
   270  	MOVOU	-48(AX)(CX*1), X13
   271  	MOVOU	-32(AX)(CX*1), X14
   272  	MOVOU	-16(AX)(CX*1), X15
   273  
   274  	// xor in seed
   275  	PXOR	X0, X8
   276  	PXOR	X1, X9
   277  	PXOR	X2, X10
   278  	PXOR	X3, X11
   279  	PXOR	X4, X12
   280  	PXOR	X5, X13
   281  	PXOR	X6, X14
   282  	PXOR	X7, X15
   283  
   284  	// compute number of remaining 128-byte blocks
   285  	DECQ	CX
   286  	SHRQ	$7, CX
   287  
   288  	PCALIGN $16
   289  aesloop:
   290  	// scramble state
   291  	AESENC	X8, X8
   292  	AESENC	X9, X9
   293  	AESENC	X10, X10
   294  	AESENC	X11, X11
   295  	AESENC	X12, X12
   296  	AESENC	X13, X13
   297  	AESENC	X14, X14
   298  	AESENC	X15, X15
   299  
   300  	// scramble state, xor in a block
   301  	MOVOU	(AX), X0
   302  	MOVOU	16(AX), X1
   303  	MOVOU	32(AX), X2
   304  	MOVOU	48(AX), X3
   305  	AESENC	X0, X8
   306  	AESENC	X1, X9
   307  	AESENC	X2, X10
   308  	AESENC	X3, X11
   309  	MOVOU	64(AX), X4
   310  	MOVOU	80(AX), X5
   311  	MOVOU	96(AX), X6
   312  	MOVOU	112(AX), X7
   313  	AESENC	X4, X12
   314  	AESENC	X5, X13
   315  	AESENC	X6, X14
   316  	AESENC	X7, X15
   317  
   318  	ADDQ	$128, AX
   319  	DECQ	CX
   320  	JNE	aesloop
   321  
   322  	// 3 more scrambles to finish
   323  	AESENC	X8, X8
   324  	AESENC	X9, X9
   325  	AESENC	X10, X10
   326  	AESENC	X11, X11
   327  	AESENC	X12, X12
   328  	AESENC	X13, X13
   329  	AESENC	X14, X14
   330  	AESENC	X15, X15
   331  	AESENC	X8, X8
   332  	AESENC	X9, X9
   333  	AESENC	X10, X10
   334  	AESENC	X11, X11
   335  	AESENC	X12, X12
   336  	AESENC	X13, X13
   337  	AESENC	X14, X14
   338  	AESENC	X15, X15
   339  	AESENC	X8, X8
   340  	AESENC	X9, X9
   341  	AESENC	X10, X10
   342  	AESENC	X11, X11
   343  	AESENC	X12, X12
   344  	AESENC	X13, X13
   345  	AESENC	X14, X14
   346  	AESENC	X15, X15
   347  
   348  	PXOR	X12, X8
   349  	PXOR	X13, X9
   350  	PXOR	X14, X10
   351  	PXOR	X15, X11
   352  	PXOR	X10, X8
   353  	PXOR	X11, X9
   354  	PXOR	X9, X8
   355  	// X15 must be zero on return
   356  	PXOR	X15, X15
   357  	MOVQ	X8, AX	// return X8
   358  	RET
   359  
   360  // simple mask to get rid of data in the high part of the register.
   361  DATA masks<>+0x00(SB)/8, $0x0000000000000000
   362  DATA masks<>+0x08(SB)/8, $0x0000000000000000
   363  DATA masks<>+0x10(SB)/8, $0x00000000000000ff
   364  DATA masks<>+0x18(SB)/8, $0x0000000000000000
   365  DATA masks<>+0x20(SB)/8, $0x000000000000ffff
   366  DATA masks<>+0x28(SB)/8, $0x0000000000000000
   367  DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
   368  DATA masks<>+0x38(SB)/8, $0x0000000000000000
   369  DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
   370  DATA masks<>+0x48(SB)/8, $0x0000000000000000
   371  DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
   372  DATA masks<>+0x58(SB)/8, $0x0000000000000000
   373  DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
   374  DATA masks<>+0x68(SB)/8, $0x0000000000000000
   375  DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
   376  DATA masks<>+0x78(SB)/8, $0x0000000000000000
   377  DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
   378  DATA masks<>+0x88(SB)/8, $0x0000000000000000
   379  DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
   380  DATA masks<>+0x98(SB)/8, $0x00000000000000ff
   381  DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
   382  DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
   383  DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
   384  DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
   385  DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
   386  DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
   387  DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
   388  DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
   389  DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
   390  DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
   391  DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
   392  DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
   393  GLOBL masks<>(SB),RODATA,$256
   394  
   395  // these are arguments to pshufb. They move data down from
   396  // the high bytes of the register to the low bytes of the register.
   397  // index is how many bytes to move.
   398  DATA shifts<>+0x00(SB)/8, $0x0000000000000000
   399  DATA shifts<>+0x08(SB)/8, $0x0000000000000000
   400  DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
   401  DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
   402  DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
   403  DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
   404  DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
   405  DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
   406  DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
   407  DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
   408  DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
   409  DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
   410  DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
   411  DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
   412  DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
   413  DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
   414  DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
   415  DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
   416  DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
   417  DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
   418  DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
   419  DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
   420  DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
   421  DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
   422  DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
   423  DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
   424  DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
   425  DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
   426  DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
   427  DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
   428  DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
   429  DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
   430  GLOBL shifts<>(SB),RODATA,$256
   431  
   432  TEXT ·checkMasksAndShiftsAlignment<ABIInternal>(SB),NOSPLIT,$0-1
   433  	// check that masks<>(SB) and shifts<>(SB) are aligned to 16-byte
   434  	MOVQ	$masks<>(SB), AX
   435  	MOVQ	$shifts<>(SB), BX
   436  	ORQ	BX, AX
   437  	TESTQ	$15, AX
   438  	SETEQ	AX
   439  	RET
   440  

// View as plain text