Text file src/internal/runtime/maps/memhash_386.s

     1  // Copyright 2026 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "textflag.h"
     6  
     7  // hash function using AES hardware instructions
        //
        // memHash32AES hashes the 4 bytes at p, mixed with seed h.
        //   p+0(FP):   pointer to the data
        //   h+4(FP):   32-bit seed
        //   ret+8(FP): 32-bit result (low 32 bits of the final X0)
        // ·aeskeysched is not defined in this listing; three consecutive
        // 16-byte slices of it are used as the AESENC round keys.
     8  TEXT ·memHash32AES(SB),NOSPLIT,$0-12
     9  	MOVL	p+0(FP), AX	// ptr to data
    10  	MOVL	h+4(FP), X0	// seed -> 32-bit lane 0 of X0
    11  	PINSRD	$1, (AX), X0	// data -> 32-bit lane 1 of X0
    12  	AESENC	·aeskeysched+0(SB), X0	// round 1, key = bytes 0-15
    13  	AESENC	·aeskeysched+16(SB), X0	// round 2, key = bytes 16-31
    14  	AESENC	·aeskeysched+32(SB), X0	// round 3, key = bytes 32-47
    15  	MOVL	X0, ret+8(FP)	// return the low 32 bits
    16  	RET
    17  
    18  TEXT ·memHash64AES(SB),NOSPLIT,$0-12
        	// memHash64AES hashes the 8 bytes at p, mixed with seed h.
        	//   p+0(FP):   pointer to the data
        	//   h+4(FP):   32-bit seed
        	//   ret+8(FP): 32-bit result (low 32 bits of the final X0)
        	// Same three keyed AESENC rounds as memHash32AES; only the
        	// packing of data and seed into X0 differs.
    19  	MOVL	p+0(FP), AX	// ptr to data
    20  	MOVQ	(AX), X0	// data -> low 8 bytes of X0
    21  	PINSRD	$2, h+4(FP), X0	// seed -> 32-bit lane 2, just above the data
    22  	AESENC	·aeskeysched+0(SB), X0	// round 1, key = bytes 0-15
    23  	AESENC	·aeskeysched+16(SB), X0	// round 2, key = bytes 16-31
    24  	AESENC	·aeskeysched+32(SB), X0	// round 3, key = bytes 32-47
    25  	MOVL	X0, ret+8(FP)	// return the low 32 bits
    26  	RET
    27  
    28  TEXT ·memHashAES(SB),NOSPLIT,$0-16
        	// memHashAES hashes s bytes starting at p.
        	//   p+0(FP):    pointer to the data
        	//   s+8(FP):    length in bytes
        	//   ret+12(FP): 32-bit result, written by aeshashbody through DX
        	// Offset 4 of the argument area is not referenced here, but
        	// aeshashbody reads it as h+4(FP) after the JMP (this frame is
        	// still the current one), so it must hold the seed.
    29  	MOVL	p+0(FP), AX	// ptr to data
    30  	MOVL	s+8(FP), BX	// size
    31  	LEAL	ret+12(FP), DX	// where aeshashbody stores the result
    32  	JMP	·aeshashbody<>(SB)	// tail jump; aeshashbody executes the RET
    33  
    34  TEXT ·strHashAES(SB),NOSPLIT,$0-12
        	// strHashAES hashes the bytes of the string object that p points to.
        	//   p+0(FP):   pointer to the string object
        	//              (data pointer at offset 0, length at offset 4)
        	//   ret+8(FP): 32-bit result, written by aeshashbody through DX
        	// Offset 4 of the argument area is not referenced here, but
        	// aeshashbody reads it as h+4(FP) after the JMP (this frame is
        	// still the current one), so it must hold the seed.
    35  	MOVL	p+0(FP), AX	// ptr to string object
    36  	MOVL	4(AX), BX	// length of string (read before AX is overwritten)
    37  	MOVL	(AX), AX	// string data
    38  	LEAL	ret+8(FP), DX	// where aeshashbody stores the result
    39  	JMP	·aeshashbody<>(SB)	// tail jump; aeshashbody executes the RET
    40  
    41  // AX: data
    42  // BX: length
    43  // DX: address to put return value
        //
        // aeshashbody is the shared body that memHashAES and strHashAES reach
        // by JMP. Its own frame is $0-0, so h+4(FP) below addresses offset 4 of
        // the jumping function's argument area, which must hold the seed.
        // The 32-bit result is stored through DX and the function returns to
        // the original caller.
        //
        // The length selects one of six paths: 0, 1-15, 16, 17-32, 33-64, 65+.
        // Depending on the path, AX, BX, X0-X7 and the flags are overwritten.
        //
        // Operand order reminder: "AESENC K, S" performs one AES encryption
        // round on S using K as the round key and leaves the result in S.
    44  TEXT ·aeshashbody<>(SB),NOSPLIT,$0-0
    45  	MOVL	h+4(FP), X0	            // 32 bits of per-table hash seed
    46  	PINSRW	$4, BX, X0	            // 16 bits of length
    47  	PSHUFHW	$0, X0, X0	            // replace size with its low 2 bytes repeated 4 times
    48  	MOVO	X0, X1                      // save unscrambled seed
    49  	PXOR	·aeskeysched(SB), X0 // xor in per-process seed
    50  	AESENC	X0, X0                      // scramble seed
    51  
        	// From here on: X0 = scrambled seed, X1 = unscrambled seed+length.
        	// Dispatch on the length using unsigned comparisons.
    52  	CMPL	BX, $16
    53  	JB	aes0to15	// length < 16
    54  	JE	aes16	// length == 16
    55  	CMPL	BX, $32
    56  	JBE	aes17to32	// 17 <= length <= 32
    57  	CMPL	BX, $64
    58  	JBE	aes33to64	// 33 <= length <= 64
    59  	JMP	aes65plus	// length >= 65
    60  
    61  aes0to15:
    62  	TESTL	BX, BX
    63  	JE	aes0	// nothing to load for an empty input
    64  
        	// AX becomes data+16. If bits 4-11 of data+16 are all zero, then
        	// bits 4-11 of data are all ones, i.e. data lies in the last 16
        	// bytes of a 4096-byte-aligned block and a 16-byte load starting
        	// at data could run into the next page.
    65  	ADDL	$16, AX
    66  	TESTW	$0xff0, AX
    67  	JE	endofpage
    68  
    69  	// 16 bytes loaded at this address won't cross
    70  	// a page boundary, so we can load it directly.
    71  	MOVOU	-16(AX), X1	// 16 bytes starting at the original data pointer
    72  	ADDL	BX, BX	// BX = 2*length, so BX*8 below is 16*length
    73  	PAND	masks<>(SB)(BX*8), X1	// keep only the low `length` bytes
    74  
    75  final1:
        	// Shared finish for the 1-15 and 16 byte paths: X1 holds the data.
    76  	PXOR	X0, X1	// xor data with seed
    77  	AESENC	X1, X1  // scramble combo 3 times
    78  	AESENC	X1, X1
    79  	AESENC	X1, X1
    80  	MOVL	X1, (DX)	// result = low 32 bits
    81  	RET
    82  
    83  endofpage:
    84  	// bits 4-11 of the address are all ones (it ends in 1111 1111 xxxx).
    85  	// Might be up against a page boundary, so load ending at last byte.
    86  	// Then shift bytes down using pshufb.
    87  	MOVOU	-32(AX)(BX*1), X1	// AX is data+16 here, so this is data+length-16
    88  	ADDL	BX, BX	// BX = 2*length, so BX*8 below is 16*length
    89  	PSHUFB	shifts<>(SB)(BX*8), X1	// move the top `length` bytes to the bottom, zero the rest
    90  	JMP	final1
    91  
    92  aes0:
    93  	// Return scrambled input seed
    94  	AESENC	X0, X0
    95  	MOVL	X0, (DX)
    96  	RET
    97  
    98  aes16:
    99  	MOVOU	(AX), X1	// exactly one 16-byte block; overwrites the saved seed copy
   100  	JMP	final1
   101  
   102  aes17to32:
   103  	// make second starting seed
   104  	PXOR	·aeskeysched+16(SB), X1
   105  	AESENC	X1, X1
   106  
   107  	// load data to be hashed
   108  	MOVOU	(AX), X2	// first 16 bytes
   109  	MOVOU	-16(AX)(BX*1), X3	// last 16 bytes (overlaps the first block when length < 32)
   110  
   111  	// xor with seed
   112  	PXOR	X0, X2
   113  	PXOR	X1, X3
   114  
   115  	// scramble 3 times
   116  	AESENC	X2, X2
   117  	AESENC	X3, X3
   118  	AESENC	X2, X2
   119  	AESENC	X3, X3
   120  	AESENC	X2, X2
   121  	AESENC	X3, X3
   122  
   123  	// combine results
   124  	PXOR	X3, X2
   125  	MOVL	X2, (DX)	// result = low 32 bits
   126  	RET
   127  
   128  aes33to64:
   129  	// make 3 more starting seeds
        	// (X1, X2, X3 each start from the unscrambled seed and differ only
        	// in which 16-byte slice of ·aeskeysched is xored in)
   130  	MOVO	X1, X2
   131  	MOVO	X1, X3
   132  	PXOR	·aeskeysched+16(SB), X1
   133  	PXOR	·aeskeysched+32(SB), X2
   134  	PXOR	·aeskeysched+48(SB), X3
   135  	AESENC	X1, X1
   136  	AESENC	X2, X2
   137  	AESENC	X3, X3
   138  
        	// load the first 32 and the last 32 bytes; the two halves overlap
        	// when length < 64
   139  	MOVOU	(AX), X4
   140  	MOVOU	16(AX), X5
   141  	MOVOU	-32(AX)(BX*1), X6
   142  	MOVOU	-16(AX)(BX*1), X7
   143  
        	// xor each block with its own seed
   144  	PXOR	X0, X4
   145  	PXOR	X1, X5
   146  	PXOR	X2, X6
   147  	PXOR	X3, X7
   148  
        	// scramble each lane 3 times
   149  	AESENC	X4, X4
   150  	AESENC	X5, X5
   151  	AESENC	X6, X6
   152  	AESENC	X7, X7
   153  
   154  	AESENC	X4, X4
   155  	AESENC	X5, X5
   156  	AESENC	X6, X6
   157  	AESENC	X7, X7
   158  
   159  	AESENC	X4, X4
   160  	AESENC	X5, X5
   161  	AESENC	X6, X6
   162  	AESENC	X7, X7
   163  
        	// fold the four lanes into X4
   164  	PXOR	X6, X4
   165  	PXOR	X7, X5
   166  	PXOR	X5, X4
   167  	MOVL	X4, (DX)	// result = low 32 bits
   168  	RET
   169  
   170  aes65plus:
   171  	// make 3 more starting seeds
   172  	MOVO	X1, X2
   173  	MOVO	X1, X3
   174  	PXOR	·aeskeysched+16(SB), X1
   175  	PXOR	·aeskeysched+32(SB), X2
   176  	PXOR	·aeskeysched+48(SB), X3
   177  	AESENC	X1, X1
   178  	AESENC	X2, X2
   179  	AESENC	X3, X3
   180  
   181  	// start with last (possibly overlapping) block
   182  	MOVOU	-64(AX)(BX*1), X4
   183  	MOVOU	-48(AX)(BX*1), X5
   184  	MOVOU	-32(AX)(BX*1), X6
   185  	MOVOU	-16(AX)(BX*1), X7
   186  
   187  	// scramble state once
        	// (the state X4-X7 is the tail data; the four seeds are the round keys)
   188  	AESENC	X0, X4
   189  	AESENC	X1, X5
   190  	AESENC	X2, X6
   191  	AESENC	X3, X7
   192  
   193  	// compute number of remaining 64-byte blocks
        	// BX = (length-1)/64, which is at least 1 because length >= 65 here,
        	// so the do-while loop below always has work on its first pass.
   194  	DECL	BX
   195  	SHRL	$6, BX
   196  
   197  aesloop:
   198  	// scramble state, xor in a block
        	// (X0-X3 are reused for data; the seeds are no longer needed)
   199  	MOVOU	(AX), X0
   200  	MOVOU	16(AX), X1
   201  	MOVOU	32(AX), X2
   202  	MOVOU	48(AX), X3
   203  	AESENC	X0, X4
   204  	AESENC	X1, X5
   205  	AESENC	X2, X6
   206  	AESENC	X3, X7
   207  
   208  	// scramble state
   209  	AESENC	X4, X4
   210  	AESENC	X5, X5
   211  	AESENC	X6, X6
   212  	AESENC	X7, X7
   213  
   214  	ADDL	$64, AX	// advance to the next 64-byte block
   215  	DECL	BX
   216  	JNE	aesloop	// repeat until the block count reaches zero
   217  
   218  	// 3 more scrambles to finish
   219  	AESENC	X4, X4
   220  	AESENC	X5, X5
   221  	AESENC	X6, X6
   222  	AESENC	X7, X7
   223  
   224  	AESENC	X4, X4
   225  	AESENC	X5, X5
   226  	AESENC	X6, X6
   227  	AESENC	X7, X7
   228  
   229  	AESENC	X4, X4
   230  	AESENC	X5, X5
   231  	AESENC	X6, X6
   232  	AESENC	X7, X7
   233  
        	// fold the four lanes into X4
   234  	PXOR	X6, X4
   235  	PXOR	X7, X5
   236  	PXOR	X5, X4
   237  	MOVL	X4, (DX)	// result = low 32 bits
   238  	RET
   239  
   240  // simple mask to get rid of data in the high part of the register.
        //
        // masks<> holds 16 entries of 16 bytes each. Entry n (at byte offset
        // 16*n, n = 0..15) has its low n bytes set to 0xff and the remaining
        // bytes zero, so PAND with entry n keeps the low n bytes of a register.
        // aeshashbody doubles the length and scales it by 8 to reach offset
        // 16*length.
   241  DATA masks<>+0x00(SB)/4, $0x00000000
   242  DATA masks<>+0x04(SB)/4, $0x00000000
   243  DATA masks<>+0x08(SB)/4, $0x00000000
   244  DATA masks<>+0x0c(SB)/4, $0x00000000
   245  
   246  DATA masks<>+0x10(SB)/4, $0x000000ff
   247  DATA masks<>+0x14(SB)/4, $0x00000000
   248  DATA masks<>+0x18(SB)/4, $0x00000000
   249  DATA masks<>+0x1c(SB)/4, $0x00000000
   250  
   251  DATA masks<>+0x20(SB)/4, $0x0000ffff
   252  DATA masks<>+0x24(SB)/4, $0x00000000
   253  DATA masks<>+0x28(SB)/4, $0x00000000
   254  DATA masks<>+0x2c(SB)/4, $0x00000000
   255  
   256  DATA masks<>+0x30(SB)/4, $0x00ffffff
   257  DATA masks<>+0x34(SB)/4, $0x00000000
   258  DATA masks<>+0x38(SB)/4, $0x00000000
   259  DATA masks<>+0x3c(SB)/4, $0x00000000
   260  
   261  DATA masks<>+0x40(SB)/4, $0xffffffff
   262  DATA masks<>+0x44(SB)/4, $0x00000000
   263  DATA masks<>+0x48(SB)/4, $0x00000000
   264  DATA masks<>+0x4c(SB)/4, $0x00000000
   265  
   266  DATA masks<>+0x50(SB)/4, $0xffffffff
   267  DATA masks<>+0x54(SB)/4, $0x000000ff
   268  DATA masks<>+0x58(SB)/4, $0x00000000
   269  DATA masks<>+0x5c(SB)/4, $0x00000000
   270  
   271  DATA masks<>+0x60(SB)/4, $0xffffffff
   272  DATA masks<>+0x64(SB)/4, $0x0000ffff
   273  DATA masks<>+0x68(SB)/4, $0x00000000
   274  DATA masks<>+0x6c(SB)/4, $0x00000000
   275  
   276  DATA masks<>+0x70(SB)/4, $0xffffffff
   277  DATA masks<>+0x74(SB)/4, $0x00ffffff
   278  DATA masks<>+0x78(SB)/4, $0x00000000
   279  DATA masks<>+0x7c(SB)/4, $0x00000000
   280  
   281  DATA masks<>+0x80(SB)/4, $0xffffffff
   282  DATA masks<>+0x84(SB)/4, $0xffffffff
   283  DATA masks<>+0x88(SB)/4, $0x00000000
   284  DATA masks<>+0x8c(SB)/4, $0x00000000
   285  
   286  DATA masks<>+0x90(SB)/4, $0xffffffff
   287  DATA masks<>+0x94(SB)/4, $0xffffffff
   288  DATA masks<>+0x98(SB)/4, $0x000000ff
   289  DATA masks<>+0x9c(SB)/4, $0x00000000
   290  
   291  DATA masks<>+0xa0(SB)/4, $0xffffffff
   292  DATA masks<>+0xa4(SB)/4, $0xffffffff
   293  DATA masks<>+0xa8(SB)/4, $0x0000ffff
   294  DATA masks<>+0xac(SB)/4, $0x00000000
   295  
   296  DATA masks<>+0xb0(SB)/4, $0xffffffff
   297  DATA masks<>+0xb4(SB)/4, $0xffffffff
   298  DATA masks<>+0xb8(SB)/4, $0x00ffffff
   299  DATA masks<>+0xbc(SB)/4, $0x00000000
   300  
   301  DATA masks<>+0xc0(SB)/4, $0xffffffff
   302  DATA masks<>+0xc4(SB)/4, $0xffffffff
   303  DATA masks<>+0xc8(SB)/4, $0xffffffff
   304  DATA masks<>+0xcc(SB)/4, $0x00000000
   305  
   306  DATA masks<>+0xd0(SB)/4, $0xffffffff
   307  DATA masks<>+0xd4(SB)/4, $0xffffffff
   308  DATA masks<>+0xd8(SB)/4, $0xffffffff
   309  DATA masks<>+0xdc(SB)/4, $0x000000ff
   310  
   311  DATA masks<>+0xe0(SB)/4, $0xffffffff
   312  DATA masks<>+0xe4(SB)/4, $0xffffffff
   313  DATA masks<>+0xe8(SB)/4, $0xffffffff
   314  DATA masks<>+0xec(SB)/4, $0x0000ffff
   315  
   316  DATA masks<>+0xf0(SB)/4, $0xffffffff
   317  DATA masks<>+0xf4(SB)/4, $0xffffffff
   318  DATA masks<>+0xf8(SB)/4, $0xffffffff
   319  DATA masks<>+0xfc(SB)/4, $0x00ffffff
   320  
   321  GLOBL masks<>(SB),RODATA,$256
   322  
   323  // these are arguments to pshufb. They move data down from
   324  // the high bytes of the register to the low bytes of the register.
   325  // index is how many bytes to move.
        //
        // shifts<> holds 16 entries of 16 bytes each. In entry n (at byte
        // offset 16*n), control byte i is 16-n+i for i < n and 0xff otherwise:
        // source bytes 16-n..15 land in destination bytes 0..n-1, and every
        // other destination byte is zeroed (pshufb writes zero for a control
        // byte whose high bit is set).
        // Entry 0 is all zeros; aeshashbody never indexes it because a zero
        // length branches to aes0 before the table is used.
   326  DATA shifts<>+0x00(SB)/4, $0x00000000
   327  DATA shifts<>+0x04(SB)/4, $0x00000000
   328  DATA shifts<>+0x08(SB)/4, $0x00000000
   329  DATA shifts<>+0x0c(SB)/4, $0x00000000
   330  
   331  DATA shifts<>+0x10(SB)/4, $0xffffff0f
   332  DATA shifts<>+0x14(SB)/4, $0xffffffff
   333  DATA shifts<>+0x18(SB)/4, $0xffffffff
   334  DATA shifts<>+0x1c(SB)/4, $0xffffffff
   335  
   336  DATA shifts<>+0x20(SB)/4, $0xffff0f0e
   337  DATA shifts<>+0x24(SB)/4, $0xffffffff
   338  DATA shifts<>+0x28(SB)/4, $0xffffffff
   339  DATA shifts<>+0x2c(SB)/4, $0xffffffff
   340  
   341  DATA shifts<>+0x30(SB)/4, $0xff0f0e0d
   342  DATA shifts<>+0x34(SB)/4, $0xffffffff
   343  DATA shifts<>+0x38(SB)/4, $0xffffffff
   344  DATA shifts<>+0x3c(SB)/4, $0xffffffff
   345  
   346  DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c
   347  DATA shifts<>+0x44(SB)/4, $0xffffffff
   348  DATA shifts<>+0x48(SB)/4, $0xffffffff
   349  DATA shifts<>+0x4c(SB)/4, $0xffffffff
   350  
   351  DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b
   352  DATA shifts<>+0x54(SB)/4, $0xffffff0f
   353  DATA shifts<>+0x58(SB)/4, $0xffffffff
   354  DATA shifts<>+0x5c(SB)/4, $0xffffffff
   355  
   356  DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a
   357  DATA shifts<>+0x64(SB)/4, $0xffff0f0e
   358  DATA shifts<>+0x68(SB)/4, $0xffffffff
   359  DATA shifts<>+0x6c(SB)/4, $0xffffffff
   360  
   361  DATA shifts<>+0x70(SB)/4, $0x0c0b0a09
   362  DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
   363  DATA shifts<>+0x78(SB)/4, $0xffffffff
   364  DATA shifts<>+0x7c(SB)/4, $0xffffffff
   365  
   366  DATA shifts<>+0x80(SB)/4, $0x0b0a0908
   367  DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
   368  DATA shifts<>+0x88(SB)/4, $0xffffffff
   369  DATA shifts<>+0x8c(SB)/4, $0xffffffff
   370  
   371  DATA shifts<>+0x90(SB)/4, $0x0a090807
   372  DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
   373  DATA shifts<>+0x98(SB)/4, $0xffffff0f
   374  DATA shifts<>+0x9c(SB)/4, $0xffffffff
   375  
   376  DATA shifts<>+0xa0(SB)/4, $0x09080706
   377  DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
   378  DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
   379  DATA shifts<>+0xac(SB)/4, $0xffffffff
   380  
   381  DATA shifts<>+0xb0(SB)/4, $0x08070605
   382  DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
   383  DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
   384  DATA shifts<>+0xbc(SB)/4, $0xffffffff
   385  
   386  DATA shifts<>+0xc0(SB)/4, $0x07060504
   387  DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
   388  DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
   389  DATA shifts<>+0xcc(SB)/4, $0xffffffff
   390  
   391  DATA shifts<>+0xd0(SB)/4, $0x06050403
   392  DATA shifts<>+0xd4(SB)/4, $0x0a090807
   393  DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
   394  DATA shifts<>+0xdc(SB)/4, $0xffffff0f
   395  
   396  DATA shifts<>+0xe0(SB)/4, $0x05040302
   397  DATA shifts<>+0xe4(SB)/4, $0x09080706
   398  DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
   399  DATA shifts<>+0xec(SB)/4, $0xffff0f0e
   400  
   401  DATA shifts<>+0xf0(SB)/4, $0x04030201
   402  DATA shifts<>+0xf4(SB)/4, $0x08070605
   403  DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
   404  DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d
   405  
   406  GLOBL shifts<>(SB),RODATA,$256
   407  
   408  TEXT ·checkMasksAndShiftsAlignment(SB),NOSPLIT,$0-1
   409  	// check that masks<>(SB) and shifts<>(SB) are both 16-byte aligned.
        	// Takes no arguments; the one-byte result at ret+0(FP) is 1 when
        	// both addresses have their low four bits clear and 0 otherwise.
        	// Both tables are used as direct memory operands of PAND and PSHUFB
        	// in aeshashbody; those SSE forms fault on an unaligned 16-byte
        	// memory operand, which is why the alignment matters.
   410  	MOVL	$masks<>(SB), AX	// address of masks<>
   411  	MOVL	$shifts<>(SB), BX	// address of shifts<>
   412  	ORL	BX, AX	// merge the low bits so one test covers both addresses
   413  	TESTL	$15, AX	// ZF set iff neither address has any of bits 0-3 set
   414  	SETEQ   ret+0(FP)
   415  	RET
   416  

View as plain text