Text file src/crypto/sha1/sha1block_loong64.s

     1  // Copyright 2024 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build !purego
     6  
     7  #include "textflag.h"
     8  
     9  // SHA-1 block routine. See sha1block.go for Go equivalent.
    10  //
    11  // There are 80 rounds of 4 types:
    12  //   - rounds 0-15 are type 1 and load data (ROUND1 macro).
    13  //   - rounds 16-19 are type 1 and do not load data (ROUND1x macro).
    14  //   - rounds 20-39 are type 2 and do not load data (ROUND2 macro).
    15  //   - rounds 40-59 are type 3 and do not load data (ROUND3 macro).
    16  //   - rounds 60-79 are type 4 and do not load data (ROUND4 macro).
    17  //
    18  // Each round loads or shuffles the data, then computes a per-round
    19  // function of b, c, d, and then mixes the result into and rotates the
    20  // five registers a, b, c, d, e holding the intermediate results.
    21  //
    22  // The register rotation is implemented by rotating the arguments to
    23  // the round macros instead of by explicit move instructions.
    24  
    25  #define REGTMP	R30	// scratch: w[i-3] in LOAD, b&c in FUNC3
    26  #define REGTMP1	R17	// scratch: w[i-8] in LOAD; round function f (written by FUNCn, read by MIX)
    27  #define REGTMP2	R18	// scratch: w[i-14] in LOAD, (b|c)&d in FUNC3, rotated a in MIX
    28  #define REGTMP3	R19	// message word w[i] (written by LOAD1/LOAD, read by MIX)
    29  
        // LOAD1(index) fetches the 32-bit input word at offset index*4 from R5,
        // byte-swaps it, leaves it in REGTMP3 and also stores it at offset
        // index*4 from R3, where LOAD later reads it back.
        // The byte swap is emitted as a raw WORD. According to its comment it
        // encodes REVB2W REGTMP3, REGTMP3, so the constant is tied to REGTMP3
        // being R19 and must be re-encoded if that define changes.
    30  #define LOAD1(index) \
    31  	MOVW	(index*4)(R5), REGTMP3; \
    32  	WORD	$0x3a73; \	// REVB2W REGTMP3, REGTMP3: byte-swap the big-endian input word
    33  	MOVW	REGTMP3, (index*4)(R3)
    34  
        // LOAD(index) extends the message schedule in place. The 16 words at
        // R3 are treated as a circular buffer indexed by (index & 0xf):
        //   w[i&15] = rotl1(w[i&15] ^ w[(i-3)&15] ^ w[(i-8)&15] ^ w[(i-14)&15])
        // (ROTR by 31 on a 32-bit word is a rotate left by 1.)
        // The new word is left in REGTMP3 for MIX and written back to the
        // buffer. REGTMP, REGTMP1 and REGTMP2 are clobbered.
    35  #define LOAD(index) \
    36  	MOVW	(((index)&0xf)*4)(R3), REGTMP3; \
    37  	MOVW	(((index-3)&0xf)*4)(R3), REGTMP; \
    38  	MOVW	(((index-8)&0xf)*4)(R3), REGTMP1; \
    39  	MOVW	(((index-14)&0xf)*4)(R3), REGTMP2; \
    40  	XOR	REGTMP, REGTMP3; \
    41  	XOR	REGTMP1, REGTMP3; \
    42  	XOR	REGTMP2, REGTMP3; \
    43  	ROTR	$31, REGTMP3; \
    44  	MOVW	REGTMP3, (((index)&0xf)*4)(R3)
    45  
    46  // f = d ^ (b & (c ^ d))
        // Round function for ROUND1 and ROUND1x. The result is left in REGTMP1;
        // b, c and d are only read, a and e are not referenced.
    47  #define FUNC1(a, b, c, d, e) \
    48  	XOR	c, d, REGTMP1; \
    49  	AND	b, REGTMP1; \
    50  	XOR	d, REGTMP1
    51  
    52  // f = b ^ c ^ d
        // Round function for ROUND2 (and, through the FUNC4 alias, ROUND4).
        // The result is left in REGTMP1; b, c and d are only read, a and e
        // are not referenced.
    53  #define FUNC2(a, b, c, d, e) \
    54  	XOR	b, c, REGTMP1; \
    55  	XOR	d, REGTMP1
    56  
    57  // f = (b & c) | ((b | c) & d)
        // Round function for ROUND3. The result is left in REGTMP1; REGTMP
        // (b & c) and REGTMP2 ((b | c) & d) are clobbered as intermediates.
        // b, c and d are only read, a and e are not referenced.
    58  #define FUNC3(a, b, c, d, e) \
    59  	OR	b, c, REGTMP2; \
    60  	AND	b, c, REGTMP; \
    61  	AND	d, REGTMP2; \
    62  	OR	REGTMP, REGTMP2, REGTMP1
    63  
    64  #define FUNC4 FUNC2	// ROUND4 reuses the b ^ c ^ d function of FUNC2
    65  
        // MIX(a, b, c, d, e, const) finishes one round ("<<<" is a 32-bit
        // rotate left):
        //   b = b <<< 30
        //   e = e + f + w[i] + const + (a <<< 5)
        // f is read from REGTMP1 (left there by FUNCn) and w[i] from REGTMP3
        // (left there by LOAD1/LOAD). b and e are updated in place, REGTMP2
        // is clobbered, and c and d are not referenced.
        // NOTE(review): the constant is added with ADDV while the other
        // additions use ADD; presumably only the low 32 bits of e matter to
        // the instructions that consume it -- confirm.
    66  #define MIX(a, b, c, d, e, const) \
    67  	ROTR	$2, b; \	// b = b <<< 30 (rotate right by 2)
    68  	ADD	REGTMP1, e; \	// e = e + f
    69  	ROTR	$27, a, REGTMP2; \	// REGTMP2 = a <<< 5 (rotate right by 27)
    70  	ADD	REGTMP3, e; \	// e = e + w[i]
    71  	ADDV	$const, e; \	// e = e + k
    72  	ADD	REGTMP2, e	// e = e + (a <<< 5)
    73  
        // ROUND1: one type 1 round that also consumes input. LOAD1 reads and
        // byte-swaps word `index` of the current block, FUNC1 computes f, and
        // MIX folds f, the word and the constant 0x5A827999 into e.
    74  #define ROUND1(a, b, c, d, e, index) \
    75  	LOAD1(index); \
    76  	FUNC1(a, b, c, d, e); \
    77  	MIX(a, b, c, d, e, 0x5A827999)
    78  
        // ROUND1x: same as ROUND1 except that the message word comes from the
        // schedule recurrence (LOAD) instead of from the input block.
    79  #define ROUND1x(a, b, c, d, e, index) \
    80  	LOAD(index); \
    81  	FUNC1(a, b, c, d, e); \
    82  	MIX(a, b, c, d, e, 0x5A827999)
    83  
        // ROUND2: one type 2 round. LOAD produces the schedule word, FUNC2
        // computes f = b ^ c ^ d, and MIX uses the constant 0x6ED9EBA1.
    84  #define ROUND2(a, b, c, d, e, index) \
    85  	LOAD(index); \
    86  	FUNC2(a, b, c, d, e); \
    87  	MIX(a, b, c, d, e, 0x6ED9EBA1)
    88  
        // ROUND3: one type 3 round. LOAD produces the schedule word, FUNC3
        // computes f = (b & c) | ((b | c) & d), and MIX uses the constant
        // 0x8F1BBCDC.
    89  #define ROUND3(a, b, c, d, e, index) \
    90  	LOAD(index); \
    91  	FUNC3(a, b, c, d, e); \
    92  	MIX(a, b, c, d, e, 0x8F1BBCDC)
    93  
        // ROUND4: one type 4 round. LOAD produces the schedule word, FUNC4
        // (an alias of FUNC2) computes f = b ^ c ^ d, and MIX uses the
        // constant 0xCA62C1D6.
    94  #define ROUND4(a, b, c, d, e, index) \
    95  	LOAD(index); \
    96  	FUNC4(a, b, c, d, e); \
    97  	MIX(a, b, c, d, e, 0xCA62C1D6)
    98  
    99  // A stack frame size of 64 bytes is required here: the LOAD1 and LOAD
   100  // macros above keep the 16-word (16*4 = 64 bytes) message schedule at
   101  // offsets 0..60 from R3 (presumably the stack pointer). See also the
   102  // definition of the local variable w in the general implementation (sha1block.go).
        //
        // block folds every complete 64-byte chunk of the input into the five
        // 32-bit state words stored at dig. Arguments, as named by the FP
        // references below: dig at 0(FP), p_base at 8(FP), p_len at 16(FP).
        //
        // Register use:
        //   R4       dig, base address of the five state words
        //   R5       read pointer into the input, advanced by 64 per iteration
        //   R6       p_len rounded down to a multiple of 64
        //   R24      end pointer (R5 + R6); the loop exits when R5 reaches it
        //   R7-R11   working state a, b, c, d, e
        //   R12-R16  copy of the state taken at the top of each iteration
        //
        // Bytes beyond the last full 64-byte chunk are ignored, and dig is not
        // written at all when the input holds no full chunk.
   103  TEXT ·block(SB),NOSPLIT,$64-32
   104  	MOVV	dig+0(FP),	R4
   105  	MOVV	p_base+8(FP),	R5
   106  	MOVV	p_len+16(FP),	R6
   107  	AND	$~63, R6	// keep only whole 64-byte blocks
   108  	BEQ	R6, zero	// no full block: return without touching dig
   109  
   110  	// p_len >= 64
   111  	ADDV    R5, R6, R24	// R24 = address just past the last full block
   112  	MOVW	(0*4)(R4), R7
   113  	MOVW	(1*4)(R4), R8
   114  	MOVW	(2*4)(R4), R9
   115  	MOVW	(3*4)(R4), R10
   116  	MOVW	(4*4)(R4), R11
   117  
   118  loop:
   119  	MOVW	R7,	R12	// save the incoming state; it is added back after round 79
   120  	MOVW	R8,	R13
   121  	MOVW	R9,	R14
   122  	MOVW	R10,	R15
   123  	MOVW	R11,	R16
   124  
        	// Each round accumulates its result into its `e` argument, which
        	// then becomes the `a` argument of the next round: the register
        	// list shifts right by one position per line instead of the
        	// values being moved between registers.
   125  	ROUND1(R7,  R8,  R9,  R10, R11, 0)	// rounds 0-15: words come from the input block
   126  	ROUND1(R11, R7,  R8,  R9,  R10, 1)
   127  	ROUND1(R10, R11, R7,  R8,  R9,  2)
   128  	ROUND1(R9,  R10, R11, R7,  R8,  3)
   129  	ROUND1(R8,  R9,  R10, R11, R7,  4)
   130  	ROUND1(R7,  R8,  R9,  R10, R11, 5)
   131  	ROUND1(R11, R7,  R8,  R9,  R10, 6)
   132  	ROUND1(R10, R11, R7,  R8,  R9,  7)
   133  	ROUND1(R9,  R10, R11, R7,  R8,  8)
   134  	ROUND1(R8,  R9,  R10, R11, R7,  9)
   135  	ROUND1(R7,  R8,  R9,  R10, R11, 10)
   136  	ROUND1(R11, R7,  R8,  R9,  R10, 11)
   137  	ROUND1(R10, R11, R7,  R8,  R9,  12)
   138  	ROUND1(R9,  R10, R11, R7,  R8,  13)
   139  	ROUND1(R8,  R9,  R10, R11, R7,  14)
   140  	ROUND1(R7,  R8,  R9,  R10, R11, 15)
   141  
   142  	ROUND1x(R11, R7,  R8,  R9,  R10, 16)	// rounds 16-19: type 1, words come from the schedule
   143  	ROUND1x(R10, R11, R7,  R8,  R9,  17)
   144  	ROUND1x(R9,  R10, R11, R7,  R8,  18)
   145  	ROUND1x(R8,  R9,  R10, R11, R7,  19)
   146  
   147  	ROUND2(R7,  R8,  R9,  R10, R11, 20)	// rounds 20-39: type 2
   148  	ROUND2(R11, R7,  R8,  R9,  R10, 21)
   149  	ROUND2(R10, R11, R7,  R8,  R9,  22)
   150  	ROUND2(R9,  R10, R11, R7,  R8,  23)
   151  	ROUND2(R8,  R9,  R10, R11, R7,  24)
   152  	ROUND2(R7,  R8,  R9,  R10, R11, 25)
   153  	ROUND2(R11, R7,  R8,  R9,  R10, 26)
   154  	ROUND2(R10, R11, R7,  R8,  R9,  27)
   155  	ROUND2(R9,  R10, R11, R7,  R8,  28)
   156  	ROUND2(R8,  R9,  R10, R11, R7,  29)
   157  	ROUND2(R7,  R8,  R9,  R10, R11, 30)
   158  	ROUND2(R11, R7,  R8,  R9,  R10, 31)
   159  	ROUND2(R10, R11, R7,  R8,  R9,  32)
   160  	ROUND2(R9,  R10, R11, R7,  R8,  33)
   161  	ROUND2(R8,  R9,  R10, R11, R7,  34)
   162  	ROUND2(R7,  R8,  R9,  R10, R11, 35)
   163  	ROUND2(R11, R7,  R8,  R9,  R10, 36)
   164  	ROUND2(R10, R11, R7,  R8,  R9,  37)
   165  	ROUND2(R9,  R10, R11, R7,  R8,  38)
   166  	ROUND2(R8,  R9,  R10, R11, R7,  39)
   167  
   168  	ROUND3(R7,  R8,  R9,  R10, R11, 40)	// rounds 40-59: type 3
   169  	ROUND3(R11, R7,  R8,  R9,  R10, 41)
   170  	ROUND3(R10, R11, R7,  R8,  R9,  42)
   171  	ROUND3(R9,  R10, R11, R7,  R8,  43)
   172  	ROUND3(R8,  R9,  R10, R11, R7,  44)
   173  	ROUND3(R7,  R8,  R9,  R10, R11, 45)
   174  	ROUND3(R11, R7,  R8,  R9,  R10, 46)
   175  	ROUND3(R10, R11, R7,  R8,  R9,  47)
   176  	ROUND3(R9,  R10, R11, R7,  R8,  48)
   177  	ROUND3(R8,  R9,  R10, R11, R7,  49)
   178  	ROUND3(R7,  R8,  R9,  R10, R11, 50)
   179  	ROUND3(R11, R7,  R8,  R9,  R10, 51)
   180  	ROUND3(R10, R11, R7,  R8,  R9,  52)
   181  	ROUND3(R9,  R10, R11, R7,  R8,  53)
   182  	ROUND3(R8,  R9,  R10, R11, R7,  54)
   183  	ROUND3(R7,  R8,  R9,  R10, R11, 55)
   184  	ROUND3(R11, R7,  R8,  R9,  R10, 56)
   185  	ROUND3(R10, R11, R7,  R8,  R9,  57)
   186  	ROUND3(R9,  R10, R11, R7,  R8,  58)
   187  	ROUND3(R8,  R9,  R10, R11, R7,  59)
   188  
   189  	ROUND4(R7,  R8,  R9,  R10, R11, 60)	// rounds 60-79: type 4
   190  	ROUND4(R11, R7,  R8,  R9,  R10, 61)
   191  	ROUND4(R10, R11, R7,  R8,  R9,  62)
   192  	ROUND4(R9,  R10, R11, R7,  R8,  63)
   193  	ROUND4(R8,  R9,  R10, R11, R7,  64)
   194  	ROUND4(R7,  R8,  R9,  R10, R11, 65)
   195  	ROUND4(R11, R7,  R8,  R9,  R10, 66)
   196  	ROUND4(R10, R11, R7,  R8,  R9,  67)
   197  	ROUND4(R9,  R10, R11, R7,  R8,  68)
   198  	ROUND4(R8,  R9,  R10, R11, R7,  69)
   199  	ROUND4(R7,  R8,  R9,  R10, R11, 70)
   200  	ROUND4(R11, R7,  R8,  R9,  R10, 71)
   201  	ROUND4(R10, R11, R7,  R8,  R9,  72)
   202  	ROUND4(R9,  R10, R11, R7,  R8,  73)
   203  	ROUND4(R8,  R9,  R10, R11, R7,  74)
   204  	ROUND4(R7,  R8,  R9,  R10, R11, 75)
   205  	ROUND4(R11, R7,  R8,  R9,  R10, 76)
   206  	ROUND4(R10, R11, R7,  R8,  R9,  77)
   207  	ROUND4(R9,  R10, R11, R7,  R8,  78)
   208  	ROUND4(R8,  R9,  R10, R11, R7,  79)
   209  
   210  	ADD	R12, R7	// add the state saved at the top of the iteration
   211  	ADD	R13, R8
   212  	ADD	R14, R9
   213  	ADD	R15, R10
   214  	ADD	R16, R11
   215  
   216  	ADDV	$64, R5	// advance to the next 64-byte block
   217  	BNE	R5, R24, loop	// repeat until the end pointer is reached
   218  
   219  end:
   220  	MOVW	R7, (0*4)(R4)	// write the updated state back to dig
   221  	MOVW	R8, (1*4)(R4)
   222  	MOVW	R9, (2*4)(R4)
   223  	MOVW	R10, (3*4)(R4)
   224  	MOVW	R11, (4*4)(R4)
   225  zero:
   226  	RET
   227  

View as plain text