Text file src/crypto/sha1/sha1block_riscv64.s

     1  // Copyright 2025 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build !purego
     6  
     7  #include "textflag.h"
     8  
        // LOAD(index) assembles 32-bit word number `index` of the current chunk
        // from the four bytes at (index*4)+0..3 off X29, most significant byte
        // first: X5 = b0<<24 | b1<<16 | b2<<8 | b3. Only byte loads are used, so
        // the macro places no alignment requirement on X29.
        // The word is stored to slot `index` of the buffer addressed by X19 and is
        // also left in X5, which MIX adds into the round.
        // Clobbers X5, X6, X7, X8.
     9  #define LOAD(index) \
    10  	MOVBU	((index*4)+0)(X29), X5; \
    11  	MOVBU	((index*4)+1)(X29), X6; \
    12  	MOVBU	((index*4)+2)(X29), X7; \
    13  	MOVBU	((index*4)+3)(X29), X8; \
    14  	SLL	$24, X5; \
    15  	SLL	$16, X6; \
    16  	OR	X5, X6, X5; \
    17  	SLL	$8, X7; \
    18  	OR	X5, X7, X5; \
    19  	OR	X5, X8, X5; \
    20  	MOVW	X5, (index*4)(X19)
    21  
        // SHUFFLE(index) derives the schedule word for round `index` from four
        // earlier words kept in the 16-slot buffer addressed by X19:
        //   w = rotl32(w[index&15] ^ w[(index-3)&15] ^ w[(index-8)&15] ^ w[(index-14)&15], 1)
        // RORW $31 is a 32-bit rotate right by 31, which equals a rotate left by 1.
        // Slot index&15 is read before it is overwritten with the result; because
        // each round writes the slot selected by its own index&15, that slot still
        // holds the word produced 16 rounds earlier.
        // The result is left in X5, which MIX adds into the round.
        // Clobbers X5, X6, X7, X8.
    22  #define SHUFFLE(index) \
    23  	MOVWU	(((index)&0xf)*4)(X19), X5; \
    24  	MOVWU	(((index-3)&0xf)*4)(X19), X6; \
    25  	MOVWU	(((index-8)&0xf)*4)(X19), X7; \
    26  	MOVWU	(((index-14)&0xf)*4)(X19), X8; \
    27  	XOR	X6, X5; \
    28  	XOR	X7, X5; \
    29  	XOR	X8, X5; \
    30  	RORW	$31, X5; \
    31  	MOVW	X5, (((index)&0xf)*4)(X19)
    32  
    33  // f = d ^ (b & (c ^ d))
        // FUNC1 leaves f in X7, where MIX reads it. Only b, c and d are read;
        // a and e are unused (all FUNCn macros take the same five arguments).
        // b, c and d are not modified. Clobbers X7.
    34  #define FUNC1(a, b, c, d, e) \
    35  	XOR	c, d, X7; \
    36  	AND	b, X7; \
    37  	XOR	d, X7
    38  
    39  // f = b ^ c ^ d
        // FUNC2 leaves f in X7, where MIX reads it. Only b, c and d are read;
        // a and e are unused. b, c and d are not modified. Clobbers X7.
    40  #define FUNC2(a, b, c, d, e) \
    41  	XOR	b, c, X7; \
    42  	XOR	d, X7
    43  
    44  // f = (b & c) | ((b | c) & d)
        // FUNC3 leaves f in X7, where MIX reads it. X8 holds (b | c) & d and X6
        // holds b & c as intermediates. Only b, c and d are read; a and e are
        // unused. b, c and d are not modified. Clobbers X6, X7, X8 (X5, which
        // carries the schedule word, is left alone).
    45  #define FUNC3(a, b, c, d, e) \
    46  	OR	b, c, X8; \
    47  	AND	b, c, X6; \
    48  	AND	d, X8; \
    49  	OR	X6, X8, X7
    50  
        // FUNC4 is an alias for FUNC2: ROUND4 uses the same f = b ^ c ^ d and
        // differs from ROUND2 only in the key register passed to MIX.
    51  #define FUNC4 FUNC2
    52  
        // MIX(a, b, c, d, e, key) completes one round. It expects f in X7 (from a
        // FUNCn macro) and the schedule word w in X5 (from LOAD or SHUFFLE):
        //   b = rotr32(b, 2)                      (same as rotl32(b, 30))
        //   e = e + f + w + key + rotr32(a, 27)   (rotr by 27 == rotl by 5)
        // a, c and d are not modified. Clobbers X8.
        // NOTE(review): ADD is the full-register add, so bits above bit 31 of e
        // are not kept clean here. This looks harmless because the low 32 bits of
        // a sum depend only on the low 32 bits of its inputs, and e is later
        // consumed by bitwise ops, RORW and 32-bit MOVW stores - confirm.
    53  #define MIX(a, b, c, d, e, key) \
    54  	RORW	$2, b; \
    55  	ADD	X7, e; \
    56  	RORW	$27, a, X8; \
    57  	ADD	X5, e; \
    58  	ADD	key, e; \
    59  	ADD	X8, e
    60  
        // ROUND1 is one round for indices 0..15: the schedule word is read
        // directly from the input with LOAD, f comes from FUNC1, and the round key
        // is X15. Callers rotate the register arguments from one invocation to the
        // next instead of moving values between registers.
        // Clobbers X5, X6, X7, X8; updates b and e.
    61  #define ROUND1(a, b, c, d, e, index) \
    62  	LOAD(index); \
    63  	FUNC1(a, b, c, d, e); \
    64  	MIX(a, b, c, d, e, X15)
    65  
        // ROUND1x is one round for indices 16..19: same FUNC1 and key register
        // (X15) as ROUND1, but the schedule word is computed from earlier words
        // with SHUFFLE rather than loaded from the input.
        // Clobbers X5, X6, X7, X8; updates b and e.
    66  #define ROUND1x(a, b, c, d, e, index) \
    67  	SHUFFLE(index); \
    68  	FUNC1(a, b, c, d, e); \
    69  	MIX(a, b, c, d, e, X15)
    70  
        // ROUND2 is one round for indices 20..39: schedule word from SHUFFLE,
        // f from FUNC2, round key in X16.
        // Clobbers X5, X6, X7, X8; updates b and e.
    71  #define ROUND2(a, b, c, d, e, index) \
    72  	SHUFFLE(index); \
    73  	FUNC2(a, b, c, d, e); \
    74  	MIX(a, b, c, d, e, X16)
    75  
        // ROUND3 is one round for indices 40..59: schedule word from SHUFFLE,
        // f from FUNC3, round key in X17.
        // Clobbers X5, X6, X7, X8; updates b and e.
    76  #define ROUND3(a, b, c, d, e, index) \
    77  	SHUFFLE(index); \
    78  	FUNC3(a, b, c, d, e); \
    79  	MIX(a, b, c, d, e, X17)
    80  
        // ROUND4 is one round for indices 60..79: schedule word from SHUFFLE,
        // f from FUNC4 (an alias of FUNC2), round key in X18.
        // Clobbers X5, X6, X7, X8; updates b and e.
    81  #define ROUND4(a, b, c, d, e, index) \
    82  	SHUFFLE(index); \
    83  	FUNC4(a, b, c, d, e); \
    84  	MIX(a, b, c, d, e, X18)
    85  
    86  // func block(dig *Digest, p []byte)
        //
        // block folds every complete 64-byte chunk of p into the five 32-bit state
        // words stored at offsets 0, 4, 8, 12 and 16 of dig. The length is rounded
        // down to a multiple of 64, so trailing bytes are ignored. When p holds no
        // complete chunk the function returns without reading or writing dig.
        //
        // Frame: $64 bytes of locals hold the 16-word message schedule; $32 bytes
        // of arguments are dig (8) plus the slice header of p (24).
        //
        // Register use:
        //   X29      pointer to the current chunk
        //   X28      end pointer (first byte past the last complete chunk)
        //   X30      rounded-down byte count (only used to compute X28)
        //   X19      message schedule buffer on the stack
        //   X20      dig
        //   X21      address of the _K constant table
        //   X10-X14  working variables a, b, c, d, e
        //   X15-X18  round constants _K[0..3]
        //   X22-X26  copy of a..e taken at the start of each chunk
        //   X5-X8    scratch used by the round macros
    87  TEXT ·block(SB),NOSPLIT,$64-32
    88  	MOV	p_base+8(FP), X29
    89  	MOV	p_len+16(FP), X30
    90  	SRL	$6, X30
    91  	SLL	$6, X30	// X30 = len(p) rounded down to a multiple of 64
    92  
    93  	ADD	X29, X30, X28
    94  	BEQ	X28, X29, end	// no complete chunk: nothing to do
    95  
    96  	ADD	$8, X2, X19	// message schedule buffer on stack
    97  
    98  	MOV	dig+0(FP), X20
    99  	MOVWU	(0*4)(X20), X10	// a = H0
   100  	MOVWU	(1*4)(X20), X11	// b = H1
   101  	MOVWU	(2*4)(X20), X12	// c = H2
   102  	MOVWU	(3*4)(X20), X13	// d = H3
   103  	MOVWU	(4*4)(X20), X14	// e = H4
   104  
   105  	MOV	$·_K(SB), X21
   106  	MOVW	(0*4)(X21), X15
   107  	MOVW	(1*4)(X21), X16
   108  	MOVW	(2*4)(X21), X17
   109  	MOVW	(3*4)(X21), X18
   110  
   111  loop:
        	// Remember the state at chunk start; it is added back after round 79.
   112  	MOVW	X10, X22
   113  	MOVW	X11, X23
   114  	MOVW	X12, X24
   115  	MOVW	X13, X25
   116  	MOVW	X14, X26
   117  
        	// Each round rotates the register roles (e->a, a->b, ...) by permuting
        	// the macro arguments; after every five rounds the mapping is back to
        	// a=X10, b=X11, c=X12, d=X13, e=X14.
   118  	ROUND1(X10, X11, X12, X13, X14, 0)
   119  	ROUND1(X14, X10, X11, X12, X13, 1)
   120  	ROUND1(X13, X14, X10, X11, X12, 2)
   121  	ROUND1(X12, X13, X14, X10, X11, 3)
   122  	ROUND1(X11, X12, X13, X14, X10, 4)
   123  	ROUND1(X10, X11, X12, X13, X14, 5)
   124  	ROUND1(X14, X10, X11, X12, X13, 6)
   125  	ROUND1(X13, X14, X10, X11, X12, 7)
   126  	ROUND1(X12, X13, X14, X10, X11, 8)
   127  	ROUND1(X11, X12, X13, X14, X10, 9)
   128  	ROUND1(X10, X11, X12, X13, X14, 10)
   129  	ROUND1(X14, X10, X11, X12, X13, 11)
   130  	ROUND1(X13, X14, X10, X11, X12, 12)
   131  	ROUND1(X12, X13, X14, X10, X11, 13)
   132  	ROUND1(X11, X12, X13, X14, X10, 14)
   133  	ROUND1(X10, X11, X12, X13, X14, 15)
   134  
   135  	ROUND1x(X14, X10, X11, X12, X13, 16)
   136  	ROUND1x(X13, X14, X10, X11, X12, 17)
   137  	ROUND1x(X12, X13, X14, X10, X11, 18)
   138  	ROUND1x(X11, X12, X13, X14, X10, 19)
   139  
   140  	ROUND2(X10, X11, X12, X13, X14, 20)
   141  	ROUND2(X14, X10, X11, X12, X13, 21)
   142  	ROUND2(X13, X14, X10, X11, X12, 22)
   143  	ROUND2(X12, X13, X14, X10, X11, 23)
   144  	ROUND2(X11, X12, X13, X14, X10, 24)
   145  	ROUND2(X10, X11, X12, X13, X14, 25)
   146  	ROUND2(X14, X10, X11, X12, X13, 26)
   147  	ROUND2(X13, X14, X10, X11, X12, 27)
   148  	ROUND2(X12, X13, X14, X10, X11, 28)
   149  	ROUND2(X11, X12, X13, X14, X10, 29)
   150  	ROUND2(X10, X11, X12, X13, X14, 30)
   151  	ROUND2(X14, X10, X11, X12, X13, 31)
   152  	ROUND2(X13, X14, X10, X11, X12, 32)
   153  	ROUND2(X12, X13, X14, X10, X11, 33)
   154  	ROUND2(X11, X12, X13, X14, X10, 34)
   155  	ROUND2(X10, X11, X12, X13, X14, 35)
   156  	ROUND2(X14, X10, X11, X12, X13, 36)
   157  	ROUND2(X13, X14, X10, X11, X12, 37)
   158  	ROUND2(X12, X13, X14, X10, X11, 38)
   159  	ROUND2(X11, X12, X13, X14, X10, 39)
   160  
   161  	ROUND3(X10, X11, X12, X13, X14, 40)
   162  	ROUND3(X14, X10, X11, X12, X13, 41)
   163  	ROUND3(X13, X14, X10, X11, X12, 42)
   164  	ROUND3(X12, X13, X14, X10, X11, 43)
   165  	ROUND3(X11, X12, X13, X14, X10, 44)
   166  	ROUND3(X10, X11, X12, X13, X14, 45)
   167  	ROUND3(X14, X10, X11, X12, X13, 46)
   168  	ROUND3(X13, X14, X10, X11, X12, 47)
   169  	ROUND3(X12, X13, X14, X10, X11, 48)
   170  	ROUND3(X11, X12, X13, X14, X10, 49)
   171  	ROUND3(X10, X11, X12, X13, X14, 50)
   172  	ROUND3(X14, X10, X11, X12, X13, 51)
   173  	ROUND3(X13, X14, X10, X11, X12, 52)
   174  	ROUND3(X12, X13, X14, X10, X11, 53)
   175  	ROUND3(X11, X12, X13, X14, X10, 54)
   176  	ROUND3(X10, X11, X12, X13, X14, 55)
   177  	ROUND3(X14, X10, X11, X12, X13, 56)
   178  	ROUND3(X13, X14, X10, X11, X12, 57)
   179  	ROUND3(X12, X13, X14, X10, X11, 58)
   180  	ROUND3(X11, X12, X13, X14, X10, 59)
   181  
   182  	ROUND4(X10, X11, X12, X13, X14, 60)
   183  	ROUND4(X14, X10, X11, X12, X13, 61)
   184  	ROUND4(X13, X14, X10, X11, X12, 62)
   185  	ROUND4(X12, X13, X14, X10, X11, 63)
   186  	ROUND4(X11, X12, X13, X14, X10, 64)
   187  	ROUND4(X10, X11, X12, X13, X14, 65)
   188  	ROUND4(X14, X10, X11, X12, X13, 66)
   189  	ROUND4(X13, X14, X10, X11, X12, 67)
   190  	ROUND4(X12, X13, X14, X10, X11, 68)
   191  	ROUND4(X11, X12, X13, X14, X10, 69)
   192  	ROUND4(X10, X11, X12, X13, X14, 70)
   193  	ROUND4(X14, X10, X11, X12, X13, 71)
   194  	ROUND4(X13, X14, X10, X11, X12, 72)
   195  	ROUND4(X12, X13, X14, X10, X11, 73)
   196  	ROUND4(X11, X12, X13, X14, X10, 74)
   197  	ROUND4(X10, X11, X12, X13, X14, 75)
   198  	ROUND4(X14, X10, X11, X12, X13, 76)
   199  	ROUND4(X13, X14, X10, X11, X12, 77)
   200  	ROUND4(X12, X13, X14, X10, X11, 78)
   201  	ROUND4(X11, X12, X13, X14, X10, 79)
   202  
        	// Add the chunk's starting state back into the working variables.
   203  	ADD	X22, X10
   204  	ADD	X23, X11
   205  	ADD	X24, X12
   206  	ADD	X25, X13
   207  	ADD	X26, X14
   208  
   209  	ADD	$64, X29
   210  	BNE	X28, X29, loop
   211  
        	// Write the updated state back to dig. This must stay above the end
        	// label: the early exit branches to end before X20 and X10-X14 have
        	// been loaded, so it must not fall into these stores.
   212  	MOVW	X10, (0*4)(X20)
   213  	MOVW	X11, (1*4)(X20)
   214  	MOVW	X12, (2*4)(X20)
   215  	MOVW	X13, (3*4)(X20)
   216  	MOVW	X14, (4*4)(X20)
   217  
   218  end:
   219  	RET
   220  
        // _K is a 16-byte read-only table of four 32-bit round constants. block
        // loads entries 0..3 into X15..X18, which the ROUND macros use as the key
        // for rounds 0..19, 20..39, 40..59 and 60..79 respectively.
   221  GLOBL	·_K(SB),RODATA,$16
   222  DATA	·_K+0(SB)/4, $0x5A827999
   223  DATA	·_K+4(SB)/4, $0x6ED9EBA1
   224  DATA	·_K+8(SB)/4, $0x8F1BBCDC
   225  DATA	·_K+12(SB)/4, $0xCA62C1D6
   226  

View as plain text