Text file
src/crypto/sha1/sha1block_riscv64.s
1 // Copyright 2025 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 //go:build !purego
6
7 #include "textflag.h"
8
// LOAD(index) assembles one 32-bit message word from the input.
// It reads the four bytes at offsets index*4+0 .. index*4+3 from the
// pointer in X29 and combines them with the byte at the lowest address
// in the most significant position (big-endian order). The word is left
// in X5 and is also stored in slot index of the buffer addressed by X19.
// Only byte loads are used to read the input.
// Clobbers X5, X6, X7 and X8.
9 #define LOAD(index) \
10 MOVBU ((index*4)+0)(X29), X5; \
11 MOVBU ((index*4)+1)(X29), X6; \
12 MOVBU ((index*4)+2)(X29), X7; \
13 MOVBU ((index*4)+3)(X29), X8; \
14 SLL $24, X5; \
15 SLL $16, X6; \
16 OR X5, X6, X5; \
17 SLL $8, X7; \
18 OR X5, X7, X5; \
19 OR X5, X8, X5; \
20 MOVW X5, (index*4)(X19)
21
// SHUFFLE(index) derives the message word for round index from earlier
// words, treating the buffer at X19 as a ring of 16 32-bit slots:
//
//   w[i&15] = rotl32(w[i&15] ^ w[(i-3)&15] ^ w[(i-8)&15] ^ w[(i-14)&15], 1)
//
// Slot i&15 is the same slot as (i-16)&15, so the value read from it is
// the word written 16 rounds earlier; it is overwritten with the result.
// RORW $31 is a 32-bit rotate right by 31, which equals a rotate left by 1.
// The new word is left in X5. Clobbers X5, X6, X7 and X8.
22 #define SHUFFLE(index) \
23 MOVWU (((index)&0xf)*4)(X19), X5; \
24 MOVWU (((index-3)&0xf)*4)(X19), X6; \
25 MOVWU (((index-8)&0xf)*4)(X19), X7; \
26 MOVWU (((index-14)&0xf)*4)(X19), X8; \
27 XOR X6, X5; \
28 XOR X7, X5; \
29 XOR X8, X5; \
30 RORW $31, X5; \
31 MOVW X5, (((index)&0xf)*4)(X19)
32
// FUNC1 computes the round function used by ROUND1 and ROUND1x and
// leaves the result in X7. For each bit it selects c where b is 1 and
// d where b is 0. Arguments a and e are accepted but not used.
// Only X7 is written; b, c and d are left unchanged.
33 // f = d ^ (b & (c ^ d))
34 #define FUNC1(a, b, c, d, e) \
35 XOR c, d, X7; \
36 AND b, X7; \
37 XOR d, X7
38
// FUNC2 computes the round function used by ROUND2 (and, through the
// FUNC4 alias, by ROUND4) and leaves the result in X7: the bitwise XOR
// of b, c and d. Arguments a and e are accepted but not used.
// Only X7 is written; b, c and d are left unchanged.
39 // f = b ^ c ^ d
40 #define FUNC2(a, b, c, d, e) \
41 XOR b, c, X7; \
42 XOR d, X7
43
// FUNC3 computes the round function used by ROUND3 and leaves the
// result in X7: for each bit, the majority value of b, c and d.
// Arguments a and e are accepted but not used.
// Clobbers X6 and X8 in addition to writing X7; b, c and d are left
// unchanged.
44 // f = (b & c) | ((b | c) & d)
45 #define FUNC3(a, b, c, d, e) \
46 OR b, c, X8; \
47 AND b, c, X6; \
48 AND d, X8; \
49 OR X6, X8, X7
50
// FUNC4 is the round function used by ROUND4. It is the same
// b ^ c ^ d computation as FUNC2, so it is defined as an alias.
51 #define FUNC4 FUNC2
52
// MIX(a, b, c, d, e, key) completes one round. On entry X7 must hold
// the round function value (from FUNC1..FUNC4) and X5 the message word
// (from LOAD or SHUFFLE); key is the register holding the round constant.
// It performs:
//
//   b  = rotl32(b, 30)                 (RORW $2)
//   e += X7 + X5 + key + rotl32(a, 5)  (RORW $27 into X8)
//
// The new value is accumulated in e rather than moved between registers;
// the callers in this file pass the five state registers in a different
// order on each successive round instead. Arguments c and d are not used.
// Clobbers X8.
53 #define MIX(a, b, c, d, e, key) \
54 RORW $2, b; \
55 ADD X7, e; \
56 RORW $27, a, X8; \
57 ADD X5, e; \
58 ADD key, e; \
59 ADD X8, e
60
// The ROUND macros each perform one complete round on the state
// registers a, b, c, d, e. Every variant runs the same three steps:
// obtain the message word for this round in X5, compute the round
// function in X7, then fold both into the state with MIX.
//
//   macro    message word    function  constant register
//   ROUND1   LOAD(index)     FUNC1     X15
//   ROUND1x  SHUFFLE(index)  FUNC1     X15
//   ROUND2   SHUFFLE(index)  FUNC2     X16
//   ROUND3   SHUFFLE(index)  FUNC3     X17
//   ROUND4   SHUFFLE(index)  FUNC4     X18
//
// ROUND1 takes its word straight from the input at X29 and is only
// valid for index 0..15, because LOAD stores to slot index of the
// 16-word buffer without wrapping. The other variants wrap the index
// modulo 16 inside SHUFFLE. All variants clobber X5-X8 via the macros
// they expand.
61 #define ROUND1(a, b, c, d, e, index) \
62 LOAD(index); \
63 FUNC1(a, b, c, d, e); \
64 MIX(a, b, c, d, e, X15)
65
66 #define ROUND1x(a, b, c, d, e, index) \
67 SHUFFLE(index); \
68 FUNC1(a, b, c, d, e); \
69 MIX(a, b, c, d, e, X15)
70
71 #define ROUND2(a, b, c, d, e, index) \
72 SHUFFLE(index); \
73 FUNC2(a, b, c, d, e); \
74 MIX(a, b, c, d, e, X16)
75
76 #define ROUND3(a, b, c, d, e, index) \
77 SHUFFLE(index); \
78 FUNC3(a, b, c, d, e); \
79 MIX(a, b, c, d, e, X17)
80
81 #define ROUND4(a, b, c, d, e, index) \
82 SHUFFLE(index); \
83 FUNC4(a, b, c, d, e); \
84 MIX(a, b, c, d, e, X18)
85
// block folds every complete 64-byte chunk of p into the five 32-bit
// state words stored at the start of dig. The length of p is rounded
// down to a multiple of 64; trailing bytes are ignored. If p holds no
// complete chunk the function returns without reading or writing dig.
//
// Register use:
//   X29      pointer to the current 64-byte chunk of p
//   X28      end pointer (p_base + rounded length)
//   X19      16-word message schedule buffer, at 8(X2) in this frame
//   X20      dig
//   X10-X14  working state a, b, c, d, e
//   X15-X18  round constants _K[0] .. _K[3]
//   X22-X26  copy of a..e taken before the 80 rounds of a chunk
//   X5-X8    scratch used by the LOAD/SHUFFLE/FUNCn/MIX macros
//   X21,X30  temporaries used during setup only
//
// The five state registers are passed to the ROUND macros in a rotated
// order on each round, so the state never has to be moved between
// registers; after 80 rounds (a multiple of 5) the roles line up with
// X10-X14 again.
86 // func block(dig *Digest, p []byte)
87 TEXT ·block(SB),NOSPLIT,$64-32
88 MOV p_base+8(FP), X29
89 MOV p_len+16(FP), X30
90 SRL $6, X30 // round the length down ...
91 SLL $6, X30 // ... to a multiple of 64
92
93 ADD X29, X30, X28 // X28 = end of the last complete chunk
94 BEQ X28, X29, end // no complete chunk: nothing to do
95
96 ADD $8, X2, X19 // message schedule buffer on stack
97
98 MOV dig+0(FP), X20
99 MOVWU (0*4)(X20), X10 // a = H0
100 MOVWU (1*4)(X20), X11 // b = H1
101 MOVWU (2*4)(X20), X12 // c = H2
102 MOVWU (3*4)(X20), X13 // d = H3
103 MOVWU (4*4)(X20), X14 // e = H4
104
105 MOV $·_K(SB), X21
106 MOVW (0*4)(X21), X15 // constant for rounds 0-19
107 MOVW (1*4)(X21), X16 // constant for rounds 20-39
108 MOVW (2*4)(X21), X17 // constant for rounds 40-59
109 MOVW (3*4)(X21), X18 // constant for rounds 60-79
110
111 loop:
// Keep the state as it was before this chunk; it is added back in
// after the 80 rounds.
112 MOVW X10, X22
113 MOVW X11, X23
114 MOVW X12, X24
115 MOVW X13, X25
116 MOVW X14, X26
117
118 ROUND1(X10, X11, X12, X13, X14, 0)
119 ROUND1(X14, X10, X11, X12, X13, 1)
120 ROUND1(X13, X14, X10, X11, X12, 2)
121 ROUND1(X12, X13, X14, X10, X11, 3)
122 ROUND1(X11, X12, X13, X14, X10, 4)
123 ROUND1(X10, X11, X12, X13, X14, 5)
124 ROUND1(X14, X10, X11, X12, X13, 6)
125 ROUND1(X13, X14, X10, X11, X12, 7)
126 ROUND1(X12, X13, X14, X10, X11, 8)
127 ROUND1(X11, X12, X13, X14, X10, 9)
128 ROUND1(X10, X11, X12, X13, X14, 10)
129 ROUND1(X14, X10, X11, X12, X13, 11)
130 ROUND1(X13, X14, X10, X11, X12, 12)
131 ROUND1(X12, X13, X14, X10, X11, 13)
132 ROUND1(X11, X12, X13, X14, X10, 14)
133 ROUND1(X10, X11, X12, X13, X14, 15)
134
135 ROUND1x(X14, X10, X11, X12, X13, 16)
136 ROUND1x(X13, X14, X10, X11, X12, 17)
137 ROUND1x(X12, X13, X14, X10, X11, 18)
138 ROUND1x(X11, X12, X13, X14, X10, 19)
139
140 ROUND2(X10, X11, X12, X13, X14, 20)
141 ROUND2(X14, X10, X11, X12, X13, 21)
142 ROUND2(X13, X14, X10, X11, X12, 22)
143 ROUND2(X12, X13, X14, X10, X11, 23)
144 ROUND2(X11, X12, X13, X14, X10, 24)
145 ROUND2(X10, X11, X12, X13, X14, 25)
146 ROUND2(X14, X10, X11, X12, X13, 26)
147 ROUND2(X13, X14, X10, X11, X12, 27)
148 ROUND2(X12, X13, X14, X10, X11, 28)
149 ROUND2(X11, X12, X13, X14, X10, 29)
150 ROUND2(X10, X11, X12, X13, X14, 30)
151 ROUND2(X14, X10, X11, X12, X13, 31)
152 ROUND2(X13, X14, X10, X11, X12, 32)
153 ROUND2(X12, X13, X14, X10, X11, 33)
154 ROUND2(X11, X12, X13, X14, X10, 34)
155 ROUND2(X10, X11, X12, X13, X14, 35)
156 ROUND2(X14, X10, X11, X12, X13, 36)
157 ROUND2(X13, X14, X10, X11, X12, 37)
158 ROUND2(X12, X13, X14, X10, X11, 38)
159 ROUND2(X11, X12, X13, X14, X10, 39)
160
161 ROUND3(X10, X11, X12, X13, X14, 40)
162 ROUND3(X14, X10, X11, X12, X13, 41)
163 ROUND3(X13, X14, X10, X11, X12, 42)
164 ROUND3(X12, X13, X14, X10, X11, 43)
165 ROUND3(X11, X12, X13, X14, X10, 44)
166 ROUND3(X10, X11, X12, X13, X14, 45)
167 ROUND3(X14, X10, X11, X12, X13, 46)
168 ROUND3(X13, X14, X10, X11, X12, 47)
169 ROUND3(X12, X13, X14, X10, X11, 48)
170 ROUND3(X11, X12, X13, X14, X10, 49)
171 ROUND3(X10, X11, X12, X13, X14, 50)
172 ROUND3(X14, X10, X11, X12, X13, 51)
173 ROUND3(X13, X14, X10, X11, X12, 52)
174 ROUND3(X12, X13, X14, X10, X11, 53)
175 ROUND3(X11, X12, X13, X14, X10, 54)
176 ROUND3(X10, X11, X12, X13, X14, 55)
177 ROUND3(X14, X10, X11, X12, X13, 56)
178 ROUND3(X13, X14, X10, X11, X12, 57)
179 ROUND3(X12, X13, X14, X10, X11, 58)
180 ROUND3(X11, X12, X13, X14, X10, 59)
181
182 ROUND4(X10, X11, X12, X13, X14, 60)
183 ROUND4(X14, X10, X11, X12, X13, 61)
184 ROUND4(X13, X14, X10, X11, X12, 62)
185 ROUND4(X12, X13, X14, X10, X11, 63)
186 ROUND4(X11, X12, X13, X14, X10, 64)
187 ROUND4(X10, X11, X12, X13, X14, 65)
188 ROUND4(X14, X10, X11, X12, X13, 66)
189 ROUND4(X13, X14, X10, X11, X12, 67)
190 ROUND4(X12, X13, X14, X10, X11, 68)
191 ROUND4(X11, X12, X13, X14, X10, 69)
192 ROUND4(X10, X11, X12, X13, X14, 70)
193 ROUND4(X14, X10, X11, X12, X13, 71)
194 ROUND4(X13, X14, X10, X11, X12, 72)
195 ROUND4(X12, X13, X14, X10, X11, 73)
196 ROUND4(X11, X12, X13, X14, X10, 74)
197 ROUND4(X10, X11, X12, X13, X14, 75)
198 ROUND4(X14, X10, X11, X12, X13, 76)
199 ROUND4(X13, X14, X10, X11, X12, 77)
200 ROUND4(X12, X13, X14, X10, X11, 78)
201 ROUND4(X11, X12, X13, X14, X10, 79)
202
// Add the pre-chunk state back in. Only the low 32 bits are kept,
// by the MOVW at the top of the next iteration or by the stores below.
203 ADD X22, X10
204 ADD X23, X11
205 ADD X24, X12
206 ADD X25, X13
207 ADD X26, X14
208
209 ADD $64, X29
210 BNE X28, X29, loop
211
// Write the updated state back. This is reached only by falling out
// of the loop, so X20 and X10-X14 are always initialized here.
212 MOVW X10, (0*4)(X20)
213 MOVW X11, (1*4)(X20)
214 MOVW X12, (2*4)(X20)
215 MOVW X13, (3*4)(X20)
216 MOVW X14, (4*4)(X20)
217
// The early exit for an input with no complete chunk lands here, after
// the stores: on that path dig was never loaded into X20 and X10-X14
// hold no state, so storing through X20 would write undefined values
// to an undefined address.
218 end:
219 RET
220
// _K is a 16-byte read-only table holding the four 32-bit SHA-1 round
// constants, one per group of 20 rounds. block loads entries 0..3 into
// X15..X18, which the ROUND1/ROUND1x, ROUND2, ROUND3 and ROUND4 macros
// pass to MIX respectively.
221 GLOBL ·_K(SB),RODATA,$16
222 DATA ·_K+0(SB)/4, $0x5A827999
223 DATA ·_K+4(SB)/4, $0x6ED9EBA1
224 DATA ·_K+8(SB)/4, $0x8F1BBCDC
225 DATA ·_K+12(SB)/4, $0xCA62C1D6
226
View as plain text