Text file
src/crypto/sha1/sha1block_loong64.s
1 // Copyright 2024 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 //go:build !purego
6
7 #include "textflag.h"
8
9 // SHA-1 block routine. See sha1block.go for Go equivalent.
10 //
11 // There are 80 rounds of 4 types:
12 // - rounds 0-15 are type 1 and load data (ROUND1 macro).
13 // - rounds 16-19 are type 1 and do not load data (ROUND1x macro).
14 // - rounds 20-39 are type 2 and do not load data (ROUND2 macro).
15 // - rounds 40-59 are type 3 and do not load data (ROUND3 macro).
16 // - rounds 60-79 are type 4 and do not load data (ROUND4 macro).
17 //
18 // Each round loads or shuffles the data, then computes a per-round
19 // function of b, c, d, and then mixes the result into and rotates the
20 // five registers a, b, c, d, e holding the intermediate results.
21 //
22 // The register rotation is implemented by rotating the arguments to
23 // the round macros instead of by explicit move instructions.
24
25 #define REGTMP R30 // scratch: holds w[(index-3)&15] in LOAD and (b & c) in FUNC3
26 #define REGTMP1 R17 // scratch: holds w[(index-8)&15] in LOAD, then the round function result f that MIX adds to e
27 #define REGTMP2 R18 // scratch: holds w[(index-14)&15] in LOAD, (b | c) & d in FUNC3, and the rotated copy of a in MIX
28 #define REGTMP3 R19 // current message word: produced by LOAD1/LOAD and added to e by MIX
29
30 #define LOAD1(index) \ // read 32-bit input word number `index` through R5, byte-swap it, leave it in REGTMP3 and save it in the R3-relative buffer
31 MOVW (index*4)(R5), REGTMP3; \ // REGTMP3 = word at byte offset index*4 from R5
32 WORD $0x3a73; \ // REVB2W REGTMP3, REGTMP3 to big-endian (emitted as a raw instruction word)
33 MOVW REGTMP3, (index*4)(R3) // store the swapped word at the same offset from R3 so later LOAD rounds can read it
34
35 #define LOAD(index) \ // expand one schedule word in the 16-slot buffer at R3: slot[index&15] = (slot[index&15] ^ slot[(index-3)&15] ^ slot[(index-8)&15] ^ slot[(index-14)&15]) rotated right by 31; the result is also left in REGTMP3. Clobbers REGTMP, REGTMP1, REGTMP2.
36 MOVW (((index)&0xf)*4)(R3), REGTMP3; \ // REGTMP3 = word currently in slot index&15
37 MOVW (((index-3)&0xf)*4)(R3), REGTMP; \ // REGTMP = slot (index-3)&15
38 MOVW (((index-8)&0xf)*4)(R3), REGTMP1; \ // REGTMP1 = slot (index-8)&15
39 MOVW (((index-14)&0xf)*4)(R3), REGTMP2; \ // REGTMP2 = slot (index-14)&15
40 XOR REGTMP, REGTMP3; \ // REGTMP3 ^= REGTMP
41 XOR REGTMP1, REGTMP3; \ // REGTMP3 ^= REGTMP1
42 XOR REGTMP2, REGTMP3; \ // REGTMP3 ^= REGTMP2
43 ROTR $31, REGTMP3; \ // rotate right by 31 (a left rotate by 1 for a 32-bit value)
44 MOVW REGTMP3, (((index)&0xf)*4)(R3) // overwrite slot index&15 with the new word
45
46 // f = d ^ (b & (c ^ d))
47 #define FUNC1(a, b, c, d, e) \ // leaves f in REGTMP1; a and e are not referenced, b, c and d are only read
48 XOR c, d, REGTMP1; \ // REGTMP1 = c ^ d
49 AND b, REGTMP1; \ // REGTMP1 = b & (c ^ d)
50 XOR d, REGTMP1 // REGTMP1 = d ^ (b & (c ^ d))
51
52 // f = b ^ c ^ d
53 #define FUNC2(a, b, c, d, e) \ // leaves f in REGTMP1; a and e are not referenced, b, c and d are only read
54 XOR b, c, REGTMP1; \ // REGTMP1 = b ^ c
55 XOR d, REGTMP1 // REGTMP1 = b ^ c ^ d
56
57 // f = (b & c) | ((b | c) & d)
58 #define FUNC3(a, b, c, d, e) \ // leaves f in REGTMP1; clobbers REGTMP and REGTMP2; a and e are not referenced, b, c and d are only read
59 OR b, c, REGTMP2; \ // REGTMP2 = b | c
60 AND b, c, REGTMP; \ // REGTMP = b & c
61 AND d, REGTMP2; \ // REGTMP2 = (b | c) & d
62 OR REGTMP, REGTMP2, REGTMP1 // REGTMP1 = (b & c) | ((b | c) & d)
63
64 #define FUNC4 FUNC2 // type-4 rounds reuse the type-2 function f = b ^ c ^ d
65
66 #define MIX(a, b, c, d, e, const) \ // e += f + w[i] + const + (a rotated), and b is rotated in place. Expects f in REGTMP1 and w[i] in REGTMP3; overwrites REGTMP2; c and d are not referenced.
67 ROTR $2, b; \ // b = b rotated right by 2 (b << 30 as a 32-bit left rotate)
68 ADD REGTMP1, e; \ // e = e + f
69 ROTR $27, a, REGTMP2; \ // REGTMP2 = a rotated right by 27 (a << 5 as a 32-bit left rotate); a itself is unchanged
70 ADD REGTMP3, e; \ // e = e + w[i]
71 ADDV $const, e; \ // e = e + k. NOTE(review): this is ADDV while the other adds are ADD; presumably only the low 32 bits of e matter here - confirm
72 ADD REGTMP2, e // e = e + a<<5
73
74 #define ROUND1(a, b, c, d, e, index) \ // type-1 round that loads its message word from the input (used for rounds 0-15)
75 LOAD1(index); \ // REGTMP3 = byte-swapped input word `index`, also saved in the R3 buffer
76 FUNC1(a, b, c, d, e); \ // REGTMP1 = d ^ (b & (c ^ d))
77 MIX(a, b, c, d, e, 0x5A827999) // fold f, the word and the type-1 constant into e; rotate b
78
79 #define ROUND1x(a, b, c, d, e, index) \ // type-1 round that derives its message word from the R3 buffer instead of the input (used for rounds 16-19)
80 LOAD(index); \ // REGTMP3 = newly expanded word for slot index&15
81 FUNC1(a, b, c, d, e); \ // REGTMP1 = d ^ (b & (c ^ d))
82 MIX(a, b, c, d, e, 0x5A827999) // fold f, the word and the type-1 constant into e; rotate b
83
84 #define ROUND2(a, b, c, d, e, index) \ // type-2 round (used for rounds 20-39)
85 LOAD(index); \ // REGTMP3 = newly expanded word for slot index&15
86 FUNC2(a, b, c, d, e); \ // REGTMP1 = b ^ c ^ d
87 MIX(a, b, c, d, e, 0x6ED9EBA1) // fold f, the word and the type-2 constant into e; rotate b
88
89 #define ROUND3(a, b, c, d, e, index) \ // type-3 round (used for rounds 40-59)
90 LOAD(index); \ // REGTMP3 = newly expanded word for slot index&15
91 FUNC3(a, b, c, d, e); \ // REGTMP1 = (b & c) | ((b | c) & d)
92 MIX(a, b, c, d, e, 0x8F1BBCDC) // fold f, the word and the type-3 constant into e; rotate b
93
94 #define ROUND4(a, b, c, d, e, index) \ // type-4 round (used for rounds 60-79)
95 LOAD(index); \ // REGTMP3 = newly expanded word for slot index&15
96 FUNC4(a, b, c, d, e); \ // REGTMP1 = b ^ c ^ d (FUNC4 is an alias of FUNC2)
97 MIX(a, b, c, d, e, 0xCA62C1D6) // fold f, the word and the type-4 constant into e; rotate b
98
99 // A stack frame size of 64 bytes is required here, because
100 // the frame size used for data expansion is 64 bytes.
101 // See the definition of the macro LOAD above, and the definition
102 // of the local variable w in the general implementation (sha1block.go).
103 TEXT ·block(SB),NOSPLIT,$64-32 // processes every complete 64-byte block of p, updating the five 32-bit state words at dig
104 MOVV dig+0(FP), R4 // R4 = dig, base address of the five state words
105 MOVV p_base+8(FP), R5 // R5 = p_base, pointer to the current input block
106 MOVV p_len+16(FP), R6 // R6 = p_len
107 AND $~63, R6 // round the length down to a multiple of 64
108 BEQ R6, zero // no complete block: return without touching dig
109
110 // p_len >= 64
111 ADDV R5, R6, R24 // R24 = end pointer (p_base + rounded length)
112 MOVW (0*4)(R4), R7 // R7 = state word 0 (a)
113 MOVW (1*4)(R4), R8 // R8 = state word 1 (b)
114 MOVW (2*4)(R4), R9 // R9 = state word 2 (c)
115 MOVW (3*4)(R4), R10 // R10 = state word 3 (d)
116 MOVW (4*4)(R4), R11 // R11 = state word 4 (e)
117
118 loop: // one iteration per 64-byte block
119 MOVW R7, R12 // R12..R16 keep the state as it was at block entry
120 MOVW R8, R13
121 MOVW R9, R14
122 MOVW R10, R15
123 MOVW R11, R16
124
125 ROUND1(R7, R8, R9, R10, R11, 0) // rounds 0-15: type 1, message words loaded from the input
126 ROUND1(R11, R7, R8, R9, R10, 1) // the register arguments shift by one position each round instead of moving values
127 ROUND1(R10, R11, R7, R8, R9, 2)
128 ROUND1(R9, R10, R11, R7, R8, 3)
129 ROUND1(R8, R9, R10, R11, R7, 4)
130 ROUND1(R7, R8, R9, R10, R11, 5)
131 ROUND1(R11, R7, R8, R9, R10, 6)
132 ROUND1(R10, R11, R7, R8, R9, 7)
133 ROUND1(R9, R10, R11, R7, R8, 8)
134 ROUND1(R8, R9, R10, R11, R7, 9)
135 ROUND1(R7, R8, R9, R10, R11, 10)
136 ROUND1(R11, R7, R8, R9, R10, 11)
137 ROUND1(R10, R11, R7, R8, R9, 12)
138 ROUND1(R9, R10, R11, R7, R8, 13)
139 ROUND1(R8, R9, R10, R11, R7, 14)
140 ROUND1(R7, R8, R9, R10, R11, 15)
141
142 ROUND1x(R11, R7, R8, R9, R10, 16) // rounds 16-19: type 1, message words expanded from the R3 buffer
143 ROUND1x(R10, R11, R7, R8, R9, 17)
144 ROUND1x(R9, R10, R11, R7, R8, 18)
145 ROUND1x(R8, R9, R10, R11, R7, 19)
146
147 ROUND2(R7, R8, R9, R10, R11, 20) // rounds 20-39: type 2
148 ROUND2(R11, R7, R8, R9, R10, 21)
149 ROUND2(R10, R11, R7, R8, R9, 22)
150 ROUND2(R9, R10, R11, R7, R8, 23)
151 ROUND2(R8, R9, R10, R11, R7, 24)
152 ROUND2(R7, R8, R9, R10, R11, 25)
153 ROUND2(R11, R7, R8, R9, R10, 26)
154 ROUND2(R10, R11, R7, R8, R9, 27)
155 ROUND2(R9, R10, R11, R7, R8, 28)
156 ROUND2(R8, R9, R10, R11, R7, 29)
157 ROUND2(R7, R8, R9, R10, R11, 30)
158 ROUND2(R11, R7, R8, R9, R10, 31)
159 ROUND2(R10, R11, R7, R8, R9, 32)
160 ROUND2(R9, R10, R11, R7, R8, 33)
161 ROUND2(R8, R9, R10, R11, R7, 34)
162 ROUND2(R7, R8, R9, R10, R11, 35)
163 ROUND2(R11, R7, R8, R9, R10, 36)
164 ROUND2(R10, R11, R7, R8, R9, 37)
165 ROUND2(R9, R10, R11, R7, R8, 38)
166 ROUND2(R8, R9, R10, R11, R7, 39)
167
168 ROUND3(R7, R8, R9, R10, R11, 40) // rounds 40-59: type 3
169 ROUND3(R11, R7, R8, R9, R10, 41)
170 ROUND3(R10, R11, R7, R8, R9, 42)
171 ROUND3(R9, R10, R11, R7, R8, 43)
172 ROUND3(R8, R9, R10, R11, R7, 44)
173 ROUND3(R7, R8, R9, R10, R11, 45)
174 ROUND3(R11, R7, R8, R9, R10, 46)
175 ROUND3(R10, R11, R7, R8, R9, 47)
176 ROUND3(R9, R10, R11, R7, R8, 48)
177 ROUND3(R8, R9, R10, R11, R7, 49)
178 ROUND3(R7, R8, R9, R10, R11, 50)
179 ROUND3(R11, R7, R8, R9, R10, 51)
180 ROUND3(R10, R11, R7, R8, R9, 52)
181 ROUND3(R9, R10, R11, R7, R8, 53)
182 ROUND3(R8, R9, R10, R11, R7, 54)
183 ROUND3(R7, R8, R9, R10, R11, 55)
184 ROUND3(R11, R7, R8, R9, R10, 56)
185 ROUND3(R10, R11, R7, R8, R9, 57)
186 ROUND3(R9, R10, R11, R7, R8, 58)
187 ROUND3(R8, R9, R10, R11, R7, 59)
188
189 ROUND4(R7, R8, R9, R10, R11, 60) // rounds 60-79: type 4
190 ROUND4(R11, R7, R8, R9, R10, 61)
191 ROUND4(R10, R11, R7, R8, R9, 62)
192 ROUND4(R9, R10, R11, R7, R8, 63)
193 ROUND4(R8, R9, R10, R11, R7, 64)
194 ROUND4(R7, R8, R9, R10, R11, 65)
195 ROUND4(R11, R7, R8, R9, R10, 66)
196 ROUND4(R10, R11, R7, R8, R9, 67)
197 ROUND4(R9, R10, R11, R7, R8, 68)
198 ROUND4(R8, R9, R10, R11, R7, 69)
199 ROUND4(R7, R8, R9, R10, R11, 70)
200 ROUND4(R11, R7, R8, R9, R10, 71)
201 ROUND4(R10, R11, R7, R8, R9, 72)
202 ROUND4(R9, R10, R11, R7, R8, 73)
203 ROUND4(R8, R9, R10, R11, R7, 74)
204 ROUND4(R7, R8, R9, R10, R11, 75)
205 ROUND4(R11, R7, R8, R9, R10, 76)
206 ROUND4(R10, R11, R7, R8, R9, 77)
207 ROUND4(R9, R10, R11, R7, R8, 78)
208 ROUND4(R8, R9, R10, R11, R7, 79)
209
210 ADD R12, R7 // add the block-entry state back into the working registers
211 ADD R13, R8
212 ADD R14, R9
213 ADD R15, R10
214 ADD R16, R11
215
216 ADDV $64, R5 // advance the input pointer to the next block
217 BNE R5, R24, loop // keep going until the end pointer is reached
218
219 end: // reached by fall-through; no branch in the visible code targets this label
220 MOVW R7, (0*4)(R4) // write the five updated state words back through R4
221 MOVW R8, (1*4)(R4)
222 MOVW R9, (2*4)(R4)
223 MOVW R10, (3*4)(R4)
224 MOVW R11, (4*4)(R4)
225 zero: // early-exit target when the input holds no complete block
226 RET
227
View as plain text