1 // Copyright 2024 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 //go:build !purego
6
7 #include "textflag.h"
8
9 // SHA256 block routine. See sha256block.go for Go equivalent.
10 //
11 // The algorithm is detailed in FIPS 180-4:
12 //
13 // https://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf
14 //
15 // W[i] = M[i]; for 0 <= i <= 15
16 // W[i] = SIGMA1(W[i-2]) + W[i-7] + SIGMA0(W[i-15]) + W[i-16]; for 16 <= i <= 63
17 //
18 // a = H0
19 // b = H1
20 // c = H2
21 // d = H3
22 // e = H4
23 // f = H5
24 // g = H6
25 // h = H7
26 //
27 // for i = 0 to 63 {
28 // T1 = h + BIGSIGMA1(e) + Ch(e,f,g) + K[i] + W[i]
29 // T2 = BIGSIGMA0(a) + Maj(a,b,c)
30 // h = g
31 // g = f
32 // f = e
33 // e = d + T1
34 // d = c
35 // c = b
36 // b = a
37 // a = T1 + T2
38 // }
39 //
40 // H0 = a + H0
41 // H1 = b + H1
42 // H2 = c + H2
43 // H3 = d + H3
44 // H4 = e + H4
45 // H5 = f + H5
46 // H6 = g + H6
47 // H7 = h + H7
48
// Scratch registers used by the macros below.
//
// Two of them carry values between macros:
//   REGTMP4 holds W[i] on exit from LOAD0/LOAD1, is consumed by SHA256T1,
//           and holds T1 on exit from SHA256T1.
//   REGTMP1 holds T2 on exit from SHA256T2.
// The others are plain temporaries.
//
// REGTMP4 is tied to R7: LOAD0 emits a raw instruction word (WORD $0x38e7)
// that its comment identifies as REVB2W REGTMP4, REGTMP4, so the register
// number is baked into that constant.
//
// REGTMP5 is R6, the same register block uses for the rounded-down p_len.
// block reads that length for the last time when it computes R25, before
// the first round runs, so the two uses do not overlap.
49 #define REGTMP R30
50 #define REGTMP1 R16
51 #define REGTMP2 R17
52 #define REGTMP3 R18
53 #define REGTMP4 R7
54 #define REGTMP5 R6
55
56 // W[i] = M[i]; for 0 <= i <= 15
//
// LOAD0(index) loads the 32-bit message word number index from the input
// chunk at R5, reverses its bytes, and stores it in entry index of the
// 16-entry schedule buffer addressed through R3. The word is also left in
// REGTMP4, which is where SHA256T1 expects W[i].
57 #define LOAD0(index) \
58 MOVW (index*4)(R5), REGTMP4; \
59 WORD $0x38e7; \ // REVB2W REGTMP4, REGTMP4 to big-endian (hand-encoded instruction word; only valid while REGTMP4 is R7)
60 MOVW REGTMP4, (index*4)(R3)
61
62 // W[i] = SIGMA1(W[i-2]) + W[i-7] + SIGMA0(W[i-15]) + W[i-16]; for 16 <= i <= 63
63 // SIGMA0(x) = ROTR(7,x) XOR ROTR(18,x) XOR SHR(3,x)
64 // SIGMA1(x) = ROTR(17,x) XOR ROTR(19,x) XOR SHR(10,x)
//
// LOAD1(index) derives W[index] from the 16-entry schedule buffer addressed
// through R3. The buffer is used as a ring: every entry number is reduced
// with &0xf, so the new word is written over W[index-16], the oldest entry,
// which later rounds no longer need.
//
// The two halves of the sum are built in separate registers with their
// instructions interleaved:
//   REGTMP4 = SIGMA1(W[i-2]) + W[i-7]
//   REGTMP5 = SIGMA0(W[i-15]) + W[i-16]
// They are then added, stored, and left in REGTMP4 for SHA256T1.
// Clobbers REGTMP and REGTMP1 through REGTMP5.
65 #define LOAD1(index) \
66 MOVW (((index-2)&0xf)*4)(R3), REGTMP4; \
67 MOVW (((index-15)&0xf)*4)(R3), REGTMP1; \
68 MOVW (((index-7)&0xf)*4)(R3), REGTMP; \
69 MOVW REGTMP4, REGTMP2; \
70 MOVW REGTMP4, REGTMP3; \
71 ROTR $17, REGTMP4; \
72 ROTR $19, REGTMP2; \
73 SRL $10, REGTMP3; \
74 XOR REGTMP2, REGTMP4; \
75 XOR REGTMP3, REGTMP4; \
76 ROTR $7, REGTMP1, REGTMP5; \
77 SRL $3, REGTMP1, REGTMP3; \
78 ROTR $18, REGTMP1, REGTMP2; \
79 ADD REGTMP, REGTMP4; \
80 MOVW (((index-16)&0xf)*4)(R3), REGTMP; \
81 XOR REGTMP3, REGTMP5; \
82 XOR REGTMP2, REGTMP5; \
83 ADD REGTMP, REGTMP5; \
84 ADD REGTMP5, REGTMP4; \
85 MOVW REGTMP4, ((index&0xf)*4)(R3)
86
87 // T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + K[i] + W[i]
88 // BIGSIGMA1(x) = ROTR(6,x) XOR ROTR(11,x) XOR ROTR(25,x)
89 // Ch(x, y, z) = (x AND y) XOR (NOT x AND z)
90 // Calculate T1 in REGTMP4
//
// SHA256T1(const, e, f, g, h):
//   const    the round constant K[i], added as an immediate
//   REGTMP4  W[i] on entry (left there by LOAD0/LOAD1), T1 on exit
//   e, f, g  read only
//   h        modified: on exit it holds h + K[i] + W[i] + BIGSIGMA1(e)
//
// NOT e is formed by XORing e with 0xffffffff. Ch(e, f, g) is built in
// REGTMP5 and added to the running sum in h to give T1.
// Clobbers REGTMP, REGTMP2, REGTMP3 and REGTMP5.
91 #define SHA256T1(const, e, f, g, h) \
92 ADDV $const, h; \
93 ADD REGTMP4, h; \
94 ROTR $6, e, REGTMP4; \
95 ROTR $11, e, REGTMP; \
96 ROTR $25, e, REGTMP3; \
97 AND f, e, REGTMP2; \
98 XOR REGTMP, REGTMP4; \
99 MOVV $0xffffffff, REGTMP; \
100 XOR REGTMP4, REGTMP3; \
101 XOR REGTMP, e, REGTMP5; \
102 ADD REGTMP3, h; \
103 AND g, REGTMP5; \
104 XOR REGTMP2, REGTMP5; \
105 ADD h, REGTMP5, REGTMP4
106
107 // T2 = BIGSIGMA0(a) + Maj(a, b, c)
108 // BIGSIGMA0(x) = ROTR(2,x) XOR ROTR(13,x) XOR ROTR(22,x)
109 // Maj(x, y, z) = (x AND y) XOR (x AND z) XOR (y AND z)
110 // Calculate T2 in REGTMP1
//
// SHA256T2(a, b, c) only reads a, b and c. BIGSIGMA0(a) is built in REGTMP5
// and Maj(a, b, c) in REGTMP1, with the two computations interleaved; the
// final ADD leaves T2 in REGTMP1.
// REGTMP4 is not written, so the T1 value produced by SHA256T1 survives.
// Clobbers REGTMP, REGTMP2, REGTMP3 and REGTMP5.
111 #define SHA256T2(a, b, c) \
112 ROTR $2, a, REGTMP5; \
113 AND b, c, REGTMP1; \
114 ROTR $13, a, REGTMP3; \
115 AND c, a, REGTMP; \
116 XOR REGTMP3, REGTMP5; \
117 XOR REGTMP, REGTMP1; \
118 ROTR $22, a, REGTMP2; \
119 AND a, b, REGTMP3; \
120 XOR REGTMP2, REGTMP5; \
121 XOR REGTMP3, REGTMP1; \
122 ADD REGTMP5, REGTMP1
123
124 // Calculate T1 and T2, then e = d + T1 and a = T1 + T2.
125 // The values for e and a are stored in d and h, ready for rotation.
//
// W[i] must already be in REGTMP4 (see LOAD0/LOAD1). After the round:
//   d holds the new e (d + T1)
//   h holds the new a (T1 + T2)
//   a, b, c, e, f, g are unchanged
// No values are moved between registers. Instead the caller passes the
// same eight registers shifted by one position in the next round, so the
// register given as h here is given as a there, and d becomes e.
126 #define SHA256ROUND(const, a, b, c, d, e, f, g, h) \
127 SHA256T1(const, e, f, g, h); \
128 SHA256T2(a, b, c); \
129 ADD REGTMP4, d; \
130 ADD REGTMP1, REGTMP4, h
131
// SHA256ROUND0 performs one of rounds 0 through 15: LOAD0 takes W[index]
// directly from the message, then SHA256ROUND runs the round with the
// round constant const and the working variables a through h.
132 #define SHA256ROUND0(index, const, a, b, c, d, e, f, g, h) \
133 LOAD0(index); \
134 SHA256ROUND(const, a, b, c, d, e, f, g, h)
135
// SHA256ROUND1 performs one of rounds 16 through 63: LOAD1 derives W[index]
// from earlier schedule entries, then SHA256ROUND runs the round with the
// round constant const and the working variables a through h.
136 #define SHA256ROUND1(index, const, a, b, c, d, e, f, g, h) \
137 LOAD1(index); \
138 SHA256ROUND(const, a, b, c, d, e, f, g, h)
139
// block folds every complete 64-byte chunk of p into the eight 32-bit
// state words H0..H7 read from and written back to the start of dig.
// A tail of p shorter than 64 bytes is ignored, and dig is not touched
// at all when p does not contain a complete chunk.
//
140 // A stack frame size of 64 bytes is required here, because
141 // the frame size used for data expansion is 64 bytes.
142 // See the definition of the macro LOAD1 above (4 bytes * 16 entries).
// NOTE(review): LOAD0 and LOAD1 address the schedule at offsets 0 through 63
// from R3. Confirm against the loong64 stack layout that this range lies
// entirely inside the 64-byte locals area of the frame.
//
// Register use:
//   R4      dig
//   R5      start of the chunk being processed
//   R6      len(p) rounded down to a multiple of 64 (reused as REGTMP5 in the loop)
//   R25     address just past the last complete chunk
//   R8-R15  working variables a-h; the role of each register shifts by one per round
143 //
144 //func block(dig *Digest, p []byte)
145 TEXT ·block(SB),NOSPLIT,$64-32
146 MOVV p_base+8(FP), R5 // R5 = address of the first byte of p
147 MOVV p_len+16(FP), R6 // R6 = len(p)
148 AND $~63, R6 // drop any partial trailing chunk
149 BEQ R6, end // no complete chunk: return without touching dig
150
151 // p_len >= 64
152 MOVV dig+0(FP), R4
153 ADDV R5, R6, R25 // R25 = end of the last complete chunk; last use of R6 as the length
154 MOVW (0*4)(R4), R8 // a = H0
155 MOVW (1*4)(R4), R9 // b = H1
156 MOVW (2*4)(R4), R10 // c = H2
157 MOVW (3*4)(R4), R11 // d = H3
158 MOVW (4*4)(R4), R12 // e = H4
159 MOVW (5*4)(R4), R13 // f = H5
160 MOVW (6*4)(R4), R14 // g = H6
161 MOVW (7*4)(R4), R15 // h = H7
162
163 loop:
// Rounds 0-15: W[i] is taken from the message. Each line passes R8-R15
// shifted by one position relative to the previous line, which performs
// the h=g, g=f, ... b=a renaming without any register moves.
164 SHA256ROUND0(0, 0x428a2f98, R8, R9, R10, R11, R12, R13, R14, R15)
165 SHA256ROUND0(1, 0x71374491, R15, R8, R9, R10, R11, R12, R13, R14)
166 SHA256ROUND0(2, 0xb5c0fbcf, R14, R15, R8, R9, R10, R11, R12, R13)
167 SHA256ROUND0(3, 0xe9b5dba5, R13, R14, R15, R8, R9, R10, R11, R12)
168 SHA256ROUND0(4, 0x3956c25b, R12, R13, R14, R15, R8, R9, R10, R11)
169 SHA256ROUND0(5, 0x59f111f1, R11, R12, R13, R14, R15, R8, R9, R10)
170 SHA256ROUND0(6, 0x923f82a4, R10, R11, R12, R13, R14, R15, R8, R9)
171 SHA256ROUND0(7, 0xab1c5ed5, R9, R10, R11, R12, R13, R14, R15, R8)
172 SHA256ROUND0(8, 0xd807aa98, R8, R9, R10, R11, R12, R13, R14, R15)
173 SHA256ROUND0(9, 0x12835b01, R15, R8, R9, R10, R11, R12, R13, R14)
174 SHA256ROUND0(10, 0x243185be, R14, R15, R8, R9, R10, R11, R12, R13)
175 SHA256ROUND0(11, 0x550c7dc3, R13, R14, R15, R8, R9, R10, R11, R12)
176 SHA256ROUND0(12, 0x72be5d74, R12, R13, R14, R15, R8, R9, R10, R11)
177 SHA256ROUND0(13, 0x80deb1fe, R11, R12, R13, R14, R15, R8, R9, R10)
178 SHA256ROUND0(14, 0x9bdc06a7, R10, R11, R12, R13, R14, R15, R8, R9)
179 SHA256ROUND0(15, 0xc19bf174, R9, R10, R11, R12, R13, R14, R15, R8)
180
// Rounds 16-63: W[i] is derived from the earlier schedule entries.
181 SHA256ROUND1(16, 0xe49b69c1, R8, R9, R10, R11, R12, R13, R14, R15)
182 SHA256ROUND1(17, 0xefbe4786, R15, R8, R9, R10, R11, R12, R13, R14)
183 SHA256ROUND1(18, 0x0fc19dc6, R14, R15, R8, R9, R10, R11, R12, R13)
184 SHA256ROUND1(19, 0x240ca1cc, R13, R14, R15, R8, R9, R10, R11, R12)
185 SHA256ROUND1(20, 0x2de92c6f, R12, R13, R14, R15, R8, R9, R10, R11)
186 SHA256ROUND1(21, 0x4a7484aa, R11, R12, R13, R14, R15, R8, R9, R10)
187 SHA256ROUND1(22, 0x5cb0a9dc, R10, R11, R12, R13, R14, R15, R8, R9)
188 SHA256ROUND1(23, 0x76f988da, R9, R10, R11, R12, R13, R14, R15, R8)
189 SHA256ROUND1(24, 0x983e5152, R8, R9, R10, R11, R12, R13, R14, R15)
190 SHA256ROUND1(25, 0xa831c66d, R15, R8, R9, R10, R11, R12, R13, R14)
191 SHA256ROUND1(26, 0xb00327c8, R14, R15, R8, R9, R10, R11, R12, R13)
192 SHA256ROUND1(27, 0xbf597fc7, R13, R14, R15, R8, R9, R10, R11, R12)
193 SHA256ROUND1(28, 0xc6e00bf3, R12, R13, R14, R15, R8, R9, R10, R11)
194 SHA256ROUND1(29, 0xd5a79147, R11, R12, R13, R14, R15, R8, R9, R10)
195 SHA256ROUND1(30, 0x06ca6351, R10, R11, R12, R13, R14, R15, R8, R9)
196 SHA256ROUND1(31, 0x14292967, R9, R10, R11, R12, R13, R14, R15, R8)
197 SHA256ROUND1(32, 0x27b70a85, R8, R9, R10, R11, R12, R13, R14, R15)
198 SHA256ROUND1(33, 0x2e1b2138, R15, R8, R9, R10, R11, R12, R13, R14)
199 SHA256ROUND1(34, 0x4d2c6dfc, R14, R15, R8, R9, R10, R11, R12, R13)
200 SHA256ROUND1(35, 0x53380d13, R13, R14, R15, R8, R9, R10, R11, R12)
201 SHA256ROUND1(36, 0x650a7354, R12, R13, R14, R15, R8, R9, R10, R11)
202 SHA256ROUND1(37, 0x766a0abb, R11, R12, R13, R14, R15, R8, R9, R10)
203 SHA256ROUND1(38, 0x81c2c92e, R10, R11, R12, R13, R14, R15, R8, R9)
204 SHA256ROUND1(39, 0x92722c85, R9, R10, R11, R12, R13, R14, R15, R8)
205 SHA256ROUND1(40, 0xa2bfe8a1, R8, R9, R10, R11, R12, R13, R14, R15)
206 SHA256ROUND1(41, 0xa81a664b, R15, R8, R9, R10, R11, R12, R13, R14)
207 SHA256ROUND1(42, 0xc24b8b70, R14, R15, R8, R9, R10, R11, R12, R13)
208 SHA256ROUND1(43, 0xc76c51a3, R13, R14, R15, R8, R9, R10, R11, R12)
209 SHA256ROUND1(44, 0xd192e819, R12, R13, R14, R15, R8, R9, R10, R11)
210 SHA256ROUND1(45, 0xd6990624, R11, R12, R13, R14, R15, R8, R9, R10)
211 SHA256ROUND1(46, 0xf40e3585, R10, R11, R12, R13, R14, R15, R8, R9)
212 SHA256ROUND1(47, 0x106aa070, R9, R10, R11, R12, R13, R14, R15, R8)
213 SHA256ROUND1(48, 0x19a4c116, R8, R9, R10, R11, R12, R13, R14, R15)
214 SHA256ROUND1(49, 0x1e376c08, R15, R8, R9, R10, R11, R12, R13, R14)
215 SHA256ROUND1(50, 0x2748774c, R14, R15, R8, R9, R10, R11, R12, R13)
216 SHA256ROUND1(51, 0x34b0bcb5, R13, R14, R15, R8, R9, R10, R11, R12)
217 SHA256ROUND1(52, 0x391c0cb3, R12, R13, R14, R15, R8, R9, R10, R11)
218 SHA256ROUND1(53, 0x4ed8aa4a, R11, R12, R13, R14, R15, R8, R9, R10)
219 SHA256ROUND1(54, 0x5b9cca4f, R10, R11, R12, R13, R14, R15, R8, R9)
220 SHA256ROUND1(55, 0x682e6ff3, R9, R10, R11, R12, R13, R14, R15, R8)
221 SHA256ROUND1(56, 0x748f82ee, R8, R9, R10, R11, R12, R13, R14, R15)
222 SHA256ROUND1(57, 0x78a5636f, R15, R8, R9, R10, R11, R12, R13, R14)
223 SHA256ROUND1(58, 0x84c87814, R14, R15, R8, R9, R10, R11, R12, R13)
224 SHA256ROUND1(59, 0x8cc70208, R13, R14, R15, R8, R9, R10, R11, R12)
225 SHA256ROUND1(60, 0x90befffa, R12, R13, R14, R15, R8, R9, R10, R11)
226 SHA256ROUND1(61, 0xa4506ceb, R11, R12, R13, R14, R15, R8, R9, R10)
227 SHA256ROUND1(62, 0xbef9a3f7, R10, R11, R12, R13, R14, R15, R8, R9)
228 SHA256ROUND1(63, 0xc67178f2, R9, R10, R11, R12, R13, R14, R15, R8)
229
// The register roles repeat every 8 rounds, so after 64 rounds R8-R15 hold
// a-h in their original order. Add the previous state to them and store the
// result. The sums stay in R8-R15 and serve as a-h for the next chunk, so
// the state is not reloaded at the top of the loop.
230 MOVW (0*4)(R4), REGTMP
231 MOVW (1*4)(R4), REGTMP1
232 MOVW (2*4)(R4), REGTMP2
233 MOVW (3*4)(R4), REGTMP3
234 ADD REGTMP, R8 // H0 = a + H0
235 ADD REGTMP1, R9 // H1 = b + H1
236 ADD REGTMP2, R10 // H2 = c + H2
237 ADD REGTMP3, R11 // H3 = d + H3
238 MOVW R8, (0*4)(R4)
239 MOVW R9, (1*4)(R4)
240 MOVW R10, (2*4)(R4)
241 MOVW R11, (3*4)(R4)
242 MOVW (4*4)(R4), REGTMP
243 MOVW (5*4)(R4), REGTMP1
244 MOVW (6*4)(R4), REGTMP2
245 MOVW (7*4)(R4), REGTMP3
246 ADD REGTMP, R12 // H4 = e + H4
247 ADD REGTMP1, R13 // H5 = f + H5
248 ADD REGTMP2, R14 // H6 = g + H6
249 ADD REGTMP3, R15 // H7 = h + H7
250 MOVW R12, (4*4)(R4)
251 MOVW R13, (5*4)(R4)
252 MOVW R14, (6*4)(R4)
253 MOVW R15, (7*4)(R4)
254
255 ADDV $64, R5 // advance to the next 64-byte chunk
256 BNE R5, R25, loop // repeat until every complete chunk has been consumed
257
258 end:
259 RET
260
View as plain text