1 // Copyright 2023 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 //go:build !purego
6
7 #include "textflag.h"
8
9 // SHA512 block routine. See sha512block.go for Go equivalent.
10 //
11 // The algorithm is detailed in FIPS 180-4:
12 //
13 // https://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf
14 //
15 // Wt = Mt; for 0 <= t <= 15
// Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 79
17 //
18 // a = H0
19 // b = H1
20 // c = H2
21 // d = H3
22 // e = H4
23 // f = H5
24 // g = H6
25 // h = H7
26 //
27 // for t = 0 to 79 {
28 // T1 = h + BIGSIGMA1(e) + Ch(e,f,g) + Kt + Wt
29 // T2 = BIGSIGMA0(a) + Maj(a,b,c)
30 // h = g
31 // g = f
32 // f = e
33 // e = d + T1
34 // d = c
35 // c = b
36 // b = a
37 // a = T1 + T2
38 // }
39 //
40 // H0 = a + H0
41 // H1 = b + H1
42 // H2 = c + H2
43 // H3 = d + H3
44 // H4 = e + H4
45 // H5 = f + H5
46 // H6 = g + H6
47 // H7 = h + H7
48
// Wt = Mt; for 0 <= t <= 15
//
// Reads the eight bytes of message word `index` from the input block at
// X29, most significant byte first, and assembles them into a 64-bit
// value. The upper four bytes are combined into X5 and the lower four
// into X6 before the two halves are joined.
// The result is left in X5 and written to slot `index` of the message
// schedule at X19. Scratch registers: X6, X7, X8, X9.
#define MSGSCHEDULE0(index) \
	MOVBU	((index*8)+0)(X29), X5; \
	MOVBU	((index*8)+1)(X29), X6; \
	MOVBU	((index*8)+2)(X29), X7; \
	MOVBU	((index*8)+3)(X29), X8; \
	SLL	$24, X5; \
	SLL	$16, X6; \
	SLL	$8, X7; \
	OR	X6, X5; \
	OR	X8, X7; \
	OR	X7, X5; \
	MOVBU	((index*8)+4)(X29), X6; \
	MOVBU	((index*8)+5)(X29), X7; \
	MOVBU	((index*8)+6)(X29), X8; \
	MOVBU	((index*8)+7)(X29), X9; \
	SLL	$32, X5; \
	SLL	$24, X6; \
	SLL	$16, X7; \
	SLL	$8, X8; \
	OR	X7, X6; \
	OR	X9, X8; \
	OR	X8, X6; \
	OR	X6, X5; \
	MOV	X5, (index*8)(X19)
74
// Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 79
//   SIGMA0(x) = ROTR(1,x) XOR ROTR(8,x) XOR SHR(7,x)
//   SIGMA1(x) = ROTR(19,x) XOR ROTR(61,x) XOR SHR(6,x)
//
// The schedule at X19 is addressed as a 16-entry ring: every word index
// is reduced with &0xf, so Wt overwrites the slot that held Wt-16. All
// four inputs are loaded before that slot is stored.
// The result is left in X5 and stored in the ring.
// Scratch registers: X6, X7, X8, X9, X21.
#define MSGSCHEDULE1(index) \
	MOV	(((index-15)&0xf)*8)(X19), X6; \
	MOV	(((index-2)&0xf)*8)(X19), X5; \
	MOV	(((index-16)&0xf)*8)(X19), X21; \
	MOV	(((index-7)&0xf)*8)(X19), X9; \
	ROR	$1, X6, X7; \
	ROR	$8, X6, X8; \
	SRL	$7, X6; \
	XOR	X8, X7; \
	XOR	X7, X6; \
	ADD	X21, X9; \
	ROR	$19, X5, X7; \
	ROR	$61, X5, X8; \
	SRL	$6, X5; \
	XOR	X8, X7; \
	XOR	X7, X5; \
	ADD	X9, X6; \
	ADD	X6, X5; \
	MOV	X5, ((index&0xf)*8)(X19)
97
// Calculate T1 in X5.
//   T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + Kt + Wt
//     BIGSIGMA1(x) = ROTR(14,x) XOR ROTR(18,x) XOR ROTR(41,x)
//     Ch(x, y, z) = (x AND y) XOR (NOT x AND z)
//
// On entry X5 holds Wt and X18 points at the round-constant table, from
// which Kt is read at offset index*8.
// h doubles as an accumulator: Wt, Kt and BIGSIGMA1(e) are added into it,
// so its incoming value is not preserved.
// Scratch registers: X6, X7, X8.
#define SHA512T1(index, e, f, g, h) \
	ROR	$14, e, X6; \
	ROR	$18, e, X7; \
	ROR	$41, e, X8; \
	XOR	X7, X6; \
	XOR	X8, X6; \
	MOV	(index*8)(X18), X8; \
	ADD	X5, h; \
	ADD	X8, h; \
	ADD	X6, h; \
	NOT	e, X7; \
	AND	e, f, X5; \
	AND	g, X7; \
	XOR	X7, X5; \
	ADD	h, X5
118
// Calculate T2 in X6.
//   T2 = BIGSIGMA0(a) + Maj(a, b, c)
//     BIGSIGMA0(x) = ROTR(28,x) XOR ROTR(34,x) XOR ROTR(39,x)
//     Maj(x, y, z) = (x AND y) XOR (x AND z) XOR (y AND z)
//
// Maj is formed in X7 first, then BIGSIGMA0 in X6, and the two are summed.
// a, b and c are only read. X5 is not touched, so a T1 value held there
// stays valid across this macro.
// Scratch registers: X7, X8, X9.
#define SHA512T2(a, b, c) \
	AND	a, b, X7; \
	AND	a, c, X8; \
	AND	b, c, X9; \
	XOR	X8, X7; \
	XOR	X9, X7; \
	ROR	$28, a, X6; \
	ROR	$34, a, X8; \
	ROR	$39, a, X9; \
	XOR	X8, X6; \
	XOR	X9, X6; \
	ADD	X7, X6
135
// One compression round, given Wt in X5.
//
// Computes T1 (into X5) and T2 (into X6), then forms the two values that
// change in a round:
//   e = d + T1, written into the register passed as d
//   a = T1 + T2, written into the register passed as h
// Leaving the results in d and h means the working variables do not have
// to be moved; the call sites rotate the register arguments by one
// position per round instead.
#define SHA512ROUND(index, a, b, c, d, e, f, g, h) \
	SHA512T1(index, e, f, g, h); \
	SHA512T2(a, b, c); \
	ADD	X5, d; \
	ADD	X5, X6, h
144
// Round t for 0 <= t <= 15: Wt is read from the input block by
// MSGSCHEDULE0, then the compression round is applied.
#define SHA512ROUND0(t, a, b, c, d, e, f, g, h) \
	MSGSCHEDULE0(t); \
	SHA512ROUND(t, a, b, c, d, e, f, g, h)
148
// Round t for 16 <= t <= 79: Wt is derived from earlier schedule words by
// MSGSCHEDULE1, then the compression round is applied.
#define SHA512ROUND1(t, a, b, c, d, e, f, g, h) \
	MSGSCHEDULE1(t); \
	SHA512ROUND(t, a, b, c, d, e, f, g, h)
152
// func block(dig *Digest, p []byte)
//
// Processes every complete 128-byte block of p, updating the eight 64-bit
// hash words at the start of *dig. Trailing bytes that do not fill a whole
// block are ignored.
//
// Register use:
//   X10-X17  working variables a-h (roles rotate by one each round)
//   X18      round constant table ·_K
//   X19      16-word message schedule at 8(X2)
//   X20      dig
//   X28      end of the last complete block
//   X29      current block
//   X5-X9, X21  scratch used by the round macros
TEXT ·block(SB),0,$128-32
	MOV	dig+0(FP), X20
	MOV	p_base+8(FP), X29
	MOV	p_len+16(FP), X30
	SRL	$7, X30
	SLL	$7, X30			// length rounded down to a multiple of 128

	ADD	X29, X30, X28		// X28 = p + rounded length
	BEQ	X29, X28, end		// no complete block

	ADD	$8, X2, X19		// message schedule
	MOV	$·_K(SB), X18		// const table

	MOV	(0*8)(X20), X10		// a = H0
	MOV	(1*8)(X20), X11		// b = H1
	MOV	(2*8)(X20), X12		// c = H2
	MOV	(3*8)(X20), X13		// d = H3
	MOV	(4*8)(X20), X14		// e = H4
	MOV	(5*8)(X20), X15		// f = H5
	MOV	(6*8)(X20), X16		// g = H6
	MOV	(7*8)(X20), X17		// h = H7

loop:
	// Rounds 0-15 take Wt straight from the message.
	SHA512ROUND0(0, X10, X11, X12, X13, X14, X15, X16, X17)
	SHA512ROUND0(1, X17, X10, X11, X12, X13, X14, X15, X16)
	SHA512ROUND0(2, X16, X17, X10, X11, X12, X13, X14, X15)
	SHA512ROUND0(3, X15, X16, X17, X10, X11, X12, X13, X14)
	SHA512ROUND0(4, X14, X15, X16, X17, X10, X11, X12, X13)
	SHA512ROUND0(5, X13, X14, X15, X16, X17, X10, X11, X12)
	SHA512ROUND0(6, X12, X13, X14, X15, X16, X17, X10, X11)
	SHA512ROUND0(7, X11, X12, X13, X14, X15, X16, X17, X10)
	SHA512ROUND0(8, X10, X11, X12, X13, X14, X15, X16, X17)
	SHA512ROUND0(9, X17, X10, X11, X12, X13, X14, X15, X16)
	SHA512ROUND0(10, X16, X17, X10, X11, X12, X13, X14, X15)
	SHA512ROUND0(11, X15, X16, X17, X10, X11, X12, X13, X14)
	SHA512ROUND0(12, X14, X15, X16, X17, X10, X11, X12, X13)
	SHA512ROUND0(13, X13, X14, X15, X16, X17, X10, X11, X12)
	SHA512ROUND0(14, X12, X13, X14, X15, X16, X17, X10, X11)
	SHA512ROUND0(15, X11, X12, X13, X14, X15, X16, X17, X10)

	// Rounds 16-79 extend the schedule from earlier words.
	SHA512ROUND1(16, X10, X11, X12, X13, X14, X15, X16, X17)
	SHA512ROUND1(17, X17, X10, X11, X12, X13, X14, X15, X16)
	SHA512ROUND1(18, X16, X17, X10, X11, X12, X13, X14, X15)
	SHA512ROUND1(19, X15, X16, X17, X10, X11, X12, X13, X14)
	SHA512ROUND1(20, X14, X15, X16, X17, X10, X11, X12, X13)
	SHA512ROUND1(21, X13, X14, X15, X16, X17, X10, X11, X12)
	SHA512ROUND1(22, X12, X13, X14, X15, X16, X17, X10, X11)
	SHA512ROUND1(23, X11, X12, X13, X14, X15, X16, X17, X10)
	SHA512ROUND1(24, X10, X11, X12, X13, X14, X15, X16, X17)
	SHA512ROUND1(25, X17, X10, X11, X12, X13, X14, X15, X16)
	SHA512ROUND1(26, X16, X17, X10, X11, X12, X13, X14, X15)
	SHA512ROUND1(27, X15, X16, X17, X10, X11, X12, X13, X14)
	SHA512ROUND1(28, X14, X15, X16, X17, X10, X11, X12, X13)
	SHA512ROUND1(29, X13, X14, X15, X16, X17, X10, X11, X12)
	SHA512ROUND1(30, X12, X13, X14, X15, X16, X17, X10, X11)
	SHA512ROUND1(31, X11, X12, X13, X14, X15, X16, X17, X10)
	SHA512ROUND1(32, X10, X11, X12, X13, X14, X15, X16, X17)
	SHA512ROUND1(33, X17, X10, X11, X12, X13, X14, X15, X16)
	SHA512ROUND1(34, X16, X17, X10, X11, X12, X13, X14, X15)
	SHA512ROUND1(35, X15, X16, X17, X10, X11, X12, X13, X14)
	SHA512ROUND1(36, X14, X15, X16, X17, X10, X11, X12, X13)
	SHA512ROUND1(37, X13, X14, X15, X16, X17, X10, X11, X12)
	SHA512ROUND1(38, X12, X13, X14, X15, X16, X17, X10, X11)
	SHA512ROUND1(39, X11, X12, X13, X14, X15, X16, X17, X10)
	SHA512ROUND1(40, X10, X11, X12, X13, X14, X15, X16, X17)
	SHA512ROUND1(41, X17, X10, X11, X12, X13, X14, X15, X16)
	SHA512ROUND1(42, X16, X17, X10, X11, X12, X13, X14, X15)
	SHA512ROUND1(43, X15, X16, X17, X10, X11, X12, X13, X14)
	SHA512ROUND1(44, X14, X15, X16, X17, X10, X11, X12, X13)
	SHA512ROUND1(45, X13, X14, X15, X16, X17, X10, X11, X12)
	SHA512ROUND1(46, X12, X13, X14, X15, X16, X17, X10, X11)
	SHA512ROUND1(47, X11, X12, X13, X14, X15, X16, X17, X10)
	SHA512ROUND1(48, X10, X11, X12, X13, X14, X15, X16, X17)
	SHA512ROUND1(49, X17, X10, X11, X12, X13, X14, X15, X16)
	SHA512ROUND1(50, X16, X17, X10, X11, X12, X13, X14, X15)
	SHA512ROUND1(51, X15, X16, X17, X10, X11, X12, X13, X14)
	SHA512ROUND1(52, X14, X15, X16, X17, X10, X11, X12, X13)
	SHA512ROUND1(53, X13, X14, X15, X16, X17, X10, X11, X12)
	SHA512ROUND1(54, X12, X13, X14, X15, X16, X17, X10, X11)
	SHA512ROUND1(55, X11, X12, X13, X14, X15, X16, X17, X10)
	SHA512ROUND1(56, X10, X11, X12, X13, X14, X15, X16, X17)
	SHA512ROUND1(57, X17, X10, X11, X12, X13, X14, X15, X16)
	SHA512ROUND1(58, X16, X17, X10, X11, X12, X13, X14, X15)
	SHA512ROUND1(59, X15, X16, X17, X10, X11, X12, X13, X14)
	SHA512ROUND1(60, X14, X15, X16, X17, X10, X11, X12, X13)
	SHA512ROUND1(61, X13, X14, X15, X16, X17, X10, X11, X12)
	SHA512ROUND1(62, X12, X13, X14, X15, X16, X17, X10, X11)
	SHA512ROUND1(63, X11, X12, X13, X14, X15, X16, X17, X10)
	SHA512ROUND1(64, X10, X11, X12, X13, X14, X15, X16, X17)
	SHA512ROUND1(65, X17, X10, X11, X12, X13, X14, X15, X16)
	SHA512ROUND1(66, X16, X17, X10, X11, X12, X13, X14, X15)
	SHA512ROUND1(67, X15, X16, X17, X10, X11, X12, X13, X14)
	SHA512ROUND1(68, X14, X15, X16, X17, X10, X11, X12, X13)
	SHA512ROUND1(69, X13, X14, X15, X16, X17, X10, X11, X12)
	SHA512ROUND1(70, X12, X13, X14, X15, X16, X17, X10, X11)
	SHA512ROUND1(71, X11, X12, X13, X14, X15, X16, X17, X10)
	SHA512ROUND1(72, X10, X11, X12, X13, X14, X15, X16, X17)
	SHA512ROUND1(73, X17, X10, X11, X12, X13, X14, X15, X16)
	SHA512ROUND1(74, X16, X17, X10, X11, X12, X13, X14, X15)
	SHA512ROUND1(75, X15, X16, X17, X10, X11, X12, X13, X14)
	SHA512ROUND1(76, X14, X15, X16, X17, X10, X11, X12, X13)
	SHA512ROUND1(77, X13, X14, X15, X16, X17, X10, X11, X12)
	SHA512ROUND1(78, X12, X13, X14, X15, X16, X17, X10, X11)
	SHA512ROUND1(79, X11, X12, X13, X14, X15, X16, X17, X10)

	// Add the previous hash words into the working variables and store the
	// sums. X10-X17 keep the new hash, which is the starting a-h for the
	// next block.
	MOV	(0*8)(X20), X5
	MOV	(1*8)(X20), X6
	ADD	X5, X10			// H0 = a + H0
	ADD	X6, X11			// H1 = b + H1
	MOV	X10, (0*8)(X20)
	MOV	X11, (1*8)(X20)

	MOV	(2*8)(X20), X5
	MOV	(3*8)(X20), X6
	ADD	X5, X12			// H2 = c + H2
	ADD	X6, X13			// H3 = d + H3
	MOV	X12, (2*8)(X20)
	MOV	X13, (3*8)(X20)

	MOV	(4*8)(X20), X5
	MOV	(5*8)(X20), X6
	ADD	X5, X14			// H4 = e + H4
	ADD	X6, X15			// H5 = f + H5
	MOV	X14, (4*8)(X20)
	MOV	X15, (5*8)(X20)

	MOV	(6*8)(X20), X5
	MOV	(7*8)(X20), X6
	ADD	X5, X16			// H6 = g + H6
	ADD	X6, X17			// H7 = h + H7
	MOV	X16, (6*8)(X20)
	MOV	X17, (7*8)(X20)

	ADD	$128, X29		// advance to the next block
	BNE	X29, X28, loop

end:
	RET
289
View as plain text