1 // Copyright 2023 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 //go:build !purego
6
7 #include "textflag.h"
8
9 // SHA256 block routine. See sha256block.go for Go equivalent.
10 //
11 // The algorithm is detailed in FIPS 180-4:
12 //
13 // https://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf
14 //
15 // Wt = Mt; for 0 <= t <= 15
// Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 63
17 //
18 // a = H0
19 // b = H1
20 // c = H2
21 // d = H3
22 // e = H4
23 // f = H5
24 // g = H6
25 // h = H7
26 //
27 // for t = 0 to 63 {
28 // T1 = h + BIGSIGMA1(e) + Ch(e,f,g) + Kt + Wt
29 // T2 = BIGSIGMA0(a) + Maj(a,b,c)
30 // h = g
31 // g = f
32 // f = e
33 // e = d + T1
34 // d = c
35 // c = b
36 // b = a
37 // a = T1 + T2
38 // }
39 //
40 // H0 = a + H0
41 // H1 = b + H1
42 // H2 = c + H2
43 // H3 = d + H3
44 // H4 = e + H4
45 // H5 = f + H5
46 // H6 = g + H6
47 // H7 = h + H7
48
// MSGSCHEDULE0 produces Wt = Mt for 0 <= t <= 15.
//
// The four bytes at offset index*4 from X29 are combined big-endian
// (the byte at the lowest address becomes the most significant byte).
// The resulting word is left in X5 and is also written to slot index
// of the message schedule buffer addressed by X19.
//
// Clobbers: X6, X7, X8.
#define MSGSCHEDULE0(index) \
	MOVBU	((index*4)+3)(X29), X8; \
	MOVBU	((index*4)+2)(X29), X7; \
	MOVBU	((index*4)+1)(X29), X6; \
	MOVBU	((index*4)+0)(X29), X5; \
	SLL	$8, X7; \
	SLL	$16, X6; \
	SLL	$24, X5; \
	OR	X7, X8; \
	OR	X6, X5; \
	OR	X8, X5; \
	MOVW	X5, (index*4)(X19)
62
// MSGSCHEDULE1 extends the message schedule for 16 <= t <= 63:
//
//   Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16
//     SIGMA0(x) = ROTR(7,x) XOR ROTR(18,x) XOR SHR(3,x)
//     SIGMA1(x) = ROTR(17,x) XOR ROTR(19,x) XOR SHR(10,x)
//
// The schedule at X19 holds only the 16 most recent words, so every
// index is reduced modulo 16; Wt overwrites the slot that held Wt-16.
// The new word is left in X5 (only the low 32 bits are meaningful)
// and its low 32 bits are stored back to the buffer.
//
// Clobbers: X6, X7, X8, X9, X21.
#define MSGSCHEDULE1(index) \
	MOVWU	(((index-16)&0xf)*4)(X19), X21; \
	MOVWU	(((index-7)&0xf)*4)(X19), X9; \
	MOVWU	(((index-15)&0xf)*4)(X19), X6; \
	MOVWU	(((index-2)&0xf)*4)(X19), X5; \
	ADD	X9, X21; \
	RORW	$7, X6, X7; \
	RORW	$18, X6, X8; \
	SRL	$3, X6; \
	XOR	X7, X6; \
	XOR	X8, X6; \
	ADD	X6, X21; \
	RORW	$17, X5, X7; \
	RORW	$19, X5, X8; \
	SRL	$10, X5; \
	XOR	X7, X5; \
	XOR	X8, X5; \
	ADD	X21, X5; \
	MOVW	X5, ((index&0xf)*4)(X19)
85
// SHA256T1 computes T1 for round index and leaves it in X5.
//
//   T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + Kt + Wt
//     BIGSIGMA1(x) = ROTR(6,x) XOR ROTR(11,x) XOR ROTR(25,x)
//     Ch(x, y, z) = (x AND y) XOR (NOT x AND z)
//
// On entry X5 holds Wt and X18 addresses the round constant table, from
// which Kt is read at offset index*4. The register passed as h doubles
// as an accumulator: on exit it holds h + Kt + Wt + BIGSIGMA1(e).
// e, f and g are only read.
//
// Clobbers: X6, X7, X8.
#define SHA256T1(index, e, f, g, h) \
	MOVWU	(index*4)(X18), X8; \
	RORW	$6, e, X6; \
	RORW	$11, e, X7; \
	ADD	X8, h; \
	RORW	$25, e, X8; \
	XOR	X7, X6; \
	XOR	X8, X6; \
	NOT	e, X7; \
	AND	g, X7; \
	AND	f, e, X8; \
	ADD	X5, h; \
	ADD	X6, h; \
	XOR	X8, X7; \
	ADD	h, X7, X5
106
// SHA256T2 computes T2 and leaves it in X6.
//
//   T2 = BIGSIGMA0(a) + Maj(a, b, c)
//     BIGSIGMA0(x) = ROTR(2,x) XOR ROTR(13,x) XOR ROTR(22,x)
//     Maj(x, y, z) = (x AND y) XOR (x AND z) XOR (y AND z)
//
// a, b and c are only read. X5 is not touched, so a T1 value held
// there survives this macro.
//
// Clobbers: X7, X8, X9.
#define SHA256T2(a, b, c) \
	AND	b, c, X9; \
	AND	a, c, X8; \
	AND	a, b, X7; \
	XOR	X9, X8; \
	XOR	X8, X7; \
	RORW	$22, a, X6; \
	RORW	$13, a, X8; \
	RORW	$2, a, X9; \
	XOR	X8, X6; \
	XOR	X9, X6; \
	ADD	X7, X6
123
// SHA256ROUND performs one compression round. It expects Wt in X5.
//
// T1 (in X5) and T2 (in X6) are computed, then the two new working
// values are written over registers whose old contents are no longer
// needed:
//
//   d = d + T1   (the new e)
//   h = T1 + T2  (the new a)
//
// No other working register is moved; the caller renames the registers
// by rotating the argument list by one position for the next round.
#define SHA256ROUND(index, a, b, c, d, e, f, g, h) \
	SHA256T1(index, e, f, g, h); \
	SHA256T2(a, b, c); \
	ADD	X5, d; \
	ADD	X5, X6, h

// SHA256ROUND0 is a round for 0 <= index <= 15: Wt is read directly
// from the message block before the round is run.
#define SHA256ROUND0(index, a, b, c, d, e, f, g, h) \
	MSGSCHEDULE0(index); \
	SHA256ROUND(index, a, b, c, d, e, f, g, h)

// SHA256ROUND1 is a round for 16 <= index <= 63: Wt is derived from
// earlier schedule words before the round is run.
#define SHA256ROUND1(index, a, b, c, d, e, f, g, h) \
	MSGSCHEDULE1(index); \
	SHA256ROUND(index, a, b, c, d, e, f, g, h)
140
// func block(dig *Digest, p []byte)
//
// Runs the SHA-256 compression function over every complete 64-byte
// chunk of p, updating the eight 32-bit state words stored at the start
// of dig. Any trailing bytes that do not fill a whole chunk are ignored.
//
// The 64-byte frame is used as a circular buffer for the message
// schedule (16 entries of 4 bytes each).
//
// Register roles:
//   X10-X17     working variables a-h (roles rotate by one every round)
//   X18         address of the ·_K round constant table
//   X19         base of the message schedule buffer (X2 + 8)
//   X20         dig
//   X28         address just past the last complete chunk
//   X29         address of the chunk being processed
//   X30         number of bytes to process (multiple of 64)
//   X5-X9, X21  scratch for the round macros
TEXT ·block(SB),0,$64-32
	MOV	p_len+16(FP), X30
	MOV	p_base+8(FP), X29
	AND	$-64, X30		// round length down to whole chunks
	BEQZ	X30, end		// nothing to do without a full chunk
	ADD	X29, X30, X28		// end of input

	ADD	$8, X2, X19		// message schedule
	MOV	$·_K(SB), X18		// const table

	MOV	dig+0(FP), X20
	MOVWU	(7*4)(X20), X17		// h = H7
	MOVWU	(6*4)(X20), X16		// g = H6
	MOVWU	(5*4)(X20), X15		// f = H5
	MOVWU	(4*4)(X20), X14		// e = H4
	MOVWU	(3*4)(X20), X13		// d = H3
	MOVWU	(2*4)(X20), X12		// c = H2
	MOVWU	(1*4)(X20), X11		// b = H1
	MOVWU	(0*4)(X20), X10		// a = H0

loop:
	// Rounds 0-15: Wt comes straight from the message chunk.
	SHA256ROUND0(0, X10, X11, X12, X13, X14, X15, X16, X17)
	SHA256ROUND0(1, X17, X10, X11, X12, X13, X14, X15, X16)
	SHA256ROUND0(2, X16, X17, X10, X11, X12, X13, X14, X15)
	SHA256ROUND0(3, X15, X16, X17, X10, X11, X12, X13, X14)
	SHA256ROUND0(4, X14, X15, X16, X17, X10, X11, X12, X13)
	SHA256ROUND0(5, X13, X14, X15, X16, X17, X10, X11, X12)
	SHA256ROUND0(6, X12, X13, X14, X15, X16, X17, X10, X11)
	SHA256ROUND0(7, X11, X12, X13, X14, X15, X16, X17, X10)
	SHA256ROUND0(8, X10, X11, X12, X13, X14, X15, X16, X17)
	SHA256ROUND0(9, X17, X10, X11, X12, X13, X14, X15, X16)
	SHA256ROUND0(10, X16, X17, X10, X11, X12, X13, X14, X15)
	SHA256ROUND0(11, X15, X16, X17, X10, X11, X12, X13, X14)
	SHA256ROUND0(12, X14, X15, X16, X17, X10, X11, X12, X13)
	SHA256ROUND0(13, X13, X14, X15, X16, X17, X10, X11, X12)
	SHA256ROUND0(14, X12, X13, X14, X15, X16, X17, X10, X11)
	SHA256ROUND0(15, X11, X12, X13, X14, X15, X16, X17, X10)

	// Rounds 16-63: Wt is derived from the previous 16 schedule words.
	SHA256ROUND1(16, X10, X11, X12, X13, X14, X15, X16, X17)
	SHA256ROUND1(17, X17, X10, X11, X12, X13, X14, X15, X16)
	SHA256ROUND1(18, X16, X17, X10, X11, X12, X13, X14, X15)
	SHA256ROUND1(19, X15, X16, X17, X10, X11, X12, X13, X14)
	SHA256ROUND1(20, X14, X15, X16, X17, X10, X11, X12, X13)
	SHA256ROUND1(21, X13, X14, X15, X16, X17, X10, X11, X12)
	SHA256ROUND1(22, X12, X13, X14, X15, X16, X17, X10, X11)
	SHA256ROUND1(23, X11, X12, X13, X14, X15, X16, X17, X10)
	SHA256ROUND1(24, X10, X11, X12, X13, X14, X15, X16, X17)
	SHA256ROUND1(25, X17, X10, X11, X12, X13, X14, X15, X16)
	SHA256ROUND1(26, X16, X17, X10, X11, X12, X13, X14, X15)
	SHA256ROUND1(27, X15, X16, X17, X10, X11, X12, X13, X14)
	SHA256ROUND1(28, X14, X15, X16, X17, X10, X11, X12, X13)
	SHA256ROUND1(29, X13, X14, X15, X16, X17, X10, X11, X12)
	SHA256ROUND1(30, X12, X13, X14, X15, X16, X17, X10, X11)
	SHA256ROUND1(31, X11, X12, X13, X14, X15, X16, X17, X10)
	SHA256ROUND1(32, X10, X11, X12, X13, X14, X15, X16, X17)
	SHA256ROUND1(33, X17, X10, X11, X12, X13, X14, X15, X16)
	SHA256ROUND1(34, X16, X17, X10, X11, X12, X13, X14, X15)
	SHA256ROUND1(35, X15, X16, X17, X10, X11, X12, X13, X14)
	SHA256ROUND1(36, X14, X15, X16, X17, X10, X11, X12, X13)
	SHA256ROUND1(37, X13, X14, X15, X16, X17, X10, X11, X12)
	SHA256ROUND1(38, X12, X13, X14, X15, X16, X17, X10, X11)
	SHA256ROUND1(39, X11, X12, X13, X14, X15, X16, X17, X10)
	SHA256ROUND1(40, X10, X11, X12, X13, X14, X15, X16, X17)
	SHA256ROUND1(41, X17, X10, X11, X12, X13, X14, X15, X16)
	SHA256ROUND1(42, X16, X17, X10, X11, X12, X13, X14, X15)
	SHA256ROUND1(43, X15, X16, X17, X10, X11, X12, X13, X14)
	SHA256ROUND1(44, X14, X15, X16, X17, X10, X11, X12, X13)
	SHA256ROUND1(45, X13, X14, X15, X16, X17, X10, X11, X12)
	SHA256ROUND1(46, X12, X13, X14, X15, X16, X17, X10, X11)
	SHA256ROUND1(47, X11, X12, X13, X14, X15, X16, X17, X10)
	SHA256ROUND1(48, X10, X11, X12, X13, X14, X15, X16, X17)
	SHA256ROUND1(49, X17, X10, X11, X12, X13, X14, X15, X16)
	SHA256ROUND1(50, X16, X17, X10, X11, X12, X13, X14, X15)
	SHA256ROUND1(51, X15, X16, X17, X10, X11, X12, X13, X14)
	SHA256ROUND1(52, X14, X15, X16, X17, X10, X11, X12, X13)
	SHA256ROUND1(53, X13, X14, X15, X16, X17, X10, X11, X12)
	SHA256ROUND1(54, X12, X13, X14, X15, X16, X17, X10, X11)
	SHA256ROUND1(55, X11, X12, X13, X14, X15, X16, X17, X10)
	SHA256ROUND1(56, X10, X11, X12, X13, X14, X15, X16, X17)
	SHA256ROUND1(57, X17, X10, X11, X12, X13, X14, X15, X16)
	SHA256ROUND1(58, X16, X17, X10, X11, X12, X13, X14, X15)
	SHA256ROUND1(59, X15, X16, X17, X10, X11, X12, X13, X14)
	SHA256ROUND1(60, X14, X15, X16, X17, X10, X11, X12, X13)
	SHA256ROUND1(61, X13, X14, X15, X16, X17, X10, X11, X12)
	SHA256ROUND1(62, X12, X13, X14, X15, X16, X17, X10, X11)
	SHA256ROUND1(63, X11, X12, X13, X14, X15, X16, X17, X10)

	// Feed-forward: add the previous state to the working variables and
	// write the result back. After 64 rounds the register rotation has
	// come full circle, so a-h are in X10-X17 again.
	MOVWU	(0*4)(X20), X5
	ADD	X5, X10			// H0 = a + H0
	MOVW	X10, (0*4)(X20)
	MOVWU	(1*4)(X20), X5
	ADD	X5, X11			// H1 = b + H1
	MOVW	X11, (1*4)(X20)
	MOVWU	(2*4)(X20), X5
	ADD	X5, X12			// H2 = c + H2
	MOVW	X12, (2*4)(X20)
	MOVWU	(3*4)(X20), X5
	ADD	X5, X13			// H3 = d + H3
	MOVW	X13, (3*4)(X20)
	MOVWU	(4*4)(X20), X5
	ADD	X5, X14			// H4 = e + H4
	MOVW	X14, (4*4)(X20)
	MOVWU	(5*4)(X20), X5
	ADD	X5, X15			// H5 = f + H5
	MOVW	X15, (5*4)(X20)
	MOVWU	(6*4)(X20), X5
	ADD	X5, X16			// H6 = g + H6
	MOVW	X16, (6*4)(X20)
	MOVWU	(7*4)(X20), X5
	ADD	X5, X17			// H7 = h + H7
	MOVW	X17, (7*4)(X20)

	ADD	$64, X29		// advance to the next chunk
	BNE	X28, X29, loop

end:
	RET
264
View as plain text