// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build !purego

#include "textflag.h"
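// rotInvSRows and invSRows are byte-permutation masks for the TBL
// instruction, used by the key expansion below. AESE with an all-zero
// round key performs ShiftRows+SubBytes, so permuting a key word with the
// inverse ShiftRows pattern first makes a single AESE compute SubWord();
// rotInvSRows additionally rotates the word, giving SubWord(RotWord()).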
DATA rotInvSRows<>+0x00(SB)/8, $0x080f0205040b0e01
DATA rotInvSRows<>+0x08(SB)/8, $0x00070a0d0c030609
GLOBL rotInvSRows<>(SB), (NOPTR+RODATA), $16
DATA invSRows<>+0x00(SB)/8, $0x0b0e0104070a0d00
DATA invSRows<>+0x08(SB)/8, $0x0306090c0f020508
GLOBL invSRows<>(SB), (NOPTR+RODATA), $16
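// A companion Go file is expected to declare these routines; a minimal
// sketch (the exact file name and build tags are assumptions, not part of
// this file):
//
//	//go:noescape
//	func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)
//
//	//go:noescape
//	func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)
//
//	//go:noescape
//	func expandKeyAsm(nr int, key *byte, enc, dec *uint32)
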
// func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)
TEXT ·encryptBlockAsm(SB),NOSPLIT,$0
	MOVD nr+0(FP), R9
	MOVD xk+8(FP), R10
	MOVD dst+16(FP), R11
	MOVD src+24(FP), R12

	VLD1 (R12), [V0.B16]

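	// nr is the round count: 10 (AES-128), 12 (AES-192) or 14 (AES-256).
	// AES-256 enters at enc256 and AES-192 at enc192; the extra rounds
	// fall through into the common ten-round tail at enc128.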
	CMP $12, R9
	BLT enc128
	BEQ enc192
enc256:
	VLD1.P 32(R10), [V1.B16, V2.B16]
	AESE V1.B16, V0.B16
	AESMC V0.B16, V0.B16
	AESE V2.B16, V0.B16
	AESMC V0.B16, V0.B16
enc192:
	VLD1.P 32(R10), [V3.B16, V4.B16]
	AESE V3.B16, V0.B16
	AESMC V0.B16, V0.B16
	AESE V4.B16, V0.B16
	AESMC V0.B16, V0.B16
enc128:
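	// Common tail: load the remaining eleven round keys, run nine full
	// rounds (AESE+AESMC), a final AESE without MixColumns, then XOR in
	// the last round key.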
	VLD1.P 64(R10), [V5.B16, V6.B16, V7.B16, V8.B16]
	VLD1.P 64(R10), [V9.B16, V10.B16, V11.B16, V12.B16]
	VLD1.P 48(R10), [V13.B16, V14.B16, V15.B16]
	AESE V5.B16, V0.B16
	AESMC V0.B16, V0.B16
	AESE V6.B16, V0.B16
	AESMC V0.B16, V0.B16
	AESE V7.B16, V0.B16
	AESMC V0.B16, V0.B16
	AESE V8.B16, V0.B16
	AESMC V0.B16, V0.B16
	AESE V9.B16, V0.B16
	AESMC V0.B16, V0.B16
	AESE V10.B16, V0.B16
	AESMC V0.B16, V0.B16
	AESE V11.B16, V0.B16
	AESMC V0.B16, V0.B16
	AESE V12.B16, V0.B16
	AESMC V0.B16, V0.B16
	AESE V13.B16, V0.B16
	AESMC V0.B16, V0.B16
	AESE V14.B16, V0.B16
	VEOR V0.B16, V15.B16, V0.B16
	VST1 [V0.B16], (R11)
	RET

// func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)
TEXT ·decryptBlockAsm(SB),NOSPLIT,$0
	MOVD nr+0(FP), R9
	MOVD xk+8(FP), R10
	MOVD dst+16(FP), R11
	MOVD src+24(FP), R12

	VLD1 (R12), [V0.B16]

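	// Same dispatch as encryptBlockAsm: the extra AES-256/AES-192 inverse
	// rounds fall through into the common tail at dec128.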
	CMP $12, R9
	BLT dec128
	BEQ dec192
dec256:
	VLD1.P 32(R10), [V1.B16, V2.B16]
	AESD V1.B16, V0.B16
	AESIMC V0.B16, V0.B16
	AESD V2.B16, V0.B16
	AESIMC V0.B16, V0.B16
dec192:
	VLD1.P 32(R10), [V3.B16, V4.B16]
	AESD V3.B16, V0.B16
	AESIMC V0.B16, V0.B16
	AESD V4.B16, V0.B16
	AESIMC V0.B16, V0.B16
dec128:
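	// Common tail of the equivalent inverse cipher: nine AESD+AESIMC
	// rounds, a final AESD without InvMixColumns, then XOR in the last
	// key of the (already inverted) decryption schedule.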
	VLD1.P 64(R10), [V5.B16, V6.B16, V7.B16, V8.B16]
	VLD1.P 64(R10), [V9.B16, V10.B16, V11.B16, V12.B16]
	VLD1.P 48(R10), [V13.B16, V14.B16, V15.B16]
	AESD V5.B16, V0.B16
	AESIMC V0.B16, V0.B16
	AESD V6.B16, V0.B16
	AESIMC V0.B16, V0.B16
	AESD V7.B16, V0.B16
	AESIMC V0.B16, V0.B16
	AESD V8.B16, V0.B16
	AESIMC V0.B16, V0.B16
	AESD V9.B16, V0.B16
	AESIMC V0.B16, V0.B16
	AESD V10.B16, V0.B16
	AESIMC V0.B16, V0.B16
	AESD V11.B16, V0.B16
	AESIMC V0.B16, V0.B16
	AESD V12.B16, V0.B16
	AESIMC V0.B16, V0.B16
	AESD V13.B16, V0.B16
	AESIMC V0.B16, V0.B16
	AESD V14.B16, V0.B16
	VEOR V0.B16, V15.B16, V0.B16
	VST1 [V0.B16], (R11)
	RET

// func expandKeyAsm(nr int, key *byte, enc, dec *uint32)
// Note that round keys are stored as full 128-bit values here, not as
// individual uint32 words.
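// enc receives nr+1 round keys in encryption order. If dec is non-nil, it
// receives the same keys in reverse order, with InvMixColumns applied to
// all but the first and last, as required by the AESD/AESIMC decrypt path.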
TEXT ·expandKeyAsm(SB),NOSPLIT,$0
	MOVD nr+0(FP), R8
	MOVD key+8(FP), R9
	MOVD enc+16(FP), R10
	MOVD dec+24(FP), R11
	LDP rotInvSRows<>(SB), (R0, R1)
	VMOV R0, V3.D[0]
	VMOV R1, V3.D[1]
	VEOR V0.B16, V0.B16, V0.B16 // All zeroes
	MOVW $1, R13
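	// Dispatch on the round count (10, 12 or 14): bit 1 of nr is zero
	// only for 12 (AES-192); of the remaining values, bit 2 is set only
	// for 14 (AES-256). AES-128 falls through.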
	TBZ $1, R8, ks192
	TBNZ $2, R8, ks256
	LDPW (R9), (R4, R5)
	LDPW 8(R9), (R6, R7)
	STPW.P (R4, R5), 8(R10)
	STPW.P (R6, R7), 8(R10)
	MOVW $0x1b, R14
ks128Loop:
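	// Each iteration derives the next four key words: the first is
	// w[i-4] ^ SubWord(RotWord(w[i-1])) ^ Rcon, the remaining three are
	// chained XORs. R13 holds Rcon, R14 the 0x1b wraparound value.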
	VMOV R7, V2.S[0]
	WORD $0x4E030042 // TBL V3.B16, [V2.B16], V2.B16
	AESE V0.B16, V2.B16 // Use AES to compute the SBOX
	EORW R13, R4
	LSLW $1, R13 // Compute next Rcon
	ANDSW $0x100, R13, ZR
	CSELW NE, R14, R13, R13 // Fake modulo
	SUBS $1, R8
	VMOV V2.S[0], R0
	EORW R0, R4
	EORW R4, R5
	EORW R5, R6
	EORW R6, R7
	STPW.P (R4, R5), 8(R10)
	STPW.P (R6, R7), 8(R10)
	BNE ks128Loop
	CBZ R11, ksDone // If dec is nil we are done
	SUB $176, R10
	// Decryption keys are encryption keys with InverseMixColumns applied
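	// and stored in reverse order; the first and last round keys are
	// copied unchanged (V7 and V12 below).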
	VLD1.P 64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	VMOV V0.B16, V7.B16
	AESIMC V1.B16, V6.B16
	AESIMC V2.B16, V5.B16
	AESIMC V3.B16, V4.B16
	VLD1.P 64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	AESIMC V0.B16, V11.B16
	AESIMC V1.B16, V10.B16
	AESIMC V2.B16, V9.B16
	AESIMC V3.B16, V8.B16
	VLD1 (R10), [V0.B16, V1.B16, V2.B16]
	AESIMC V0.B16, V14.B16
	AESIMC V1.B16, V13.B16
	VMOV V2.B16, V12.B16
	VST1.P [V12.B16, V13.B16, V14.B16], 48(R11)
	VST1.P [V8.B16, V9.B16, V10.B16, V11.B16], 64(R11)
	VST1 [V4.B16, V5.B16, V6.B16, V7.B16], (R11)
	B ksDone
ks192:
	LDPW (R9), (R2, R3)
	LDPW 8(R9), (R4, R5)
	LDPW 16(R9), (R6, R7)
	STPW.P (R2, R3), 8(R10)
	STPW.P (R4, R5), 8(R10)
	SUB $4, R8
ks192Loop:
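	// AES-192 derives six key words per iteration; only the first uses
	// SubWord(RotWord()) ^ Rcon, the rest are chained XORs.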
	STPW.P (R6, R7), 8(R10)
	VMOV R7, V2.S[0]
	WORD $0x4E030042 // TBL V3.B16, [V2.B16], V2.B16
	AESE V0.B16, V2.B16
	EORW R13, R2
	LSLW $1, R13
	SUBS $1, R8
	VMOV V2.S[0], R0
	EORW R0, R2
	EORW R2, R3
	EORW R3, R4
	EORW R4, R5
	EORW R5, R6
	EORW R6, R7
	STPW.P (R2, R3), 8(R10)
	STPW.P (R4, R5), 8(R10)
	BNE ks192Loop
	CBZ R11, ksDone
	SUB $208, R10
	VLD1.P 64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	VMOV V0.B16, V7.B16
	AESIMC V1.B16, V6.B16
	AESIMC V2.B16, V5.B16
	AESIMC V3.B16, V4.B16
	VLD1.P 64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	AESIMC V0.B16, V11.B16
	AESIMC V1.B16, V10.B16
	AESIMC V2.B16, V9.B16
	AESIMC V3.B16, V8.B16
	VLD1.P 64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	AESIMC V0.B16, V15.B16
	AESIMC V1.B16, V14.B16
	AESIMC V2.B16, V13.B16
	AESIMC V3.B16, V12.B16
	VLD1 (R10), [V0.B16]
	VST1.P [V0.B16], 16(R11)
	VST1.P [V12.B16, V13.B16, V14.B16, V15.B16], 64(R11)
	VST1.P [V8.B16, V9.B16, V10.B16, V11.B16], 64(R11)
	VST1 [V4.B16, V5.B16, V6.B16, V7.B16], (R11)
	B ksDone
ks256:
	LDP invSRows<>(SB), (R0, R1)
	VMOV R0, V4.D[0]
	VMOV R1, V4.D[1]
	LDPW (R9), (R0, R1)
	LDPW 8(R9), (R2, R3)
	LDPW 16(R9), (R4, R5)
	LDPW 24(R9), (R6, R7)
	STPW.P (R0, R1), 8(R10)
	STPW.P (R2, R3), 8(R10)
	SUB $7, R8
ks256Loop:
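	// AES-256 derives eight key words per iteration: the first uses
	// SubWord(RotWord()) ^ Rcon (rotInvSRows table in V3), the fifth uses
	// SubWord() only (invSRows table in V4).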
	STPW.P (R4, R5), 8(R10)
	STPW.P (R6, R7), 8(R10)
	VMOV R7, V2.S[0]
	WORD $0x4E030042 // TBL V3.B16, [V2.B16], V2.B16
	AESE V0.B16, V2.B16
	EORW R13, R0
	LSLW $1, R13
	SUBS $1, R8
	VMOV V2.S[0], R9
	EORW R9, R0
	EORW R0, R1
	EORW R1, R2
	EORW R2, R3
	VMOV R3, V2.S[0]
	WORD $0x4E040042 // TBL V4.B16, [V2.B16], V2.B16
	AESE V0.B16, V2.B16
	VMOV V2.S[0], R9
	EORW R9, R4
	EORW R4, R5
	EORW R5, R6
	EORW R6, R7
	STPW.P (R0, R1), 8(R10)
	STPW.P (R2, R3), 8(R10)
	BNE ks256Loop
	CBZ R11, ksDone
	SUB $240, R10
	VLD1.P 64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	VMOV V0.B16, V7.B16
	AESIMC V1.B16, V6.B16
	AESIMC V2.B16, V5.B16
	AESIMC V3.B16, V4.B16
	VLD1.P 64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	AESIMC V0.B16, V11.B16
	AESIMC V1.B16, V10.B16
	AESIMC V2.B16, V9.B16
	AESIMC V3.B16, V8.B16
	VLD1.P 64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	AESIMC V0.B16, V15.B16
	AESIMC V1.B16, V14.B16
	AESIMC V2.B16, V13.B16
	AESIMC V3.B16, V12.B16
	VLD1 (R10), [V0.B16, V1.B16, V2.B16]
	AESIMC V0.B16, V18.B16
	AESIMC V1.B16, V17.B16
	VMOV V2.B16, V16.B16
	VST1.P [V16.B16, V17.B16, V18.B16], 48(R11)
	VST1.P [V12.B16, V13.B16, V14.B16, V15.B16], 64(R11)
	VST1.P [V8.B16, V9.B16, V10.B16, V11.B16], 64(R11)
	VST1 [V4.B16, V5.B16, V6.B16, V7.B16], (R11)
ksDone:
	RET
