src/crypto/aes/asm_ppc64x.s
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build (ppc64 || ppc64le) && !purego

// Based on CRYPTOGAMS code with the following comment:
// # ====================================================================
// # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
// # project. The module is, however, dual licensed under OpenSSL and
// # CRYPTOGAMS licenses depending on where you obtain it. For further
// # details see http://www.openssl.org/~appro/cryptogams/.
// # ====================================================================

// Original code can be found at the link below:
// https://github.com/dot-asm/cryptogams/blob/master/ppc/aesp8-ppc.pl

// Some function names were changed to be consistent with Go function
// names. For instance, the functions aes_p8_set_{en,de}crypt_key became
// set{En,De}cryptKeyAsm. setEncryptKeyAsm was also split in two, and a
// new function (doEncryptKeyAsm) was created. This was necessary to
// avoid overwriting the arguments when setDecryptKeyAsm calls
// setEncryptKeyAsm. There were other modifications as well, but the
// functionality was kept the same.

#include "textflag.h"

// For expandKeyAsm
#define INP     R3
#define BITS    R4
#define OUTENC  R5 // Pointer to next expanded encrypt key
#define PTR     R6
#define CNT     R7
#define ROUNDS  R8
#define OUTDEC  R9 // Pointer to next expanded decrypt key
#define TEMP    R19
#define ZERO    V0
#define IN0     V1
#define IN1     V2
#define KEY     V3
#define RCON    V4
#define MASK    V5
#define TMP     V6
#define STAGE   V7
#define OUTPERM V8
#define OUTMASK V9
#define OUTHEAD V10
#define OUTTAIL V11

// For P9 instruction emulation
#define ESPERM V21 // Endian swapping permute into BE
#define TMP2   V22 // Temporary for P8_STXVB16X

// For {en,de}cryptBlockAsm
#define BLK_INP    R3
#define BLK_OUT    R4
#define BLK_KEY    R5
#define BLK_ROUNDS R6
#define BLK_IDX    R7

DATA ·rcon+0x00(SB)/8, $0x0f0e0d0c0b0a0908 // Permute for vector doubleword endian swap
DATA ·rcon+0x08(SB)/8, $0x0706050403020100
DATA ·rcon+0x10(SB)/8, $0x0100000001000000 // RCON
DATA ·rcon+0x18(SB)/8, $0x0100000001000000 // RCON
DATA ·rcon+0x20(SB)/8, $0x1b0000001b000000
DATA ·rcon+0x28(SB)/8, $0x1b0000001b000000
DATA ·rcon+0x30(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK
DATA ·rcon+0x38(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK
DATA ·rcon+0x40(SB)/8, $0x0000000000000000
DATA ·rcon+0x48(SB)/8, $0x0000000000000000
GLOBL ·rcon(SB), RODATA, $80
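
// Layout of the ·rcon table, in 16-byte quadwords:
//   0x00: byte-reverse permute pattern (ESPERM), used only by the
//         POWER8/ppc64le emulation of the POWER9 BE load/store ops
//   0x10: AES round constant 0x01, splatted into each word lane; the key
//         schedule doubles it once per round with VADDUWM
//   0x20: round constant 0x1b, reloaded when plain doubling of 0x80 can
//         no longer produce the next constant without GF(2^8) reduction
//   0x30: the "rotate-n-splat" VPERM mask used by the key schedule
//   0x40: zeros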

#ifdef GOARCH_ppc64le
# ifdef GOPPC64_power9
#define P8_LXVB16X(RA,RB,VT)  LXVB16X (RA+RB), VT
#define P8_STXVB16X(VS,RA,RB) STXVB16X VS, (RA+RB)
#define XXBRD_ON_LE(VA,VT)    XXBRD VA, VT
# else
// On POWER8/ppc64le, emulate the POWER9 instructions by loading unaligned
// doublewords and byte-swapping each doubleword to emulate BE load/stores.
#define NEEDS_ESPERM
#define P8_LXVB16X(RA,RB,VT) \
	LXVD2X (RA+RB), VT \
	VPERM VT, VT, ESPERM, VT

#define P8_STXVB16X(VS,RA,RB) \
	VPERM VS, VS, ESPERM, TMP2 \
	STXVD2X TMP2, (RA+RB)

#define XXBRD_ON_LE(VA,VT) \
	VPERM VA, VA, ESPERM, VT

# endif // defined(GOPPC64_power9)
#else
#define P8_LXVB16X(RA,RB,VT)  LXVD2X (RA+RB), VT
#define P8_STXVB16X(VS,RA,RB) STXVD2X VS, (RA+RB)
#define XXBRD_ON_LE(VA, VT)
#endif // defined(GOARCH_ppc64le)
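
// How the POWER8 emulation above works: LXVB16X on POWER9 loads 16 bytes
// in big-endian element order regardless of host endianness. On a
// little-endian POWER8, LXVD2X loads the two doublewords with their bytes
// in LE order, and the following VPERM with ESPERM (the 0x0f0e...0100
// pattern at ·rcon+0x00) reverses the byte order, producing the same
// register image LXVB16X would. The store macro applies the same permute
// before STXVD2X, and XXBRD_ON_LE stands in for the POWER9 XXBRD
// doubleword byte-reverse. On big-endian targets, plain LXVD2X/STXVD2X
// already match, so the macros collapse to single instructions and
// XXBRD_ON_LE becomes a no-op.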

// func expandKeyAsm(nr int, key *byte, enc *uint32, dec *uint32)
TEXT ·expandKeyAsm(SB), NOSPLIT|NOFRAME, $0
	// Load the arguments into registers.
	MOVD nr+0(FP), ROUNDS
	MOVD key+8(FP), INP
	MOVD enc+16(FP), OUTENC
	MOVD dec+24(FP), OUTDEC

#ifdef NEEDS_ESPERM
	MOVD $·rcon(SB), PTR // PTR points to rcon addr
	LVX (PTR), ESPERM
	ADD $0x10, PTR
#else
	MOVD $·rcon+0x10(SB), PTR // PTR points to rcon addr (skipping permute vector)
#endif

	// Get key from memory and write aligned into VR
	P8_LXVB16X(INP, R0, IN0)
	ADD $0x10, INP, INP
	MOVD $0x20, TEMP

	CMPW ROUNDS, $12
	LVX (PTR)(R0), RCON   // lvx 4,0,6      Load first 16 bytes into RCON
	LVX (PTR)(TEMP), MASK
	ADD $0x10, PTR, PTR   // addi 6,6,0x10  PTR to next 16 bytes of RCON
	MOVD $8, CNT          // li 7,8         CNT = 8
	VXOR ZERO, ZERO, ZERO // vxor 0,0,0     Zero to be zero :)
	MOVD CNT, CTR         // mtctr 7        Set the counter to 8 (rounds)

	// The expanded decrypt key is the expanded encrypt key stored in reverse order.
	// Move OUTDEC to the last key location, and store in descending order.
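	// (This works because VNCIPHER applies InvMixColumns after the
	// round-key XOR, matching the straight inverse cipher, so the
	// encryption round keys are reused unmodified, only in reverse.)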
	ADD $160, OUTDEC, OUTDEC
	BLT loop128
	ADD $32, OUTDEC, OUTDEC
	BEQ l192
	ADD $32, OUTDEC, OUTDEC
	JMP l256

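// AES-128 key schedule. ROUNDS is 10/12/14 for AES-128/192/256, so the
// CMPW ROUNDS, $12 above steers the BLT/BEQ/JMP dispatch. Each loop128
// iteration derives the next 16-byte round key from the previous one:
//
//   VPERM with MASK (0x0d0e0f0c...) takes the last word of IN0, rotates
//   it left one byte (RotWord), and splats it into all four lanes.
//   VCIPHERLAST against RCON then applies SubBytes and XORs in the round
//   constant; with all lanes identical, ShiftRows has no net effect, so
//   the result is SubWord(RotWord(w[3])) ^ Rcon in every lane.
//
//   The VSLDOI $12/VXOR chain computes the running prefix XOR
//   w[i] ^= w[i-1] across the vector, and the final VXOR with KEY
//   completes the schedule step. VADDUWM doubles RCON for the next round.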
loop128:
	// Key schedule (Round 1 to 8)
	VPERM IN0, IN0, MASK, KEY  // vperm 3,1,1,5   Rotate-n-splat
	VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
	STXVD2X IN0, (R0+OUTENC)
	STXVD2X IN0, (R0+OUTDEC)
	VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
	ADD $16, OUTENC, OUTENC
	ADD $-16, OUTDEC, OUTDEC

	VXOR IN0, TMP, IN0         // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0         // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0         // vxor 1,1,6
	VADDUWM RCON, RCON, RCON   // vadduwm 4,4,4
	VXOR IN0, KEY, IN0         // vxor 1,1,3
	BDNZ loop128

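	// After eight doublings RCON holds 0x80; the next two round
	// constants, 0x1b and 0x36, cannot be reached by plain word addition
	// (doubling 0x80 requires the GF(2^8) reduction), so 0x1b is
	// reloaded from the table and doubled once more below.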
	LVX (PTR)(R0), RCON        // lvx 4,0,6       Last two round keys

	// Key schedule (Round 9)
	VPERM IN0, IN0, MASK, KEY  // vperm 3,1,1,5   Rotate-n-splat
	VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
	STXVD2X IN0, (R0+OUTENC)
	STXVD2X IN0, (R0+OUTDEC)
	VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
	ADD $16, OUTENC, OUTENC
	ADD $-16, OUTDEC, OUTDEC

	// Key schedule (Round 10)
	VXOR IN0, TMP, IN0         // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0         // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0         // vxor 1,1,6
	VADDUWM RCON, RCON, RCON   // vadduwm 4,4,4
	VXOR IN0, KEY, IN0         // vxor 1,1,3

	VPERM IN0, IN0, MASK, KEY  // vperm 3,1,1,5   Rotate-n-splat
	VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
	STXVD2X IN0, (R0+OUTENC)
	STXVD2X IN0, (R0+OUTDEC)
	VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
	ADD $16, OUTENC, OUTENC
	ADD $-16, OUTDEC, OUTDEC

	// Key schedule (Round 11)
	VXOR IN0, TMP, IN0         // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0         // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0         // vxor 1,1,6
	VXOR IN0, KEY, IN0         // vxor 1,1,3
	STXVD2X IN0, (R0+OUTENC)
	STXVD2X IN0, (R0+OUTDEC)

	RET

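// AES-192 key schedule. The 24-byte key arrives as 16 bytes in IN0 plus
// 8 bytes in the upper half of IN1. Each loop192 iteration emits three of
// the thirteen round keys, so four iterations plus the initial store
// below cover all of them.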
l192:
	LXSDX (INP+R0), IN1          // Load next 8 bytes into upper half of VSR,
	XXBRD_ON_LE(IN1, IN1)        // and convert to BE ordering on LE hosts.
	MOVD $4, CNT                 // li 7,4
	STXVD2X IN0, (R0+OUTENC)
	STXVD2X IN0, (R0+OUTDEC)
	ADD $16, OUTENC, OUTENC
	ADD $-16, OUTDEC, OUTDEC
	VSPLTISB $8, KEY             // vspltisb 3,8
	MOVD CNT, CTR                // mtctr 7
	VSUBUBM MASK, KEY, MASK      // vsububm 5,5,3

loop192:
	VPERM IN1, IN1, MASK, KEY    // vperm 3,2,2,5
	VSLDOI $12, ZERO, IN0, TMP   // vsldoi 6,0,1,12
	VCIPHERLAST KEY, RCON, KEY   // vcipherlast 3,3,4

	VXOR IN0, TMP, IN0           // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP   // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0           // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP   // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0           // vxor 1,1,6

	VSLDOI $8, ZERO, IN1, STAGE  // vsldoi 7,0,2,8
	VSPLTW $3, IN0, TMP          // vspltw 6,1,3
	VXOR TMP, IN1, TMP           // vxor 6,6,2
	VSLDOI $12, ZERO, IN1, IN1   // vsldoi 2,0,2,12
	VADDUWM RCON, RCON, RCON     // vadduwm 4,4,4
	VXOR IN1, TMP, IN1           // vxor 2,2,6
	VXOR IN0, KEY, IN0           // vxor 1,1,3
	VXOR IN1, KEY, IN1           // vxor 2,2,3
	VSLDOI $8, STAGE, IN0, STAGE // vsldoi 7,7,1,8

	VPERM IN1, IN1, MASK, KEY    // vperm 3,2,2,5
	VSLDOI $12, ZERO, IN0, TMP   // vsldoi 6,0,1,12
	STXVD2X STAGE, (R0+OUTENC)
	STXVD2X STAGE, (R0+OUTDEC)
	VCIPHERLAST KEY, RCON, KEY   // vcipherlast 3,3,4
	ADD $16, OUTENC, OUTENC
	ADD $-16, OUTDEC, OUTDEC

	VSLDOI $8, IN0, IN1, STAGE   // vsldoi 7,1,2,8
	VXOR IN0, TMP, IN0           // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP   // vsldoi 6,0,6,12
	STXVD2X STAGE, (R0+OUTENC)
	STXVD2X STAGE, (R0+OUTDEC)
	VXOR IN0, TMP, IN0           // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP   // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0           // vxor 1,1,6
	ADD $16, OUTENC, OUTENC
	ADD $-16, OUTDEC, OUTDEC

	VSPLTW $3, IN0, TMP          // vspltw 6,1,3
	VXOR TMP, IN1, TMP           // vxor 6,6,2
	VSLDOI $12, ZERO, IN1, IN1   // vsldoi 2,0,2,12
	VADDUWM RCON, RCON, RCON     // vadduwm 4,4,4
	VXOR IN1, TMP, IN1           // vxor 2,2,6
	VXOR IN0, KEY, IN0           // vxor 1,1,3
	VXOR IN1, KEY, IN1           // vxor 2,2,3
	STXVD2X IN0, (R0+OUTENC)
	STXVD2X IN0, (R0+OUTDEC)
	ADD $16, OUTENC, OUTENC
	ADD $-16, OUTDEC, OUTDEC
	BDNZ loop192

	RET

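// AES-256 key schedule. Both 16-byte key halves arrive in IN0 and IN1,
// giving the first two of fifteen round keys up front; each of the seven
// loop256 iterations stores two more. The even schedule step uses the
// usual RotWord+SubWord+Rcon (VPERM/VCIPHERLAST), while the odd step for
// the second half applies SubWord only, which is done with VSBOX.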
l256:
	P8_LXVB16X(INP, R0, IN1)
	MOVD $7, CNT               // li 7,7
	STXVD2X IN0, (R0+OUTENC)
	STXVD2X IN0, (R0+OUTDEC)
	ADD $16, OUTENC, OUTENC
	ADD $-16, OUTDEC, OUTDEC
	MOVD CNT, CTR              // mtctr 7

loop256:
	VPERM IN1, IN1, MASK, KEY  // vperm 3,2,2,5
	VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
	STXVD2X IN1, (R0+OUTENC)
	STXVD2X IN1, (R0+OUTDEC)
	VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
	ADD $16, OUTENC, OUTENC
	ADD $-16, OUTDEC, OUTDEC

	VXOR IN0, TMP, IN0         // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0         // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0         // vxor 1,1,6
	VADDUWM RCON, RCON, RCON   // vadduwm 4,4,4
	VXOR IN0, KEY, IN0         // vxor 1,1,3
	STXVD2X IN0, (R0+OUTENC)
	STXVD2X IN0, (R0+OUTDEC)
	ADD $16, OUTENC, OUTENC
	ADD $-16, OUTDEC, OUTDEC
	BDZ done

	VSPLTW $3, IN0, KEY        // vspltw 3,1,3
	VSLDOI $12, ZERO, IN1, TMP // vsldoi 6,0,2,12
	VSBOX KEY, KEY             // vsbox 3,3

	VXOR IN1, TMP, IN1         // vxor 2,2,6
	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR IN1, TMP, IN1         // vxor 2,2,6
	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR IN1, TMP, IN1         // vxor 2,2,6

	VXOR IN1, KEY, IN1         // vxor 2,2,3
	JMP loop256                // b .Loop256

done:
	RET

// func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)
TEXT ·encryptBlockAsm(SB), NOSPLIT|NOFRAME, $0
	MOVD nr+0(FP), R6   // Round count/Key size
	MOVD xk+8(FP), R5   // Key pointer
	MOVD dst+16(FP), R3 // Dest pointer
	MOVD src+24(FP), R4 // Src pointer
#ifdef NEEDS_ESPERM
	MOVD $·rcon(SB), R7
	LVX (R7), ESPERM    // Permute value for P8_ macros.
#endif

	// Set CR{1,2,3}EQ to hold the key size information.
	CMPU R6, $10, CR1
	CMPU R6, $12, CR2
	CMPU R6, $14, CR3

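	// LXVD2X uses indexed (RA+RB) addressing, so the constant offsets
	// 16..112 must live in registers; R6-R12 hold them, and R5 is later
	// advanced by 112 so the same offsets reach the remaining keys.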
	MOVD $16, R6
	MOVD $32, R7
	MOVD $48, R8
	MOVD $64, R9
	MOVD $80, R10
	MOVD $96, R11
	MOVD $112, R12

	// Load text in BE order
	P8_LXVB16X(R4, R0, V0)

	// V1, V2 will hold keys, V0 is a temp.
	// At completion, V2 will hold the ciphertext.
	// Load xk[0:3] and xor with text
	LXVD2X (R0+R5), V1
	VXOR V0, V1, V0

	// Load xk[4:11] and cipher
	LXVD2X (R6+R5), V1
	LXVD2X (R7+R5), V2
	VCIPHER V0, V1, V0
	VCIPHER V0, V2, V0

	// Load xk[12:19] and cipher
	LXVD2X (R8+R5), V1
	LXVD2X (R9+R5), V2
	VCIPHER V0, V1, V0
	VCIPHER V0, V2, V0

	// Load xk[20:27] and cipher
	LXVD2X (R10+R5), V1
	LXVD2X (R11+R5), V2
	VCIPHER V0, V1, V0
	VCIPHER V0, V2, V0

	// Increment xk pointer to reuse constant offsets in R6-R12.
	ADD $112, R5

	// Load xk[28:35] and cipher
	LXVD2X (R0+R5), V1
	LXVD2X (R6+R5), V2
	VCIPHER V0, V1, V0
	VCIPHER V0, V2, V0

	// Load xk[36:43] and cipher
	LXVD2X (R7+R5), V1
	LXVD2X (R8+R5), V2
	BEQ CR1, Lenc_tail // Key size 10?
	VCIPHER V0, V1, V0
	VCIPHER V0, V2, V0

	// Load xk[44:51] and cipher
	LXVD2X (R9+R5), V1
	LXVD2X (R10+R5), V2
	BEQ CR2, Lenc_tail // Key size 12?
	VCIPHER V0, V1, V0
	VCIPHER V0, V2, V0

	// Load xk[52:59] and cipher
	LXVD2X (R11+R5), V1
	LXVD2X (R12+R5), V2
	BNE CR3, Linvalid_key_len // Not key size 14?
	// Fallthrough to final cipher

Lenc_tail:
	// Cipher the last two keys such that key information is
	// cleared from V1 and V2.
	VCIPHER V0, V1, V1
	VCIPHERLAST V1, V2, V2

	// Store the result in BE order.
	P8_STXVB16X(V2, R3, R0)
	RET

Linvalid_key_len:
	// Segfault, this should never happen. Only 3 key sizes are created/used.
	MOVD R0, 0(R0)
	RET

// func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)
TEXT ·decryptBlockAsm(SB), NOSPLIT|NOFRAME, $0
	MOVD nr+0(FP), R6   // Round count/Key size
	MOVD xk+8(FP), R5   // Key pointer
	MOVD dst+16(FP), R3 // Dest pointer
	MOVD src+24(FP), R4 // Src pointer
#ifdef NEEDS_ESPERM
	MOVD $·rcon(SB), R7
	LVX (R7), ESPERM    // Permute value for P8_ macros.
#endif

	// Set CR{1,2,3}EQ to hold the key size information.
	CMPU R6, $10, CR1
	CMPU R6, $12, CR2
	CMPU R6, $14, CR3

	MOVD $16, R6
	MOVD $32, R7
	MOVD $48, R8
	MOVD $64, R9
	MOVD $80, R10
	MOVD $96, R11
	MOVD $112, R12

	// Load ciphertext in BE order
	P8_LXVB16X(R4, R0, V0)

	// V1, V2 will hold keys, V0 is a temp.
	// At completion, V2 will hold the plaintext.
	// Load xk[0:3] and xor with ciphertext
	LXVD2X (R0+R5), V1
	VXOR V0, V1, V0

	// Load xk[4:11] and decipher
	LXVD2X (R6+R5), V1
	LXVD2X (R7+R5), V2
	VNCIPHER V0, V1, V0
	VNCIPHER V0, V2, V0

	// Load xk[12:19] and decipher
	LXVD2X (R8+R5), V1
	LXVD2X (R9+R5), V2
	VNCIPHER V0, V1, V0
	VNCIPHER V0, V2, V0

	// Load xk[20:27] and decipher
	LXVD2X (R10+R5), V1
	LXVD2X (R11+R5), V2
	VNCIPHER V0, V1, V0
	VNCIPHER V0, V2, V0

	// Increment xk pointer to reuse constant offsets in R6-R12.
	ADD $112, R5

	// Load xk[28:35] and decipher
	LXVD2X (R0+R5), V1
	LXVD2X (R6+R5), V2
	VNCIPHER V0, V1, V0
	VNCIPHER V0, V2, V0

	// Load xk[36:43] and decipher
	LXVD2X (R7+R5), V1
	LXVD2X (R8+R5), V2
	BEQ CR1, Ldec_tail // Key size 10?
	VNCIPHER V0, V1, V0
	VNCIPHER V0, V2, V0

	// Load xk[44:51] and decipher
	LXVD2X (R9+R5), V1
	LXVD2X (R10+R5), V2
	BEQ CR2, Ldec_tail // Key size 12?
	VNCIPHER V0, V1, V0
	VNCIPHER V0, V2, V0

	// Load xk[52:59] and decipher
	LXVD2X (R11+R5), V1
	LXVD2X (R12+R5), V2
	BNE CR3, Linvalid_key_len // Not key size 14?
	// Fallthrough to final decipher

Ldec_tail:
	// Decipher the last two keys such that key information is
	// cleared from V1 and V2.
	VNCIPHER V0, V1, V1
	VNCIPHERLAST V1, V2, V2

	// Store the result in BE order.
	P8_STXVB16X(V2, R3, R0)
	RET

Linvalid_key_len:
	// Segfault, this should never happen. Only 3 key sizes are created/used.
	MOVD R0, 0(R0)
	RET

// Remove the defines from above so the names can be reused below.
#undef INP
#undef OUTENC
#undef ROUNDS
#undef KEY
#undef TMP

#define INP    R3
#define OUTP   R4
#define LEN    R5
#define KEYP   R6
#define ROUNDS R7
#define IVP    R8
#define ENC    R9

#define INOUT V2
#define TMP   V3
#define IVEC  V4

// Load the crypt key into VSRs.
//
// The expanded key is stored and loaded using
// STXVD2X/LXVD2X. The in-memory byte ordering
// depends on the endianness of the machine. The
// expanded keys are generated by expandKeyAsm above.
//
// Rkeyp holds the key pointer. It is clobbered. Once
// the expanded keys are loaded, it is not needed.
//
// R12, R14-R21 are scratch registers.
// For 10 rounds, V6, V11-V20 hold the expanded key.
// For 12 rounds, V6, V9-V20 hold the expanded key.
// For 14 rounds, V6, V7-V20 hold the expanded key.
#define LOAD_KEY(Rkeyp) \
	MOVD $16, R12 \
	MOVD $32, R14 \
	MOVD $48, R15 \
	MOVD $64, R16 \
	MOVD $80, R17 \
	MOVD $96, R18 \
	MOVD $112, R19 \
	MOVD $128, R20 \
	MOVD $144, R21 \
	LXVD2X (R0+Rkeyp), V6 \
	ADD $16, Rkeyp \
	BEQ CR1, L_start10 \
	BEQ CR2, L_start12 \
	LXVD2X (R0+Rkeyp), V7 \
	LXVD2X (R12+Rkeyp), V8 \
	ADD $32, Rkeyp \
L_start12: \
	LXVD2X (R0+Rkeyp), V9 \
	LXVD2X (R12+Rkeyp), V10 \
	ADD $32, Rkeyp \
L_start10: \
	LXVD2X (R0+Rkeyp), V11 \
	LXVD2X (R12+Rkeyp), V12 \
	LXVD2X (R14+Rkeyp), V13 \
	LXVD2X (R15+Rkeyp), V14 \
	LXVD2X (R16+Rkeyp), V15 \
	LXVD2X (R17+Rkeyp), V16 \
	LXVD2X (R18+Rkeyp), V17 \
	LXVD2X (R19+Rkeyp), V18 \
	LXVD2X (R20+Rkeyp), V19 \
	LXVD2X (R21+Rkeyp), V20

// Perform the AES cipher operation for key sizes 10/12/14 using the keys
// loaded by LOAD_KEY, and the key size information held in CR1EQ/CR2EQ.
//
// Vxor is ideally V6 (Key[0-3]), but for slightly improved encryption
// performance V6 and IVEC can be swapped (xor is both associative and
// commutative) during encryption:
//
//	VXOR INOUT, IVEC, INOUT
//	VXOR INOUT, V6, INOUT
//
// into
//
//	VXOR INOUT, V6, INOUT
//	VXOR INOUT, IVEC, INOUT
//
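// The key-size branches below fall into a shared tail: AES-128 enters at
// label10 (rounds V11-V20), AES-192 at label12 (V9-V20), and AES-256 runs
// all of V7-V20, mirroring the layout set up by LOAD_KEY.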
#define CIPHER_BLOCK(Vin, Vxor, Vout, vcipher, vciphel, label10, label12) \
	VXOR Vin, Vxor, Vout \
	BEQ CR1, label10 \
	BEQ CR2, label12 \
	vcipher Vout, V7, Vout \
	vcipher Vout, V8, Vout \
label12: \
	vcipher Vout, V9, Vout \
	vcipher Vout, V10, Vout \
label10: \
	vcipher Vout, V11, Vout \
	vcipher Vout, V12, Vout \
	vcipher Vout, V13, Vout \
	vcipher Vout, V14, Vout \
	vcipher Vout, V15, Vout \
	vcipher Vout, V16, Vout \
	vcipher Vout, V17, Vout \
	vcipher Vout, V18, Vout \
	vcipher Vout, V19, Vout \
	vciphel Vout, V20, Vout

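// Zeroize the expanded-key registers on exit so key material does not
// linger in vector registers after the routine returns.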
#define CLEAR_KEYS() \
	VXOR V6, V6, V6 \
	VXOR V7, V7, V7 \
	VXOR V8, V8, V8 \
	VXOR V9, V9, V9 \
	VXOR V10, V10, V10 \
	VXOR V11, V11, V11 \
	VXOR V12, V12, V12 \
	VXOR V13, V13, V13 \
	VXOR V14, V14, V14 \
	VXOR V15, V15, V15 \
	VXOR V16, V16, V16 \
	VXOR V17, V17, V17 \
	VXOR V18, V18, V18 \
	VXOR V19, V19, V19 \
	VXOR V20, V20, V20

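// CBC mode chains blocks through the IV:
//
//	encrypt: C[i] = E(K, P[i] ^ C[i-1]), with C[-1] = IV
//	decrypt: P[i] = D(K, C[i]) ^ C[i-1], with C[-1] = IV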
// func cryptBlocksChain(src, dst *byte, length int, key *uint32, iv *byte, enc int, nr int)
TEXT ·cryptBlocksChain(SB), NOSPLIT|NOFRAME, $0
	MOVD src+0(FP), INP
	MOVD dst+8(FP), OUTP
	MOVD length+16(FP), LEN
	MOVD key+24(FP), KEYP
	MOVD iv+32(FP), IVP
	MOVD enc+40(FP), ENC
	MOVD nr+48(FP), ROUNDS

#ifdef NEEDS_ESPERM
	MOVD $·rcon(SB), R11
	LVX (R11), ESPERM // Permute value for P8_ macros.
#endif

	// Assume len > 0 && len % blockSize == 0.
	CMPW ENC, $0
	P8_LXVB16X(IVP, R0, IVEC)
	CMPU ROUNDS, $10, CR1
	CMPU ROUNDS, $12, CR2 // Only sizes 10/12/14 are supported.

	// Setup key in VSRs, and set loop count in CTR.
	LOAD_KEY(KEYP)
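	// LEN is a byte count; shifting right by 4 converts it to the
	// number of 16-byte blocks for the CTR-driven loop.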
	SRD $4, LEN
	MOVD LEN, CTR

	BEQ Lcbc_dec

	PCALIGN $16
Lcbc_enc:
	P8_LXVB16X(INP, R0, INOUT)
	ADD $16, INP
	VXOR INOUT, V6, INOUT
	CIPHER_BLOCK(INOUT, IVEC, INOUT, VCIPHER, VCIPHERLAST, Lcbc_enc10, Lcbc_enc12)
	VOR INOUT, INOUT, IVEC // ciphertext (INOUT) is IVEC for next block.
	P8_STXVB16X(INOUT, OUTP, R0)
	ADD $16, OUTP
	BDNZ Lcbc_enc

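	// Persist the last ciphertext block back through the iv pointer so
	// a subsequent call can continue the chain.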
	P8_STXVB16X(INOUT, IVP, R0)
	CLEAR_KEYS()
	RET

	PCALIGN $16
Lcbc_dec:
	P8_LXVB16X(INP, R0, TMP)
	ADD $16, INP
	CIPHER_BLOCK(TMP, V6, INOUT, VNCIPHER, VNCIPHERLAST, Lcbc_dec10, Lcbc_dec12)
	VXOR INOUT, IVEC, INOUT
	VOR TMP, TMP, IVEC // TMP is IVEC for next block.
	P8_STXVB16X(INOUT, OUTP, R0)
	ADD $16, OUTP
	BDNZ Lcbc_dec

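	// Persist the last input ciphertext block back through the iv
	// pointer so a subsequent call can continue the chain.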
	P8_STXVB16X(IVEC, IVP, R0)
	CLEAR_KEYS()
	RET