// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build (ppc64 || ppc64le) && !purego

// Based on CRYPTOGAMS code with the following comment:
// # ====================================================================
// # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
// # project. The module is, however, dual licensed under OpenSSL and
// # CRYPTOGAMS licenses depending on where you obtain it. For further
// # details see http://www.openssl.org/~appro/cryptogams/.
// # ====================================================================

// Original code can be found at the link below:
// https://github.com/dot-asm/cryptogams/blob/master/ppc/aesp8-ppc.pl

// Some function names were changed to be consistent with Go function
// names. For instance, the functions aes_p8_set_{en,de}crypt_key became
// set{En,De}cryptKeyAsm. I also split setEncryptKeyAsm in two parts and
// created a new routine (doEncryptKeyAsm). This was necessary to avoid
// overwriting arguments when setDecryptKeyAsm calls setEncryptKeyAsm.
// Other modifications were made as well, but the functionality is unchanged.

#include "textflag.h"

// For expandKeyAsm
#define INP     R3
#define BITS    R4
#define OUTENC  R5 // Pointer to next expanded encrypt key
#define PTR     R6
#define CNT     R7
#define ROUNDS  R8
#define OUTDEC  R9  // Pointer to next expanded decrypt key
#define TEMP    R19
#define ZERO    V0
#define IN0     V1
#define IN1     V2
#define KEY     V3
#define RCON    V4
#define MASK    V5
#define TMP     V6
#define STAGE   V7
#define OUTPERM V8
#define OUTMASK V9
#define OUTHEAD V10
#define OUTTAIL V11

// For P9 instruction emulation
#define ESPERM  V21  // Endian swapping permute into BE
#define TMP2    V22  // Temporary for P8_STXVB16X

// For {en,de}cryptBlockAsm
#define BLK_INP    R3
#define BLK_OUT    R4
#define BLK_KEY    R5
#define BLK_ROUNDS R6
#define BLK_IDX    R7

DATA ·rcon+0x00(SB)/8, $0x0f0e0d0c0b0a0908 // Permute for vector doubleword endian swap
DATA ·rcon+0x08(SB)/8, $0x0706050403020100
DATA ·rcon+0x10(SB)/8, $0x0100000001000000 // RCON
DATA ·rcon+0x18(SB)/8, $0x0100000001000000 // RCON
DATA ·rcon+0x20(SB)/8, $0x1b0000001b000000
DATA ·rcon+0x28(SB)/8, $0x1b0000001b000000
DATA ·rcon+0x30(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK
DATA ·rcon+0x38(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK
DATA ·rcon+0x40(SB)/8, $0x0000000000000000
DATA ·rcon+0x48(SB)/8, $0x0000000000000000
GLOBL ·rcon(SB), RODATA, $80
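
// The key-schedule loops below double the round constant with
// VADDUWM RCON,RCON,RCON. A plain modular add cannot perform the
// GF(2^8) reduction needed past 0x80, so the 0x1b entries above are
// reloaded for the final AES-128 rounds. A Go sketch of the constants
// this produces (illustration only; use is a hypothetical consumer):
//
//	rc := uint32(0x01000000)
//	for i := 0; i < 10; i++ {
//		use(rc) // 0x01,0x02,...,0x80,0x1b,0x36 in the top byte
//		if rc&0x80000000 != 0 {
//			rc = rc<<1 ^ 0x1b000000 // reduce by the AES polynomial
//		} else {
//			rc <<= 1
//		}
//	}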

#ifdef GOARCH_ppc64le
#  ifdef GOPPC64_power9
#define P8_LXVB16X(RA,RB,VT)  LXVB16X	(RA+RB), VT
#define P8_STXVB16X(VS,RA,RB) STXVB16X	VS, (RA+RB)
#define XXBRD_ON_LE(VA,VT)    XXBRD	VA, VT
#  else
// On POWER8/ppc64le, emulate the POWER9 instructions by loading unaligned
// doublewords and byte-swapping each doubleword to emulate BE load/stores.
#define NEEDS_ESPERM
#define P8_LXVB16X(RA,RB,VT) \
	LXVD2X	(RA+RB), VT \
	VPERM	VT, VT, ESPERM, VT

#define P8_STXVB16X(VS,RA,RB) \
	VPERM	VS, VS, ESPERM, TMP2 \
	STXVD2X	TMP2, (RA+RB)

#define XXBRD_ON_LE(VA,VT) \
	VPERM	VA, VA, ESPERM, VT

#  endif // defined(GOPPC64_power9)
#else
#define P8_LXVB16X(RA,RB,VT)  LXVD2X	(RA+RB), VT
#define P8_STXVB16X(VS,RA,RB) STXVD2X	VS, (RA+RB)
#define XXBRD_ON_LE(VA, VT)
#endif // defined(GOARCH_ppc64le)
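
// In all three configurations, P8_LXVB16X leaves the 16 bytes at RA+RB
// in VT in big-endian element order, and P8_STXVB16X is its inverse.
// A Go sketch of the value such a load produces (illustration only):
//
//	import "encoding/binary"
//
//	// hi and lo are the two doublewords of the vector register.
//	func loadBE16(b []byte) (hi, lo uint64) {
//		return binary.BigEndian.Uint64(b[:8]), binary.BigEndian.Uint64(b[8:16])
//	}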

// func expandKeyAsm(nr int, key *byte, enc *uint32, dec *uint32)
TEXT ·expandKeyAsm(SB), NOSPLIT|NOFRAME, $0
	// Load the arguments into registers.
	MOVD	nr+0(FP), ROUNDS
	MOVD	key+8(FP), INP
	MOVD	enc+16(FP), OUTENC
	MOVD	dec+24(FP), OUTDEC

#ifdef NEEDS_ESPERM
	MOVD	$·rcon(SB), PTR // PTR points to rcon addr
	LVX	(PTR), ESPERM
	ADD	$0x10, PTR
#else
	MOVD	$·rcon+0x10(SB), PTR // PTR points to rcon addr (skipping permute vector)
#endif

	// Get key from memory and write aligned into VR
	P8_LXVB16X(INP, R0, IN0)
	ADD	$0x10, INP, INP
	MOVD	$0x20, TEMP

	CMPW	ROUNDS, $12
	LVX	(PTR)(R0), RCON    // lvx   4,0,6      Load first 16 bytes into RCON
	LVX	(PTR)(TEMP), MASK
	ADD	$0x10, PTR, PTR    // addi  6,6,0x10   PTR to next 16 bytes of RCON
	MOVD	$8, CNT            // li    7,8        CNT = 8
	VXOR	ZERO, ZERO, ZERO   // vxor  0,0,0      Zero to be zero :)
	MOVD	CNT, CTR           // mtctr 7          Set the counter to 8 (rounds)

	// The expanded decrypt key is the expanded encrypt key stored in reverse order.
	// Move OUTDEC to the last key location, and store in descending order.
	ADD	$160, OUTDEC, OUTDEC
	BLT	loop128
	ADD	$32, OUTDEC, OUTDEC
	BEQ	l192
	ADD	$32, OUTDEC, OUTDEC
	JMP	l256

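// The loops below implement the FIPS-197 key schedule. For the 128-bit
// path, in Go terms, with hypothetical subWord (per-byte S-box) and
// rotWord (rotate-left-one-byte) helpers, each group of four words is:
//
//	w[i] = w[i-4] ^ subWord(rotWord(w[i-1])) ^ rc // i%4 == 0; rc doubles per group
//	w[i] = w[i-4] ^ w[i-1]                        // otherwise
//
// The MASK permute rotates word 3 and splats it into all four words;
// with every word equal, the ShiftRows step inside VCIPHERLAST is a
// no-op, so VCIPHERLAST reduces to subWord plus the xor with RCON. The
// VSLDOI/VXOR chains accumulate the w[i-4]^...^w[i-1] prefix terms.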
loop128:
	// Key schedule (Round 1 to 8)
	VPERM	IN0, IN0, MASK, KEY              // vperm 3,1,1,5         Rotate-n-splat
	VSLDOI	$12, ZERO, IN0, TMP              // vsldoi 6,0,1,12
	STXVD2X	IN0, (R0+OUTENC)
	STXVD2X	IN0, (R0+OUTDEC)
	VCIPHERLAST	KEY, RCON, KEY           // vcipherlast 3,3,4
	ADD	$16, OUTENC, OUTENC
	ADD	$-16, OUTDEC, OUTDEC

	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VADDUWM	RCON, RCON, RCON    // vadduwm 4,4,4
	VXOR	IN0, KEY, IN0       // vxor 1,1,3
	BDNZ	loop128

	LVX	(PTR)(R0), RCON // lvx 4,0,6     Last two round keys

	// Key schedule (Round 9)
	VPERM	IN0, IN0, MASK, KEY              // vperm 3,1,1,5   Rotate-n-splat
	VSLDOI	$12, ZERO, IN0, TMP              // vsldoi 6,0,1,12
	STXVD2X	IN0, (R0+OUTENC)
	STXVD2X	IN0, (R0+OUTDEC)
	VCIPHERLAST	KEY, RCON, KEY           // vcipherlast 3,3,4
	ADD	$16, OUTENC, OUTENC
	ADD	$-16, OUTDEC, OUTDEC

	// Key schedule (Round 10)
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VADDUWM	RCON, RCON, RCON    // vadduwm 4,4,4
	VXOR	IN0, KEY, IN0       // vxor 1,1,3

	VPERM	IN0, IN0, MASK, KEY              // vperm 3,1,1,5   Rotate-n-splat
	VSLDOI	$12, ZERO, IN0, TMP              // vsldoi 6,0,1,12
	STXVD2X	IN0, (R0+OUTENC)
	STXVD2X	IN0, (R0+OUTDEC)
	VCIPHERLAST	KEY, RCON, KEY           // vcipherlast 3,3,4
	ADD	$16, OUTENC, OUTENC
	ADD	$-16, OUTDEC, OUTDEC

	// Key schedule (Round 11)
	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP              // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP              // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
	VXOR	IN0, KEY, IN0                    // vxor 1,1,3
	STXVD2X	IN0, (R0+OUTENC)
	STXVD2X	IN0, (R0+OUTDEC)

	RET

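// AES-192 consumes a six-word key, while round keys are four words
// each. Each loop192 iteration runs the six-word schedule step twice
// (twelve new words) and stores three round keys, using STAGE to
// splice the halves of IN0/IN1 into whole 16-byte keys.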
l192:
	LXSDX	(INP+R0), IN1                    // Load next 8 bytes into upper half of VSR.
	XXBRD_ON_LE(IN1, IN1)                    // and convert to BE ordering on LE hosts.
	MOVD	$4, CNT                          // li 7,4
	STXVD2X	IN0, (R0+OUTENC)
	STXVD2X	IN0, (R0+OUTDEC)
	ADD	$16, OUTENC, OUTENC
	ADD	$-16, OUTDEC, OUTDEC
	VSPLTISB	$8, KEY                  // vspltisb 3,8
	MOVD	CNT, CTR                         // mtctr 7
	VSUBUBM	MASK, KEY, MASK                  // vsububm 5,5,3

loop192:
	VPERM	IN1, IN1, MASK, KEY // vperm 3,2,2,5
	VSLDOI	$12, ZERO, IN0, TMP // vsldoi 6,0,1,12
	VCIPHERLAST	KEY, RCON, KEY      // vcipherlast 3,3,4

	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0       // vxor 1,1,6

	VSLDOI	$8, ZERO, IN1, STAGE  // vsldoi 7,0,2,8
	VSPLTW	$3, IN0, TMP          // vspltw 6,1,3
	VXOR	TMP, IN1, TMP         // vxor 6,6,2
	VSLDOI	$12, ZERO, IN1, IN1   // vsldoi 2,0,2,12
	VADDUWM	RCON, RCON, RCON      // vadduwm 4,4,4
	VXOR	IN1, TMP, IN1         // vxor 2,2,6
	VXOR	IN0, KEY, IN0         // vxor 1,1,3
	VXOR	IN1, KEY, IN1         // vxor 2,2,3
	VSLDOI	$8, STAGE, IN0, STAGE // vsldoi 7,7,1,8

	VPERM	IN1, IN1, MASK, KEY              // vperm 3,2,2,5
	VSLDOI	$12, ZERO, IN0, TMP              // vsldoi 6,0,1,12
	STXVD2X	STAGE, (R0+OUTENC)
	STXVD2X	STAGE, (R0+OUTDEC)
	VCIPHERLAST	KEY, RCON, KEY           // vcipherlast 3,3,4
	ADD	$16, OUTENC, OUTENC
	ADD	$-16, OUTDEC, OUTDEC

	VSLDOI	$8, IN0, IN1, STAGE              // vsldoi 7,1,2,8
	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP              // vsldoi 6,0,6,12
	STXVD2X	STAGE, (R0+OUTENC)
	STXVD2X	STAGE, (R0+OUTDEC)
	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP              // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
	ADD	$16, OUTENC, OUTENC
	ADD	$-16, OUTDEC, OUTDEC

	VSPLTW	$3, IN0, TMP                     // vspltw 6,1,3
	VXOR	TMP, IN1, TMP                    // vxor 6,6,2
	VSLDOI	$12, ZERO, IN1, IN1              // vsldoi 2,0,2,12
	VADDUWM	RCON, RCON, RCON                 // vadduwm 4,4,4
	VXOR	IN1, TMP, IN1                    // vxor 2,2,6
	VXOR	IN0, KEY, IN0                    // vxor 1,1,3
	VXOR	IN1, KEY, IN1                    // vxor 2,2,3
	STXVD2X	IN0, (R0+OUTENC)
	STXVD2X	IN0, (R0+OUTDEC)
	ADD	$16, OUTENC, OUTENC
	ADD	$-16, OUTDEC, OUTDEC
	BDNZ	loop192

	RET

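// The 256-bit schedule adds a SubBytes-only step halfway through each
// iteration: w[i] = w[i-4] ^ subWord(w[i-1]), with no rotation and no
// round constant. The VSPLTW/VSBOX pair below splats word 3 and
// applies the per-byte S-box to implement exactly that step.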
l256:
	P8_LXVB16X(INP, R0, IN1)
	MOVD	$7, CNT                          // li 7,7
	STXVD2X	IN0, (R0+OUTENC)
	STXVD2X	IN0, (R0+OUTDEC)
	ADD	$16, OUTENC, OUTENC
	ADD	$-16, OUTDEC, OUTDEC
	MOVD	CNT, CTR                         // mtctr 7

loop256:
	VPERM	IN1, IN1, MASK, KEY              // vperm 3,2,2,5
	VSLDOI	$12, ZERO, IN0, TMP              // vsldoi 6,0,1,12
	STXVD2X	IN1, (R0+OUTENC)
	STXVD2X	IN1, (R0+OUTDEC)
	VCIPHERLAST	KEY, RCON, KEY           // vcipherlast 3,3,4
	ADD	$16, OUTENC, OUTENC
	ADD	$-16, OUTDEC, OUTDEC

	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP              // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP              // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
	VADDUWM	RCON, RCON, RCON                 // vadduwm 4,4,4
	VXOR	IN0, KEY, IN0                    // vxor 1,1,3
	STXVD2X	IN0, (R0+OUTENC)
	STXVD2X	IN0, (R0+OUTDEC)
	ADD	$16, OUTENC, OUTENC
	ADD	$-16, OUTDEC, OUTDEC
	BDZ	done

	VSPLTW	$3, IN0, KEY        // vspltw 3,1,3
	VSLDOI	$12, ZERO, IN1, TMP // vsldoi 6,0,2,12
	VSBOX	KEY, KEY            // vsbox 3,3

	VXOR	IN1, TMP, IN1       // vxor 2,2,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN1, TMP, IN1       // vxor 2,2,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN1, TMP, IN1       // vxor 2,2,6

	VXOR	IN1, KEY, IN1 // vxor 2,2,3
	JMP	loop256       // b .Loop256

done:
	RET

// func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)
TEXT ·encryptBlockAsm(SB), NOSPLIT|NOFRAME, $0
	MOVD	nr+0(FP), R6   // Round count/Key size
	MOVD	xk+8(FP), R5   // Key pointer
	MOVD	dst+16(FP), R3 // Dest pointer
	MOVD	src+24(FP), R4 // Src pointer
#ifdef NEEDS_ESPERM
	MOVD	$·rcon(SB), R7
	LVX	(R7), ESPERM   // Permute value for P8_ macros.
#endif

	// Set CR{1,2,3}EQ to hold the key size information.
	CMPU	R6, $10, CR1
	CMPU	R6, $12, CR2
	CMPU	R6, $14, CR3
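
	// nr is 10, 12, or 14 for AES-128, AES-192, and AES-256; at most
	// one of CR1EQ/CR2EQ/CR3EQ is set, and the cipher chain below
	// exits early for the shorter key schedules.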

	MOVD	$16, R6
	MOVD	$32, R7
	MOVD	$48, R8
	MOVD	$64, R9
	MOVD	$80, R10
	MOVD	$96, R11
	MOVD	$112, R12

	// Load text in BE order
	P8_LXVB16X(R4, R0, V0)

	// V1, V2 will hold keys, V0 is a temp.
	// At completion, V2 will hold the ciphertext.
	// Load xk[0:3] and xor with text
	LXVD2X	(R0+R5), V1
	VXOR	V0, V1, V0

	// Load xk[4:11] and cipher
	LXVD2X	(R6+R5), V1
	LXVD2X	(R7+R5), V2
	VCIPHER	V0, V1, V0
	VCIPHER	V0, V2, V0

	// Load xk[12:19] and cipher
	LXVD2X	(R8+R5), V1
	LXVD2X	(R9+R5), V2
	VCIPHER	V0, V1, V0
	VCIPHER	V0, V2, V0

	// Load xk[20:27] and cipher
	LXVD2X	(R10+R5), V1
	LXVD2X	(R11+R5), V2
	VCIPHER	V0, V1, V0
	VCIPHER	V0, V2, V0

	// Increment xk pointer to reuse constant offsets in R6-R12.
	ADD	$112, R5

	// Load xk[28:35] and cipher
	LXVD2X	(R0+R5), V1
	LXVD2X	(R6+R5), V2
	VCIPHER	V0, V1, V0
	VCIPHER	V0, V2, V0

	// Load xk[36:43] and cipher
	LXVD2X	(R7+R5), V1
	LXVD2X	(R8+R5), V2
	BEQ	CR1, Lenc_tail // Key size 10?
	VCIPHER	V0, V1, V0
	VCIPHER	V0, V2, V0

	// Load xk[44:51] and cipher
	LXVD2X	(R9+R5), V1
	LXVD2X	(R10+R5), V2
	BEQ	CR2, Lenc_tail // Key size 12?
	VCIPHER	V0, V1, V0
	VCIPHER	V0, V2, V0

	// Load xk[52:59] and cipher
	LXVD2X	(R11+R5), V1
	LXVD2X	(R12+R5), V2
	BNE	CR3, Linvalid_key_len // Not key size 14?
	// Fallthrough to final cipher

Lenc_tail:
	// Cipher the last two keys such that key information is
	// cleared from V1 and V2.
	VCIPHER		V0, V1, V1
	VCIPHERLAST	V1, V2, V2

	// Store the result in BE order.
	P8_STXVB16X(V2, R3, R0)
	RET

Linvalid_key_len:
	// Segfault, this should never happen. Only 3 key sizes are created/used.
	MOVD	R0, 0(R0)
	RET

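// Decryption runs the reversed expanded key through VNCIPHER. Per the
// ISA definition, VNCIPHER applies InvMixColumns after the round-key
// xor, and since InvMixColumns(s ^ k) = InvMixColumns(s) ^
// InvMixColumns(k), the encryption schedule can be reused as-is, with
// no per-key preprocessing.
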
// func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)
TEXT ·decryptBlockAsm(SB), NOSPLIT|NOFRAME, $0
	MOVD	nr+0(FP), R6   // Round count/Key size
	MOVD	xk+8(FP), R5   // Key pointer
	MOVD	dst+16(FP), R3 // Dest pointer
	MOVD	src+24(FP), R4 // Src pointer
#ifdef NEEDS_ESPERM
	MOVD	$·rcon(SB), R7
	LVX	(R7), ESPERM   // Permute value for P8_ macros.
#endif

	// Set CR{1,2,3}EQ to hold the key size information.
	CMPU	R6, $10, CR1
	CMPU	R6, $12, CR2
	CMPU	R6, $14, CR3

	MOVD	$16, R6
	MOVD	$32, R7
	MOVD	$48, R8
	MOVD	$64, R9
	MOVD	$80, R10
	MOVD	$96, R11
	MOVD	$112, R12

	// Load text in BE order
	P8_LXVB16X(R4, R0, V0)

	// V1, V2 will hold keys, V0 is a temp.
	// At completion, V2 will hold the plaintext.
	// Load xk[0:3] and xor with ciphertext
	LXVD2X	(R0+R5), V1
	VXOR	V0, V1, V0

	// Load xk[4:11] and cipher
	LXVD2X	(R6+R5), V1
	LXVD2X	(R7+R5), V2
	VNCIPHER	V0, V1, V0
	VNCIPHER	V0, V2, V0

	// Load xk[12:19] and cipher
	LXVD2X	(R8+R5), V1
	LXVD2X	(R9+R5), V2
	VNCIPHER	V0, V1, V0
	VNCIPHER	V0, V2, V0

	// Load xk[20:27] and cipher
	LXVD2X	(R10+R5), V1
	LXVD2X	(R11+R5), V2
	VNCIPHER	V0, V1, V0
	VNCIPHER	V0, V2, V0

	// Increment xk pointer to reuse constant offsets in R6-R12.
	ADD	$112, R5

	// Load xk[28:35] and cipher
	LXVD2X	(R0+R5), V1
	LXVD2X	(R6+R5), V2
	VNCIPHER	V0, V1, V0
	VNCIPHER	V0, V2, V0

	// Load xk[36:43] and cipher
	LXVD2X	(R7+R5), V1
	LXVD2X	(R8+R5), V2
	BEQ	CR1, Ldec_tail // Key size 10?
	VNCIPHER	V0, V1, V0
	VNCIPHER	V0, V2, V0

	// Load xk[44:51] and cipher
	LXVD2X	(R9+R5), V1
	LXVD2X	(R10+R5), V2
	BEQ	CR2, Ldec_tail // Key size 12?
	VNCIPHER	V0, V1, V0
	VNCIPHER	V0, V2, V0

	// Load xk[52:59] and cipher
	LXVD2X	(R11+R5), V1
	LXVD2X	(R12+R5), V2
	BNE	CR3, Linvalid_key_len // Not key size 14?
	// Fallthrough to final cipher

Ldec_tail:
	// Cipher the last two keys such that key information is
	// cleared from V1 and V2.
	VNCIPHER	V0, V1, V1
	VNCIPHERLAST	V1, V2, V2

	// Store the result in BE order.
	P8_STXVB16X(V2, R3, R0)
	RET

Linvalid_key_len:
	// Segfault, this should never happen. Only 3 key sizes are created/used.
	MOVD	R0, 0(R0)
	RET

// Remove defines from above so they can be redefined here
#undef INP
#undef OUTENC
#undef ROUNDS
#undef KEY
#undef TMP

#define INP R3
#define OUTP R4
#define LEN R5
#define KEYP R6
#define ROUNDS R7
#define IVP R8
#define ENC R9

#define INOUT V2
#define TMP V3
#define IVEC V4

// Load the expanded cipher key into VSRs.
//
// The expanded key is stored and loaded using
// STXVD2X/LXVD2X. The in-memory byte ordering
// depends on the endianness of the machine. The
// expanded keys are generated by expandKeyAsm above.
//
// Rkeyp holds the key pointer. It is clobbered. Once
// the expanded keys are loaded, it is not needed.
//
// R12, R14-R21 are scratch registers.
// For a 10 round key, V6, V11-V20 hold the expanded key.
// For a 12 round key, V6, V9-V20 hold the expanded key.
// For a 14 round key, V6, V7-V20 hold the expanded key.
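// Registers are assigned so the final ten round keys always occupy
// V11-V20 regardless of key size; the cipher loop can then finish
// identically for every size, and only its entry point differs.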
#define LOAD_KEY(Rkeyp) \
	MOVD	$16, R12 \
	MOVD	$32, R14 \
	MOVD	$48, R15 \
	MOVD	$64, R16 \
	MOVD	$80, R17 \
	MOVD	$96, R18 \
	MOVD	$112, R19 \
	MOVD	$128, R20 \
	MOVD	$144, R21 \
	LXVD2X	(R0+Rkeyp), V6 \
	ADD	$16, Rkeyp \
	BEQ	CR1, L_start10 \
	BEQ	CR2, L_start12 \
	LXVD2X	(R0+Rkeyp), V7 \
	LXVD2X	(R12+Rkeyp), V8 \
	ADD	$32, Rkeyp \
	L_start12: \
	LXVD2X	(R0+Rkeyp), V9 \
	LXVD2X	(R12+Rkeyp), V10 \
	ADD	$32, Rkeyp \
	L_start10: \
	LXVD2X	(R0+Rkeyp), V11 \
	LXVD2X	(R12+Rkeyp), V12 \
	LXVD2X	(R14+Rkeyp), V13 \
	LXVD2X	(R15+Rkeyp), V14 \
	LXVD2X	(R16+Rkeyp), V15 \
	LXVD2X	(R17+Rkeyp), V16 \
	LXVD2X	(R18+Rkeyp), V17 \
	LXVD2X	(R19+Rkeyp), V18 \
	LXVD2X	(R20+Rkeyp), V19 \
	LXVD2X	(R21+Rkeyp), V20

// Perform the AES cipher operation for key sizes 10/12/14 using the keys
// loaded by LOAD_KEY, and the key size information held in CR1EQ/CR2EQ.
//
// Vxor is ideally V6 (Key[0-3]), but for slightly improved encryption
// performance V6 and IVEC can be swapped (xor is both associative and
// commutative) during encryption:
//
//	VXOR INOUT, IVEC, INOUT
//	VXOR INOUT, V6, INOUT
//
//	into
//
//	VXOR INOUT, V6, INOUT
//	VXOR INOUT, IVEC, INOUT
//
#define CIPHER_BLOCK(Vin, Vxor, Vout, vcipher, vciphel, label10, label12) \
	VXOR	Vin, Vxor, Vout \
	BEQ	CR1, label10 \
	BEQ	CR2, label12 \
	vcipher	Vout, V7, Vout \
	vcipher	Vout, V8, Vout \
	label12: \
	vcipher	Vout, V9, Vout \
	vcipher	Vout, V10, Vout \
	label10: \
	vcipher	Vout, V11, Vout \
	vcipher	Vout, V12, Vout \
	vcipher	Vout, V13, Vout \
	vcipher	Vout, V14, Vout \
	vcipher	Vout, V15, Vout \
	vcipher	Vout, V16, Vout \
	vcipher	Vout, V17, Vout \
	vcipher	Vout, V18, Vout \
	vcipher	Vout, V19, Vout \
	vciphel	Vout, V20, Vout

#define CLEAR_KEYS() \
	VXOR	V6, V6, V6 \
	VXOR	V7, V7, V7 \
	VXOR	V8, V8, V8 \
	VXOR	V9, V9, V9 \
	VXOR	V10, V10, V10 \
	VXOR	V11, V11, V11 \
	VXOR	V12, V12, V12 \
	VXOR	V13, V13, V13 \
	VXOR	V14, V14, V14 \
	VXOR	V15, V15, V15 \
	VXOR	V16, V16, V16 \
	VXOR	V17, V17, V17 \
	VXOR	V18, V18, V18 \
	VXOR	V19, V19, V19 \
	VXOR	V20, V20, V20
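
// Zeroing V6-V20 on exit keeps expanded key material from lingering in
// vector registers after the routine returns.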

// func cryptBlocksChain(src, dst *byte, length int, key *uint32, iv *byte, enc int, nr int)
TEXT ·cryptBlocksChain(SB), NOSPLIT|NOFRAME, $0
	MOVD	src+0(FP), INP
	MOVD	dst+8(FP), OUTP
	MOVD	length+16(FP), LEN
	MOVD	key+24(FP), KEYP
	MOVD	iv+32(FP), IVP
	MOVD	enc+40(FP), ENC
	MOVD	nr+48(FP), ROUNDS

#ifdef NEEDS_ESPERM
	MOVD	$·rcon(SB), R11
	LVX	(R11), ESPERM   // Permute value for P8_ macros.
#endif

	// Assume len > 0 && len % blockSize == 0.
	CMPW	ENC, $0
	P8_LXVB16X(IVP, R0, IVEC)
	CMPU	ROUNDS, $10, CR1
	CMPU	ROUNDS, $12, CR2 // Only sizes 10/12/14 are supported.

	// Set up the key in VSRs, and set the loop count in CTR.
	LOAD_KEY(KEYP)
	SRD	$4, LEN
	MOVD	LEN, CTR

	BEQ	Lcbc_dec

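	// The two loops below are the textbook CBC recurrences; in Go
	// terms, with hypothetical single-block helpers (sketch only):
	//
	//	c[i] = encryptBlock(p[i] ^ c[i-1]) // Lcbc_enc, c[-1] = IV
	//	p[i] = decryptBlock(c[i]) ^ c[i-1] // Lcbc_dec
	//
	// On exit the last ciphertext block is written back through IVP
	// so a later call can continue the chain.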
	PCALIGN $16
Lcbc_enc:
	P8_LXVB16X(INP, R0, INOUT)
	ADD	$16, INP
	VXOR	INOUT, V6, INOUT
	CIPHER_BLOCK(INOUT, IVEC, INOUT, VCIPHER, VCIPHERLAST, Lcbc_enc10, Lcbc_enc12)
	VOR	INOUT, INOUT, IVEC // ciphertext (INOUT) is IVEC for next block.
	P8_STXVB16X(INOUT, OUTP, R0)
	ADD	$16, OUTP
	BDNZ	Lcbc_enc

	P8_STXVB16X(INOUT, IVP, R0)
	CLEAR_KEYS()
	RET

	PCALIGN $16
Lcbc_dec:
	P8_LXVB16X(INP, R0, TMP)
	ADD	$16, INP
	CIPHER_BLOCK(TMP, V6, INOUT, VNCIPHER, VNCIPHERLAST, Lcbc_dec10, Lcbc_dec12)
	VXOR	INOUT, IVEC, INOUT
	VOR	TMP, TMP, IVEC // TMP is IVEC for next block.
	P8_STXVB16X(INOUT, OUTP, R0)
	ADD	$16, OUTP
	BDNZ	Lcbc_dec

	P8_STXVB16X(IVEC, IVP, R0)
	CLEAR_KEYS()
	RET
