Text file src/vendor/golang.org/x/crypto/chacha20/chacha_s390x.s

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build gc && !purego
     6  
     7  #include "go_asm.h"
     8  #include "textflag.h"
     9  
    10  // This is an implementation of the ChaCha20 encryption algorithm as
    11  // specified in RFC 7539. It uses vector instructions to compute
    12  // 4 keystream blocks in parallel (256 bytes) which are then XORed
    13  // with the bytes in the input slice.
    14  
        // constants<> is a 32-byte read-only table. xorKeyStreamVX loads it with one
        // VLM into two adjacent vector registers: bytes 0x00-0x0f become BSWAP and
        // bytes 0x10-0x1f become J0.
    15  GLOBL ·constants<>(SB), RODATA|NOPTR, $32
    16  // BSWAP: swap bytes in each 4-byte element
        // (VPERM byte-index mask used by XORV; mask byte i names the source byte that
        // lands in result byte i, so 3,2,1,0 reverses the first 32-bit element, etc.)
    17  DATA ·constants<>+0x00(SB)/4, $0x03020100
    18  DATA ·constants<>+0x04(SB)/4, $0x07060504
    19  DATA ·constants<>+0x08(SB)/4, $0x0b0a0908
    20  DATA ·constants<>+0x0c(SB)/4, $0x0f0e0d0c
    21  // J0: [j0, j1, j2, j3]
        // (state words 0-3: the fixed ChaCha constant, ASCII "expand 32-byte k" when
        // each word is serialized little-endian)
    22  DATA ·constants<>+0x10(SB)/4, $0x61707865
    23  DATA ·constants<>+0x14(SB)/4, $0x3320646e
    24  DATA ·constants<>+0x18(SB)/4, $0x79622d32
    25  DATA ·constants<>+0x1c(SB)/4, $0x6b206574
    26  
        // Vector register assignments. VLM/VSTM transfer a consecutive range of
        // vector registers, so the pairs BSWAP/J0 and KEY0/KEY1 and the run M0..M3
        // must keep adjacent register numbers.
    27  #define BSWAP V5  // VPERM mask: reverse the bytes of each 32-bit element
    28  #define J0    V6  // state words 0-3 (ChaCha constants)
    29  #define KEY0  V7  // state words 4-7  (key[0:4])
    30  #define KEY1  V8  // state words 8-11 (key[4:8])
    31  #define NONCE V9  // [0, nonce[0], nonce[1], nonce[2]] after setup
    32  #define CTR   V10 // per-lane block counters for the 4 parallel blocks
    33  #define M0    V11 // M0..M3: scratch in SHUFFLE, src data / result in XORV
    34  #define M1    V12
    35  #define M2    V13
    36  #define M3    V14
    37  #define INC   V15 // [0, 1, 2, 3] while seeding CTR, then [4, 4, 4, 4]
    38  #define X0    V16 // X0..X15: working state; during the rounds Xi holds state
    39  #define X1    V17 // word i of all 4 blocks, one block per 32-bit lane
    40  #define X2    V18
    41  #define X3    V19
    42  #define X4    V20
    43  #define X5    V21
    44  #define X6    V22
    45  #define X7    V23
    46  #define X8    V24
    47  #define X9    V25
    48  #define X10   V26
    49  #define X11   V27
    50  #define X12   V28
    51  #define X13   V29
    52  #define X14   V30
    53  #define X15   V31
    54  
        // Total number of ChaCha rounds. The round loop in xorKeyStreamVX runs two
        // ROUND4 invocations per iteration, so its counter starts at NUM_ROUNDS/2.
    55  #define NUM_ROUNDS 20
    56  
        // ROUND4 runs the ChaCha quarter-round on four independent register groups
        // (a*, b*, c*, d*), interleaving the groups one instruction at a time.
        // All arithmetic is on 32-bit elements and every operand is updated in place.
        //
        // Note the parameter order inside a group: (g0, g1, g2, g3) corresponds to the
        // quarter-round variables (a, b, d, c) of RFC 7539, i.e. g2 is the word that is
        // rotated by 16/8 and g1 the word that is rotated by 12/7.
    57  #define ROUND4(a0, a1, a2, a3, b0, b1, b2, b3, c0, c1, c2, c3, d0, d1, d2, d3) \
    58  	VAF    a1, a0, a0  \ // a0 += a1 (same step for groups b, c, d below)
    59  	VAF    b1, b0, b0  \
    60  	VAF    c1, c0, c0  \
    61  	VAF    d1, d0, d0  \
    62  	VX     a0, a2, a2  \ // a2 ^= a0
    63  	VX     b0, b2, b2  \
    64  	VX     c0, c2, c2  \
    65  	VX     d0, d2, d2  \
    66  	VERLLF $16, a2, a2 \ // a2 = rotl32(a2, 16)
    67  	VERLLF $16, b2, b2 \
    68  	VERLLF $16, c2, c2 \
    69  	VERLLF $16, d2, d2 \
    70  	VAF    a2, a3, a3  \ // a3 += a2
    71  	VAF    b2, b3, b3  \
    72  	VAF    c2, c3, c3  \
    73  	VAF    d2, d3, d3  \
    74  	VX     a3, a1, a1  \ // a1 ^= a3
    75  	VX     b3, b1, b1  \
    76  	VX     c3, c1, c1  \
    77  	VX     d3, d1, d1  \
    78  	VERLLF $12, a1, a1 \ // a1 = rotl32(a1, 12)
    79  	VERLLF $12, b1, b1 \
    80  	VERLLF $12, c1, c1 \
    81  	VERLLF $12, d1, d1 \
    82  	VAF    a1, a0, a0  \ // a0 += a1
    83  	VAF    b1, b0, b0  \
    84  	VAF    c1, c0, c0  \
    85  	VAF    d1, d0, d0  \
    86  	VX     a0, a2, a2  \ // a2 ^= a0
    87  	VX     b0, b2, b2  \
    88  	VX     c0, c2, c2  \
    89  	VX     d0, d2, d2  \
    90  	VERLLF $8, a2, a2  \ // a2 = rotl32(a2, 8)
    91  	VERLLF $8, b2, b2  \
    92  	VERLLF $8, c2, c2  \
    93  	VERLLF $8, d2, d2  \
    94  	VAF    a2, a3, a3  \ // a3 += a2
    95  	VAF    b2, b3, b3  \
    96  	VAF    c2, c3, c3  \
    97  	VAF    d2, d3, d3  \
    98  	VX     a3, a1, a1  \ // a1 ^= a3
    99  	VX     b3, b1, b1  \
   100  	VX     c3, c1, c1  \
   101  	VX     d3, d1, d1  \
   102  	VERLLF $7, a1, a1  \ // a1 = rotl32(a1, 7)
   103  	VERLLF $7, b1, b1  \
   104  	VERLLF $7, c1, c1  \
   105  	VERLLF $7, d1, d1
   106  
        // PERMUTE rearranges the bytes of v0..v3 in place according to the VPERM
        // byte-index vector mask. Each register is passed as both VPERM source
        // operands, so every mask index selects a byte of that same register.
   107  #define PERMUTE(mask, v0, v1, v2, v3) \
   108  	VPERM v0, v0, mask, v0 \
   109  	VPERM v1, v1, mask, v1 \
   110  	VPERM v2, v2, mask, v2 \
   111  	VPERM v3, v3, mask, v3
   112  
        // ADDV adds vector x to each of v0..v3 in place, as four independent 32-bit
        // element additions per register (vi += x). x itself is not modified.
   113  #define ADDV(x, v0, v1, v2, v3) \
   114  	VAF x, v0, v0 \
   115  	VAF x, v1, v1 \
   116  	VAF x, v2, v2 \
   117  	VAF x, v3, v3
   118  
        // XORV processes one 64-byte block: it loads 64 bytes from off(src), XORs
        // them with the keystream held in v0..v3 and stores the result to off(dst).
        // Overwrites M0..M3, and leaves v0..v3 byte-swapped (PERMUTE works in place).
   119  #define XORV(off, dst, src, v0, v1, v2, v3) \
   120  	VLM  off(src), M0, M3          \ // M0..M3 = 64 bytes of input
   121  	PERMUTE(BSWAP, v0, v1, v2, v3) \ // reverse the bytes of every 32-bit keystream word
   122  	VX   v0, M0, M0                \ // M0..M3 ^= keystream
   123  	VX   v1, M1, M1                \
   124  	VX   v2, M2, M2                \
   125  	VX   v3, M3, M3                \
   126  	VSTM M0, M3, off(dst) // write the 64 result bytes
   127  
        // SHUFFLE transposes the 4x4 matrix of 32-bit elements whose rows are
        // a, b, c, d: afterwards a holds element 0 of each input, b element 1, and
        // so on. t, u, v, w are temporaries and are overwritten.
   128  #define SHUFFLE(a, b, c, d, t, u, v, w) \
   129  	VMRHF a, c, t \ // t = {a[0], c[0], a[1], c[1]}
   130  	VMRHF b, d, u \ // u = {b[0], d[0], b[1], d[1]}
   131  	VMRLF a, c, v \ // v = {a[2], c[2], a[3], c[3]}
   132  	VMRLF b, d, w \ // w = {b[2], d[2], b[3], d[3]}
   133  	VMRHF t, u, a \ // a = {a[0], b[0], c[0], d[0]}
   134  	VMRLF t, u, b \ // b = {a[1], b[1], c[1], d[1]}
   135  	VMRHF v, w, c \ // c = {a[2], b[2], c[2], d[2]}
   136  	VMRLF v, w, d // d = {a[3], b[3], c[3], d[3]}
   137  
   138  // func xorKeyStreamVX(dst, src []byte, key *[8]uint32, nonce *[3]uint32, counter *uint32)
        //
        // xorKeyStreamVX XORs src with ChaCha20 keystream and writes the result to
        // dst, generating 4 keystream blocks (256 bytes) per pass of the outer loop.
        // Before returning it stores lane 0 of CTR, which has been advanced by 4 for
        // every pass, back to *counter.
        //
        // The outer loop subtracts 256 from len(src) and exits only when the remainder
        // is exactly zero, and the first pass runs unconditionally, so len(src) must
        // be a nonzero multiple of 256. len(dst) is never read here.
        // NOTE(review): presumably the Go caller guarantees len(dst) >= len(src) and
        // the length constraint above - confirm at the call site.
        //
        // General register use:
        //   R0  length operand for VLL
        //   R1  address of constants<>, later the round-loop counter
        //   R2  dst pointer    R3  src pointer    R4  bytes remaining
        //   R5  key            R6  nonce          R7  counter
   139  TEXT ·xorKeyStreamVX(SB), NOSPLIT, $0
   140  	MOVD $·constants<>(SB), R1
   141  	MOVD dst+0(FP), R2         // R2=&dst[0]
   142  	LMG  src+24(FP), R3, R4    // R3=&src[0] R4=len(src)
   143  	MOVD key+48(FP), R5        // R5=key
   144  	MOVD nonce+56(FP), R6      // R6=nonce
   145  	MOVD counter+64(FP), R7    // R7=counter
   146  
   147  	// load BSWAP and J0
   148  	VLM (R1), BSWAP, J0
   149  
   150  	// setup
        	// NOTE(review): VLL's length operand is the highest byte index to load and
        	// values above 15 load all 16 bytes; 95 looks like a bit count (96-1), which
        	// would read 4 bytes past the 12-byte nonce. Those bytes are shifted out by
        	// the VSRLB below, so the result is unaffected - confirm whether 11 was meant.
   151  	MOVD  $95, R0
   152  	VLM   (R5), KEY0, KEY1 // KEY0 = key[0:4], KEY1 = key[4:8]
   153  	VLL   R0, (R6), NONCE  // nonce words land in elements 0-2
   154  	VZERO M0
   155  	VLEIB $7, $32, M0      // byte 7 carries the VSRLB shift amount: 32 bits = 4 bytes
   156  	VSRLB M0, NONCE, NONCE // NONCE = [0, nonce[0], nonce[1], nonce[2]]
   157  
   158  	// initialize counter values
   159  	VLREPF (R7), CTR       // CTR = [c, c, c, c] with c = *counter
   160  	VZERO  INC
   161  	VLEIF  $1, $1, INC
   162  	VLEIF  $2, $2, INC
   163  	VLEIF  $3, $3, INC     // INC = [0, 1, 2, 3]
   164  	VAF    INC, CTR, CTR   // CTR = [c, c+1, c+2, c+3]: one counter per block
   165  	VREPIF $4, INC         // INC = [4, 4, 4, 4]: per-pass counter step
   166  
        // Outer loop: one pass per 256 bytes. Broadcast each initial state word to
        // all four lanes; lane k of X0..X15 is the state of block k. Only word 12
        // (the counter) differs between lanes.
   167  chacha:
   168  	VREPF $0, J0, X0
   169  	VREPF $1, J0, X1
   170  	VREPF $2, J0, X2
   171  	VREPF $3, J0, X3
   172  	VREPF $0, KEY0, X4
   173  	VREPF $1, KEY0, X5
   174  	VREPF $2, KEY0, X6
   175  	VREPF $3, KEY0, X7
   176  	VREPF $0, KEY1, X8
   177  	VREPF $1, KEY1, X9
   178  	VREPF $2, KEY1, X10
   179  	VREPF $3, KEY1, X11
   180  	VLR   CTR, X12
   181  	VREPF $1, NONCE, X13
   182  	VREPF $2, NONCE, X14
   183  	VREPF $3, NONCE, X15
   184  
   185  	MOVD $(NUM_ROUNDS/2), R1
   186  
        // Round loop: each iteration is one column round followed by one diagonal
        // round. ROUND4 takes each group in (a, b, d, c) order, so the first call
        // works on columns (0,4,8,12)... and the second on diagonals (0,5,10,15)...
   187  loop:
   188  	ROUND4(X0, X4, X12,  X8, X1, X5, X13,  X9, X2, X6, X14, X10, X3, X7, X15, X11)
   189  	ROUND4(X0, X5, X15, X10, X1, X6, X12, X11, X2, X7, X13, X8,  X3, X4, X14, X9)
   190  
   191  	ADD $-1, R1
   192  	BNE loop
   193  
   194  	// decrement length
   195  	ADD $-256, R4
   196  
   197  	// rearrange vectors
        	// Each SHUFFLE transposes four state words from "one word, four blocks" to
        	// "four words, one block": afterwards block k is X(k), X(4+k), X(8+k), X(12+k).
        	// The initial state is then added back in. The per-lane counter is added to
        	// X12 before its transpose, while lanes still map to blocks; NONCE has 0 in
        	// element 0, so the later ADDV(NONCE, ...) leaves word 12 unchanged.
   198  	SHUFFLE(X0, X1, X2, X3, M0, M1, M2, M3)
   199  	ADDV(J0, X0, X1, X2, X3)
   200  	SHUFFLE(X4, X5, X6, X7, M0, M1, M2, M3)
   201  	ADDV(KEY0, X4, X5, X6, X7)
   202  	SHUFFLE(X8, X9, X10, X11, M0, M1, M2, M3)
   203  	ADDV(KEY1, X8, X9, X10, X11)
   204  	VAF CTR, X12, X12
   205  	SHUFFLE(X12, X13, X14, X15, M0, M1, M2, M3)
   206  	ADDV(NONCE, X12, X13, X14, X15)
   207  
   208  	// increment counters
   209  	VAF INC, CTR, CTR
   210  
   211  	// xor keystream with plaintext
   212  	XORV(0*64, R2, R3, X0, X4,  X8, X12)
   213  	XORV(1*64, R2, R3, X1, X5,  X9, X13)
   214  	XORV(2*64, R2, R3, X2, X6, X10, X14)
   215  	XORV(3*64, R2, R3, X3, X7, X11, X15)
   216  
   217  	// increment pointers
   218  	MOVD $256(R2), R2
   219  	MOVD $256(R3), R3
   220  
        	// repeat until the remaining length is exactly zero
   221  	CMPBNE  R4, $0, chacha
   222  
        	// write the next block counter (lane 0 of CTR) back to *counter
   223  	VSTEF $0, CTR, (R7)
   224  	RET
   225  

View as plain text