Text file src/crypto/internal/fips140/subtle/xor_riscv64.s

     1  // Copyright 2025 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build !purego
     6  
     7  #include "textflag.h"
     8  
     9  // func xorBytes(dst, a, b *byte, n int)
        //
        // xorBytes stores a[i] XOR b[i] into dst[i] for each of the n bytes.
        //
        // Register roles:
        //   X10 = dst cursor, X11 = a cursor, X12 = b cursor
        //   X13 = bytes remaining
        //   X15 = chunk-size threshold for the stage currently running
        //   X5-X7 = low three bits of dst/a/b, X8 = bytes needed to reach alignment
        //   X16-X23 = data temporaries
        //
        // Strategy: when n >= 32 and all three pointers share the same offset
        // modulo 8, peel single bytes until the pointers are 8 byte aligned, then
        // work in 64, 32 and 16 byte chunks using 8 byte loads and stores. Whatever
        // is left (or the whole buffer otherwise) is handled 4 bytes and then
        // 1 byte at a time using byte loads and stores only.
        //
        // NOTE(review): the final byte loop only exits when X13 is exactly zero, so
        // n is presumably never negative here - confirm against the Go caller.
    10  TEXT ·xorBytes(SB), NOSPLIT|NOFRAME, $0
    11  	MOV	dst+0(FP), X10
    12  	MOV	a+8(FP), X11
    13  	MOV	b+16(FP), X12
    14  	MOV	n+24(FP), X13
    15  
        	// Short inputs skip the alignment work and go straight to the byte loops.
    16  	MOV	$32, X15
    17  	BLT	X13, X15, loop4_check
    18  
    19  	// Check alignment - if alignment differs we have to do one byte at a time.
    20  	AND	$7, X10, X5
    21  	AND	$7, X11, X6
    22  	AND	$7, X12, X7
    23  	BNE	X5, X6, loop4_check
    24  	BNE	X5, X7, loop4_check
    25  	BEQZ	X5, loop64_check	// already 8 byte aligned, nothing to peel
    26  
    27  	// XOR one byte at a time until the pointers reach 8 byte alignment.
        	// X8 = 8 - (pointer & 7) is the number of bytes to peel (1..7); it is
        	// subtracted from the remaining count up front. Since n >= 32 on this
        	// path, X13 stays positive.
    28  	MOV	$8, X8
    29  	SUB	X5, X8
    30  	SUB	X8, X13
    31  align:
    32  	MOVBU	0(X11), X16
    33  	MOVBU	0(X12), X17
    34  	XOR	X16, X17
    35  	MOVB	X17, 0(X10)
    36  	ADD	$1, X10
    37  	ADD	$1, X11
    38  	ADD	$1, X12
    39  	SUB	$1, X8
    40  	BNEZ	X8, align
    41  
        // From here until loop4_check all three pointers are 8 byte aligned.
    42  loop64_check:
    43  	MOV	$64, X15
    44  	BLT	X13, X15, tail32_check
    45  	PCALIGN	$16
        // Main loop: 64 bytes per iteration as eight 8 byte XORs, issued in pairs.
    46  loop64:
    47  	MOV	0(X11), X16
    48  	MOV	0(X12), X17
    49  	MOV	8(X11), X18
    50  	MOV	8(X12), X19
    51  	XOR	X16, X17
    52  	XOR	X18, X19
    53  	MOV	X17, 0(X10)
    54  	MOV	X19, 8(X10)
    55  	MOV	16(X11), X20
    56  	MOV	16(X12), X21
    57  	MOV	24(X11), X22
    58  	MOV	24(X12), X23
    59  	XOR	X20, X21
    60  	XOR	X22, X23
    61  	MOV	X21, 16(X10)
    62  	MOV	X23, 24(X10)
    63  	MOV	32(X11), X16
    64  	MOV	32(X12), X17
    65  	MOV	40(X11), X18
    66  	MOV	40(X12), X19
    67  	XOR	X16, X17
    68  	XOR	X18, X19
    69  	MOV	X17, 32(X10)
    70  	MOV	X19, 40(X10)
    71  	MOV	48(X11), X20
    72  	MOV	48(X12), X21
    73  	MOV	56(X11), X22
    74  	MOV	56(X12), X23
    75  	XOR	X20, X21
    76  	XOR	X22, X23
    77  	MOV	X21, 48(X10)
    78  	MOV	X23, 56(X10)
    79  	ADD	$64, X10
    80  	ADD	$64, X11
    81  	ADD	$64, X12
    82  	SUB	$64, X13
    83  	BGE	X13, X15, loop64	// X15 still holds 64 here
    84  	BEQZ	X13, done
    85  
        // Fewer than 64 bytes remain: handle at most one 32 byte chunk.
    86  tail32_check:
    87  	MOV	$32, X15
    88  	BLT	X13, X15, tail16_check
    89  	MOV	0(X11), X16
    90  	MOV	0(X12), X17
    91  	MOV	8(X11), X18
    92  	MOV	8(X12), X19
    93  	XOR	X16, X17
    94  	XOR	X18, X19
    95  	MOV	X17, 0(X10)
    96  	MOV	X19, 8(X10)
    97  	MOV	16(X11), X20
    98  	MOV	16(X12), X21
    99  	MOV	24(X11), X22
   100  	MOV	24(X12), X23
   101  	XOR	X20, X21
   102  	XOR	X22, X23
   103  	MOV	X21, 16(X10)
   104  	MOV	X23, 24(X10)
   105  	ADD	$32, X10
   106  	ADD	$32, X11
   107  	ADD	$32, X12
   108  	SUB	$32, X13
   109  	BEQZ	X13, done
   110  
        // Fewer than 32 bytes remain: handle at most one 16 byte chunk.
   111  tail16_check:
   112  	MOV	$16, X15
   113  	BLT	X13, X15, loop4_check
   114  	MOV	0(X11), X16
   115  	MOV	0(X12), X17
   116  	MOV	8(X11), X18
   117  	MOV	8(X12), X19
   118  	XOR	X16, X17
   119  	XOR	X18, X19
   120  	MOV	X17, 0(X10)
   121  	MOV	X19, 8(X10)
   122  	ADD	$16, X10
   123  	ADD	$16, X11
   124  	ADD	$16, X12
   125  	SUB	$16, X13
   126  	BEQZ	X13, done
   127  
        // Byte-wise path, 4 bytes per iteration. Reached with the tail of an
        // aligned buffer, or with the whole buffer when n < 32 or the pointer
        // alignments differ. Only byte loads and stores are used from here on.
   128  loop4_check:
   129  	MOV	$4, X15
   130  	BLT	X13, X15, loop1
   131  	PCALIGN	$16
   132  loop4:
   133  	MOVBU	0(X11), X16
   134  	MOVBU	0(X12), X17
   135  	MOVBU	1(X11), X18
   136  	MOVBU	1(X12), X19
   137  	XOR	X16, X17
   138  	XOR	X18, X19
   139  	MOVB	X17, 0(X10)
   140  	MOVB	X19, 1(X10)
   141  	MOVBU	2(X11), X20
   142  	MOVBU	2(X12), X21
   143  	MOVBU	3(X11), X22
   144  	MOVBU	3(X12), X23
   145  	XOR	X20, X21
   146  	XOR	X22, X23
   147  	MOVB	X21, 2(X10)
   148  	MOVB	X23, 3(X10)
   149  	ADD	$4, X10
   150  	ADD	$4, X11
   151  	ADD	$4, X12
   152  	SUB	$4, X13
   153  	BGE	X13, X15, loop4
   154  
   155  	PCALIGN	$16
        // Final 0..3 bytes, one per iteration; the zero test comes first so an
        // empty remainder falls straight through to done.
   156  loop1:
   157  	BEQZ	X13, done
   158  	MOVBU	0(X11), X16
   159  	MOVBU	0(X12), X17
   160  	XOR	X16, X17
   161  	MOVB	X17, 0(X10)
   162  	ADD	$1, X10
   163  	ADD	$1, X11
   164  	ADD	$1, X12
   165  	SUB	$1, X13
   166  	JMP	loop1
   167  
   168  done:
   169  	RET
   170  

View as plain text