Text file src/internal/bytealg/compare_riscv64.s

// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "asm_riscv64.h"
#include "go_asm.h"
#include "textflag.h"

TEXT ·Compare<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-56
	// X10 = a_base
	// X11 = a_len
	// X12 = a_cap (unused)
	// X13 = b_base (want in X12)
	// X14 = b_len (want in X13)
	// X15 = b_cap (unused)
	MOV	X13, X12
	MOV	X14, X13
	JMP	compare<>(SB)

TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
	// X10 = a_base
	// X11 = a_len
	// X12 = b_base
	// X13 = b_len
	JMP	compare<>(SB)

// On entry:
// X10 points to start of a
// X11 length of a
// X12 points to start of b
// X13 length of b
// return value in X10 (-1/0/1)
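//
// As a rough reference, the routine computes the same result as this Go
// sketch (illustrative only; it is not the code that is assembled):
//
//	func compare(a, b []byte) int {
//		l := min(len(a), len(b))
//		for i := 0; i < l; i++ {
//			switch {
//			case a[i] < b[i]:
//				return -1
//			case a[i] > b[i]:
//				return +1
//			}
//		}
//		switch {
//		case len(a) < len(b):
//			return -1
//		case len(a) > len(b):
//			return +1
//		}
//		return 0
//	}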
TEXT compare<>(SB),NOSPLIT|NOFRAME,$0
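	// If a and b share a base pointer their common bytes are identical,
	// so the result depends only on the lengths. X5 is the number of
	// bytes to compare: the smaller of the two lengths. If it is zero,
	// again only the lengths matter.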
	BEQ	X10, X12, cmp_len

	MIN	X11, X13, X5
	BEQZ	X5, cmp_len

	MOV	$16, X6
	BLT	X5, X6, check8_unaligned

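	// If the build does not assume the vector extension (hasV), check at
	// run time whether the CPU provides it and use the scalar code if not.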
#ifndef hasV
	MOVB	internal∕cpu·RISCV64+const_offsetRISCV64HasV(SB), X6
	BEQZ	X6, compare_scalar
#endif

	// Use vector if not 8 byte aligned.
	OR	X10, X12, X6
	AND	$7, X6
	BNEZ	X6, vector_loop

	// Use scalar if 8 byte aligned and <= 128 bytes.
	SUB	$128, X5, X6
	BLEZ	X6, compare_scalar_aligned

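	// Vector loop. VSETVLI chooses how many bytes fit into the vector
	// registers this iteration (written to X6) from the remaining length
	// in X5. Matching chunks of a and b are loaded and compared for
	// inequality into the mask register V0; VFIRSTM returns the index of
	// the first mismatch, or -1 if the chunks are equal. If no mismatch
	// is found, advance both pointers and reduce the remaining length by
	// the number of bytes processed.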
	PCALIGN	$16
vector_loop:
	VSETVLI	X5, E8, M8, TA, MA, X6
	VLE8V	(X10), V8
	VLE8V	(X12), V16
	VMSNEVV	V8, V16, V0
	VFIRSTM	V0, X7
	BGEZ	X7, vector_not_eq
	ADD	X6, X10
	ADD	X6, X12
	SUB	X6, X5
	BNEZ	X5, vector_loop
	JMP	cmp_len

vector_not_eq:
	// Load first differing bytes in X8/X9.
	ADD	X7, X10
	ADD	X7, X12
	MOVBU	(X10), X8
	MOVBU	(X12), X9
	JMP	cmp

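	// Scalar path, used when the vector extension is not available.
	// With at least 32 bytes and equal pointer alignment, align to an
	// 8 byte boundary and then compare using 8 byte loads; otherwise
	// fall back to byte-by-byte comparison.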
compare_scalar:
	MOV	$32, X6
	BLT	X5, X6, check8_unaligned

	// Check alignment - if alignment differs we have to do one byte at a time.
	AND	$7, X10, X7
	AND	$7, X12, X8
	BNE	X7, X8, check8_unaligned
	BEQZ	X7, compare32

	// Check one byte at a time until we reach 8 byte alignment.
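	// X7 = 8 - (X10 & 7) is the number of bytes to consume before both
	// pointers are 8 byte aligned; reduce the remaining length in X5 to match.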
	SUB	X7, X0, X7
	ADD	$8, X7, X7
	SUB	X7, X5, X5
align:
	SUB	$1, X7
	MOVBU	0(X10), X8
	MOVBU	0(X12), X9
	BNE	X8, X9, cmp
	ADD	$1, X10
	ADD	$1, X12
	BNEZ	X7, align

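	// Both pointers are now 8 byte aligned: compare 32 bytes per
	// iteration using four 8 byte loads from each input while at least
	// 32 bytes remain.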
compare_scalar_aligned:
	MOV	$32, X6
	BLT	X5, X6, check16
compare32:
	MOV	0(X10), X15
	MOV	0(X12), X16
	MOV	8(X10), X17
	MOV	8(X12), X18
	BNE	X15, X16, cmp8a
	BNE	X17, X18, cmp8b
	MOV	16(X10), X15
	MOV	16(X12), X16
	MOV	24(X10), X17
	MOV	24(X12), X18
	BNE	X15, X16, cmp8a
	BNE	X17, X18, cmp8b
	ADD	$32, X10
	ADD	$32, X12
	SUB	$32, X5
	BGE	X5, X6, compare32
	BEQZ	X5, cmp_len

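	// Fewer than 32 bytes remain: compare a 16 byte block if there is
	// one, then fall through to the byte-wise tail.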
check16:
	MOV	$16, X6
	BLT	X5, X6, check8_unaligned
compare16:
	MOV	0(X10), X15
	MOV	0(X12), X16
	MOV	8(X10), X17
	MOV	8(X12), X18
	BNE	X15, X16, cmp8a
	BNE	X17, X18, cmp8b
	ADD	$16, X10
	ADD	$16, X12
	SUB	$16, X5
	BEQZ	X5, cmp_len

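	// Short or unaligned data is compared using byte loads. All eight
	// bytes are loaded from each input before any pair is compared.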
check8_unaligned:
	MOV	$8, X6
	BLT	X5, X6, check4_unaligned
compare8_unaligned:
	MOVBU	0(X10), X8
	MOVBU	1(X10), X15
	MOVBU	2(X10), X17
	MOVBU	3(X10), X19
	MOVBU	4(X10), X21
	MOVBU	5(X10), X23
	MOVBU	6(X10), X25
	MOVBU	7(X10), X29
	MOVBU	0(X12), X9
	MOVBU	1(X12), X16
	MOVBU	2(X12), X18
	MOVBU	3(X12), X20
	MOVBU	4(X12), X22
	MOVBU	5(X12), X24
	MOVBU	6(X12), X28
	MOVBU	7(X12), X30
	BNE	X8, X9, cmp1a
	BNE	X15, X16, cmp1b
	BNE	X17, X18, cmp1c
	BNE	X19, X20, cmp1d
	BNE	X21, X22, cmp1e
	BNE	X23, X24, cmp1f
	BNE	X25, X28, cmp1g
	BNE	X29, X30, cmp1h
	ADD	$8, X10
	ADD	$8, X12
	SUB	$8, X5
	BGE	X5, X6, compare8_unaligned
	BEQZ	X5, cmp_len

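	// Fewer than 8 bytes remain: compare four at a time, then finish
	// one byte at a time.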
check4_unaligned:
	MOV	$4, X6
	BLT	X5, X6, compare1
compare4_unaligned:
	MOVBU	0(X10), X8
	MOVBU	1(X10), X15
	MOVBU	2(X10), X17
	MOVBU	3(X10), X19
	MOVBU	0(X12), X9
	MOVBU	1(X12), X16
	MOVBU	2(X12), X18
	MOVBU	3(X12), X20
	BNE	X8, X9, cmp1a
	BNE	X15, X16, cmp1b
	BNE	X17, X18, cmp1c
	BNE	X19, X20, cmp1d
	ADD	$4, X10
	ADD	$4, X12
	SUB	$4, X5
	BGE	X5, X6, compare4_unaligned

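	// Compare any remaining bytes individually.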
compare1:
	BEQZ	X5, cmp_len
	MOVBU	0(X10), X8
	MOVBU	0(X12), X9
	BNE	X8, X9, cmp
	ADD	$1, X10
	ADD	$1, X12
	SUB	$1, X5
	JMP	compare1

	// Compare 8 bytes of memory in X15/X16 that are known to differ.
cmp8a:
	MOV	X15, X17
	MOV	X16, X18

	// Compare 8 bytes of memory in X17/X18 that are known to differ.
cmp8b:
	MOV	$0xff, X19
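	// The words were loaded with little-endian 8 byte loads, so the
	// least significant byte that differs is the first differing byte
	// in memory. Scan the byte lanes from least to most significant
	// with a sliding 0xff mask.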
cmp8_loop:
	AND	X17, X19, X8
	AND	X18, X19, X9
	BNE	X8, X9, cmp
	SLLI	$8, X19
	JMP	cmp8_loop

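	// cmp1a through cmp1h compare the differing byte values found above,
	// each using the register pair they were loaded into, then jump to
	// cmp_ret.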
cmp1a:
	SLTU	X9, X8, X5
	SLTU	X8, X9, X6
	JMP	cmp_ret
cmp1b:
	SLTU	X16, X15, X5
	SLTU	X15, X16, X6
	JMP	cmp_ret
cmp1c:
	SLTU	X18, X17, X5
	SLTU	X17, X18, X6
	JMP	cmp_ret
cmp1d:
	SLTU	X20, X19, X5
	SLTU	X19, X20, X6
	JMP	cmp_ret
cmp1e:
	SLTU	X22, X21, X5
	SLTU	X21, X22, X6
	JMP	cmp_ret
cmp1f:
	SLTU	X24, X23, X5
	SLTU	X23, X24, X6
	JMP	cmp_ret
cmp1g:
	SLTU	X28, X25, X5
	SLTU	X25, X28, X6
	JMP	cmp_ret
cmp1h:
	SLTU	X30, X29, X5
	SLTU	X29, X30, X6
	JMP	cmp_ret

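	// cmp_len: all bytes compared so far were equal, so the result is
	// determined by the lengths. cmp/cmp_ret: X8 and X9 hold the values
	// to order; the two SLTU results (0 or 1) are subtracted to produce
	// -1, 0 or +1 in X10.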
cmp_len:
	MOV	X11, X8
	MOV	X13, X9
cmp:
	SLTU	X9, X8, X5
	SLTU	X8, X9, X6
cmp_ret:
	SUB	X5, X6, X10
	RET
