Text file src/internal/bytealg/compare_loong64.s

     1  // Copyright 2022 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "textflag.h"
     7  
     8  TEXT ·Compare<ABIInternal>(SB),NOSPLIT,$0-56
     9  	// Entry point whose arguments a and b arrive as (base, len, cap) triples in R4-R9.
    10  	// R4 = a_base, R5 = a_len (already in the registers cmpbody reads them from)
    11  	// R6 = a_cap (unused, so R6 is free to be overwritten)
    12  	// R7 = b_base (want in R6)
    13  	// R8 = b_len (want in R7)
    14  	// R9 = b_cap (unused)
    15  	MOVV	R7, R6			// b_base -> R6; must come first, the next move overwrites R7
    16  	MOVV	R8, R7			// b_len -> R7
    17  	JMP	cmpbody<>(SB)		// jump, not call: cmpbody leaves the result in R4 and executes the RET
    18  
    19  TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT,$0-40
    20  	// Entry point whose arguments a and b arrive as (base, len) pairs in R4-R7.
    21  	// R4 = a_base, R5 = a_len
    22  	// R6 = b_base, R7 = b_len
    23  	// This is exactly the register layout cmpbody expects, so nothing is moved.
    24  	JMP	cmpbody<>(SB)		// jump, not call: cmpbody leaves the result in R4 and executes the RET
    25  
    26  // cmpbody is the shared comparison routine that Compare and cmpstring jump to.
    27  // On entry: R4 = start of a, R5 = length of a,
    28  //           R6 = start of b, R7 = length of b.
    29  // It compares the first min(R5, R7) bytes; if they are all equal, the lengths decide.
    30  // R4/R6 are advanced while comparing; R8-R12, R14 and R15 are scratch; R5/R7 are only read.
    31  // for regabi the return value (-1/0/1) in R4
    32  TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0
    33  	BEQ	R4, R6, cmp_len		// same start of a and b, so only the lengths can differ
    34  
    35  	SGTU	R5, R7, R9		// R9 = 1 if len(a) > len(b)
    36  	BNE	R9, b_lt_a		// len(b) is the smaller one
    37  	MOVV	R5, R14			// len(a) <= len(b): min is len(a)
    38  	JMP	entry
    39  b_lt_a:
    40  	MOVV	R7, R14			// R14 is min(R5, R7)
    41  entry:
    42  	ADDV	R4, R14, R12		// R12 = R4 + min: end of the part of a that gets compared
    43  	BEQ	R4, R12, cmp_len	// minlength is 0
    44  
    45  tail:
    46  	MOVV	$2, R15			// R15 = size threshold, doubled before each test below
    47  	BLT	R14, R15, cmp1		// min < 2
    48  	SLLV	$1, R15			// R15 = 4
    49  	BLT	R14, R15, cmp2		// min < 4
    50  	SLLV	$1, R15			// R15 = 8
    51  	BLT	R14, R15, cmp4		// min < 8
    52  	SLLV	$1, R15			// R15 = 16
    53  	BLT	R14, R15, cmp8		// min < 16
    54  	SLLV	$1, R15			// R15 = 32; cmp32_loop relies on this value
    55  	BLT	R14, R15, cmp16		// min < 32
    56  
    57  // When min >= 32 bytes, enter cmp32_loop: each iteration loads four
    58  // 8-byte words from a and from b, two pairs at a time, and compares them.
    59  cmp32_loop:
    60  	MOVV	(R4), R8		// bytes 0-7 of a
    61  	MOVV	(R6), R9		// bytes 0-7 of b
    62  	MOVV	8(R4), R10		// bytes 8-15 of a
    63  	MOVV	8(R6), R11		// bytes 8-15 of b
    64  	BNE	R8, R9, cmp8a		// first pair differs
    65  	BNE	R10, R11, cmp8b		// second pair differs; it is already in R10/R11
    66  	MOVV	16(R4), R8		// bytes 16-23 of a
    67  	MOVV	16(R6), R9		// bytes 16-23 of b
    68  	MOVV	24(R4), R10		// bytes 24-31 of a
    69  	MOVV	24(R6), R11		// bytes 24-31 of b
    70  	BNE	R8, R9, cmp8a
    71  	BNE	R10, R11, cmp8b
    72  	ADDV	$32, R4
    73  	ADDV	$32, R6
    74  	SUBV	$32, R14		// R14 = bytes still to compare
    75  	BGE	R14, R15, cmp32_loop	// R15 is still 32: repeat while >= 32 bytes remain
    76  	BEQ	R14, cmp_len		// nothing left: the common prefix is equal
    77  
    78  check16:
    79  	MOVV	$16, R15
    80  	BLT	R14, R15, check8	// fewer than 16 bytes left: skip the 16-byte step
    81  cmp16:
    82  	MOVV	(R4), R8		// also entered directly from tail when 16 <= min < 32
    83  	MOVV	(R6), R9
    84  	MOVV	8(R4), R10
    85  	MOVV	8(R6), R11
    86  	BNE	R8, R9, cmp8a
    87  	BNE	R10, R11, cmp8b
    88  	ADDV	$16, R4
    89  	ADDV	$16, R6
    90  	SUBV	$16, R14
    91  	BEQ	R14, cmp_len		// nothing left: the common prefix is equal
    92  
    93  check8:
    94  	MOVV	$8, R15
    95  	BLT	R14, R15, check4	// fewer than 8 bytes left: skip the 8-byte step
    96  cmp8:
    97  	MOVV	(R4), R8		// also entered directly from tail when 8 <= min < 16
    98  	MOVV	(R6), R9
    99  	BNE	R8, R9, cmp8a
   100  	ADDV	$8, R4
   101  	ADDV	$8, R6
   102  	SUBV	$8, R14
   103  	BEQ	R14, cmp_len		// nothing left: the common prefix is equal
   104  
   105  check4:
   106  	MOVV	$4, R15
   107  	BLT	R14, R15, check2	// fewer than 4 bytes left: skip the 4-byte step
   108  cmp4:
   109  	MOVW	(R4), R8		// 4-byte load; both sides are extended the same way, so only the loaded bytes can differ
   110  	MOVW	(R6), R9
   111  	BNE	R8, R9, cmp8a
   112  	ADDV	$4, R4
   113  	ADDV	$4, R6
   114  	SUBV	$4, R14
   115  	BEQ	R14, cmp_len		// nothing left: the common prefix is equal
   116  
   117  check2:
   118  	MOVV	$2, R15
   119  	BLT	R14, R15, cmp1		// fewer than 2 bytes left: skip the 2-byte step
   120  cmp2:
   121  	MOVH	(R4), R8		// 2-byte load; both sides are extended the same way, so only the loaded bytes can differ
   122  	MOVH	(R6), R9
   123  	BNE	R8, R9, cmp8a
   124  	ADDV	$2, R4
   125  	ADDV	$2, R6
   126  	SUBV	$2, R14
   127  	BEQ	R14, cmp_len		// nothing left: the common prefix is equal
   128  
   129  cmp1:
   130  	BEQ	R14, cmp_len		// no byte left to compare
   131  	MOVBU	(R4), R8		// last remaining byte of a
   132  	MOVBU	(R6), R9		// last remaining byte of b
   133  	BNE	R8, R9, byte_cmp
   134  	JMP	cmp_len			// every compared byte was equal: the lengths decide
   135  
   136  	// Compare 8/4/2 bytes taken from R8/R9 that are known to differ.
   137  cmp8a:
   138  	MOVV	R8, R10			// copy to R10/R11; R8/R9 are reused for the single bytes below
   139  	MOVV	R9, R11			// falls through into cmp8b
   140  
   141  	// Compare 8/4/2 bytes taken from R10/R11 that are known to differ.
   142  cmp8b:
   143  	MOVV	$0xff, R15		// byte mask, starting at the least significant byte
   144  
   145  	// Isolate one byte at a time from R10/R11, lowest byte first, until the bytes differ.
   146  cmp8_loop:
   147  	AND	R10, R15, R8		// R8 = masked byte of a, left at its position in the word
   148  	AND	R11, R15, R9		// R9 = masked byte of b, same position
   149  	BNE	R8, R9, byte_cmp
   150  	SLLV	$8, R15			// move the mask up to the next byte
   151  	JMP	cmp8_loop		// ends because R10 and R11 are known to differ
   152  
   153  	// Compare the single bytes in R8/R9 that are known to differ.
   154  byte_cmp:
   155  	SGTU	R8, R9, R4		// R4 = 1 if (R8 > R9)
   156  	BNE	R0, R4, ret		// R4 is 1: a is greater, return 1
   157  	MOVV	$-1, R4			// otherwise a is smaller, return -1
   158  	JMP	ret
   159  
   160  cmp_len:
   161  	SGTU	R5, R7, R8		// R8 = 1 if len(a) > len(b)
   162  	SGTU	R7, R5, R9		// R9 = 1 if len(b) > len(a)
   163  	SUBV	R9, R8, R4		// R4 = R8 - R9: 1, -1, or 0 when the lengths are equal
   164  
   165  ret:
   166  	RET
   167  

View as plain text