Text file src/internal/bytealg/equal_arm64.s

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "textflag.h"
     7  
     8  // func memequal(a, b unsafe.Pointer, size uintptr) bool
     9  TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-25
    10  	// fast exit: a and b are the same address (R0 = a, R1 = b)
    11  	CMP	R0, R1
    12  	BEQ	trivially_equal
    13  	// fast exit: size (R2) is zero, so there is nothing to compare
    14  	CBZ	R2, trivially_equal
    15  	B	memeqbody<>(SB)	// tail call; memeqbody leaves the result in R0
    16  trivially_equal:
    17  	MOVD	$1, R0	// report true
    18  	RET
    19  
    20  // func memequal_varlen(a, b unsafe.Pointer) bool
    21  TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT,$0-17
    22  	CMP	R0, R1	// a and b at the same address?
    23  	BEQ	same
    24  	MOVD	8(R26), R2	// size is not an argument: the compiler stores it at offset 8 in the closure
    25  	CBZ	R2, same	// zero bytes to compare
    26  	B	memeqbody<>(SB)	// tail call with R0 = a, R1 = b, R2 = size
    27  same:
    28  	MOVD	$1, R0	// report true
    29  	RET
    30  
    31  // memeqbody compares R2 bytes starting at R0 with R2 bytes starting at R1. Inputs:
    32  // R0: pointer a (advanced by the post-increment loads as data is consumed)
    33  // R1: pointer b (advanced in step with R0)
    34  // R2: data len; masked down to the bytes still unchecked after each chunk loop
    35  // at return: result in R0 (1 if equal, 0 if not); R3-R7, R9 and V0-V11 may be overwritten
    36  TEXT memeqbody<>(SB),NOSPLIT,$0
    37  	CMP	$1, R2
    38  	// handle 1-byte special case for better performance
    39  	BEQ	one
    40  	CMP	$16, R2
    41  	// handle specially if length < 16
    42  	BLO	tail	// unsigned "lower": len < 16
    43  	BIC	$0x3f, R2, R3	// R3 = len with the low 6 bits cleared = bytes covered by whole 64-byte chunks
    44  	CBZ	R3, chunk16	// len < 64: no 64-byte chunk to process
    45  	// work with 64-byte chunks
    46  	ADD	R3, R0, R6	// end of chunks: value R0 has once every 64-byte chunk is consumed
    47  chunk64_loop:
    48  	VLD1.P	(R0), [V0.D2, V1.D2, V2.D2, V3.D2]	// V0-V3 = next 64 bytes of a; R0 is post-incremented
    49  	VLD1.P	(R1), [V4.D2, V5.D2, V6.D2, V7.D2]	// V4-V7 = next 64 bytes of b; R1 is post-incremented
    50  	VCMEQ	V0.D2, V4.D2, V8.D2	// per 64-bit lane: all ones if equal, zero if different
    51  	VCMEQ	V1.D2, V5.D2, V9.D2
    52  	VCMEQ	V2.D2, V6.D2, V10.D2
    53  	VCMEQ	V3.D2, V7.D2, V11.D2
    54  	VAND	V8.B16, V9.B16, V8.B16	// fold the four results into V8: a lane stays nonzero only if it matched everywhere
    55  	VAND	V8.B16, V10.B16, V8.B16
    56  	VAND	V8.B16, V11.B16, V8.B16
    57  	CMP	R0, R6	// end of chunks reached? flags are consumed by the BNE below
    58  	VMOV	V8.D[0], R4	// R4 = low 64-bit lane of the folded result
    59  	VMOV	V8.D[1], R5	// R5 = high 64-bit lane of the folded result
    60  	CBZ	R4, not_equal	// a zero lane means some compared lane differed
    61  	CBZ	R5, not_equal
    62  	BNE	chunk64_loop	// more 64-byte chunks remain (flags from the CMP above)
    63  	AND	$0x3f, R2, R2	// R2 = len mod 64: bytes left after the 64-byte chunks
    64  	CBZ	R2, equal	// nothing left: every byte matched
    65  chunk16:
    66  	// work with 16-byte chunks
    67  	BIC	$0xf, R2, R3	// R3 = remaining len rounded down to a multiple of 16
    68  	CBZ	R3, tail	// fewer than 16 bytes remain
    69  	ADD	R3, R0, R6	// end of chunks: value R0 has once every 16-byte chunk is consumed
    70  chunk16_loop:
    71  	LDP.P	16(R0), (R4, R5)	// R4, R5 = next 16 bytes of a; R0 += 16
    72  	LDP.P	16(R1), (R7, R9)	// R7, R9 = next 16 bytes of b; R1 += 16
    73  	EOR	R4, R7	// R7 ^= R4: nonzero iff the first 8 bytes differ
    74  	CBNZ	R7, not_equal
    75  	EOR	R5, R9	// R9 ^= R5: nonzero iff the second 8 bytes differ
    76  	CBNZ	R9, not_equal
    77  	CMP	R0, R6	// reached the end of the 16-byte chunks?
    78  	BNE	chunk16_loop
    79  	AND	$0xf, R2, R2	// R2 = bytes left after the 16-byte chunks
    80  	CBZ	R2, equal	// nothing left: every byte matched
    81  tail:
    82  	// special compare of tail with length < 16; the bits of R2 select the load width
    83  	TBZ	$3, R2, lt_8	// bit 3 clear: fewer than 8 bytes left
    84  	MOVD	(R0), R4	// 8 <= len < 16: compare the first 8 bytes ...
    85  	MOVD	(R1), R5
    86  	EOR	R4, R5
    87  	CBNZ	R5, not_equal
    88  	SUB	$8, R2, R6	// offset of the last 8 bytes (these may overlap the first 8)
    89  	MOVD	(R0)(R6), R4	// ... and the last 8 bytes
    90  	MOVD	(R1)(R6), R5
    91  	EOR	R4, R5
    92  	CBNZ	R5, not_equal
    93  	B	equal
    94  lt_8:
    95  	TBZ	$2, R2, lt_4	// bit 2 clear: fewer than 4 bytes left
    96  	MOVWU	(R0), R4	// 4 <= len < 8: compare the first 4 bytes ...
    97  	MOVWU	(R1), R5
    98  	EOR	R4, R5
    99  	CBNZ	R5, not_equal
   100  	SUB	$4, R2, R6	// offset of the last 4 bytes (these may overlap the first 4)
   101  	MOVWU	(R0)(R6), R4	// ... and the last 4 bytes
   102  	MOVWU	(R1)(R6), R5
   103  	EOR	R4, R5
   104  	CBNZ	R5, not_equal
   105  	B	equal
   106  lt_4:
   107  	TBZ	$1, R2, lt_2	// bit 1 clear: at most 1 byte left
   108  	MOVHU.P	2(R0), R4	// len is 2 or 3: load 2 bytes of a; R0 += 2
   109  	MOVHU.P	2(R1), R5	// load 2 bytes of b; R1 += 2
   110  	CMP	R4, R5
   111  	BNE	not_equal
   112  lt_2:
   113  	TBZ	$0, R2, equal	// bit 0 clear: no trailing byte to check
   114  one:
   115  	MOVBU	(R0), R4	// compare the one remaining byte (also the len == 1 fast path)
   116  	MOVBU	(R1), R5
   117  	CMP	R4, R5
   118  	BNE	not_equal
   119  equal:
   120  	MOVD	$1, R0	// result: equal
   121  	RET
   122  not_equal:
   123  	MOVB	ZR, R0	// result: not equal
   124  	RET
   125  

View as plain text