Text file src/internal/bytealg/equal_arm64.s

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "textflag.h"
     7  
     8  // memequal_varlen(a, b unsafe.Pointer) bool
     9  TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT,$0-17
    10  	MOVD	8(R26), R2    // R2 = size, which the compiler stores at offset 8 in the closure (R26)
    11  	CBZ	R2, zero_len	// nothing to compare: report equal without touching memory
    12  	B	runtime·memequal<ABIInternal>(SB)	// tail-jump: R0 = a, R1 = b untouched, R2 = size
    13  zero_len:
    14  	MOVD	$1, R0	// result: true
    15  	RET
    16  
    17  // input (passed in registers, see <ABIInternal> on the TEXT line below):
    18  // R0: pointer a (advanced while comparing)
    19  // R1: pointer b (advanced while comparing)
    20  // R2: data len in bytes (reduced to the remaining length after each chunked loop)
    21  // at return: result in R0 (1 if all bytes match, 0 otherwise); also written: R3-R7, R9, V0-V11, flags
    22  // memequal(a, b unsafe.Pointer, size uintptr) bool
    23  TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-25
    24  	// short path to handle 0-byte case
    25  	CBZ     R2, equal
    26  	// short path to handle equal pointers
    27  	CMP     R0, R1
    28  	BEQ     equal
    29  	CMP	$1, R2
    30  	// handle 1-byte special case for better performance
    31  	BEQ	one
    32  	CMP	$16, R2
    33  	// handle specially if length < 16
    34  	BLO	tail
    35  	BIC	$0x3f, R2, R3	// R3 = len with the low 6 bits cleared = bytes covered by whole 64-byte chunks
    36  	CBZ	R3, chunk16	// fewer than 64 bytes: skip the vector loop
    37  	// work with 64-byte chunks
    38  	ADD	R3, R0, R6	// end of chunks
    39  chunk64_loop:
    40  	VLD1.P	(R0), [V0.D2, V1.D2, V2.D2, V3.D2]	// next 64 bytes of a; .P post-increments R0
    41  	VLD1.P	(R1), [V4.D2, V5.D2, V6.D2, V7.D2]	// next 64 bytes of b; .P post-increments R1
    42  	VCMEQ	V0.D2, V4.D2, V8.D2	// per 64-bit lane: all ones if equal, zero if not
    43  	VCMEQ	V1.D2, V5.D2, V9.D2
    44  	VCMEQ	V2.D2, V6.D2, V10.D2
    45  	VCMEQ	V3.D2, V7.D2, V11.D2
    46  	VAND	V8.B16, V9.B16, V8.B16	// fold the four compare masks into V8
    47  	VAND	V8.B16, V10.B16, V8.B16
    48  	VAND	V8.B16, V11.B16, V8.B16
    49  	CMP	R0, R6	// flags for the BNE below; the VMOV/CBZ in between leave them intact
    50  	VMOV	V8.D[0], R4	// low 64-bit lane of the folded mask
    51  	VMOV	V8.D[1], R5	// high 64-bit lane of the folded mask
    52  	CBZ	R4, not_equal	// a zero lane means some 8-byte group differed
    53  	CBZ	R5, not_equal
    54  	BNE	chunk64_loop	// loop until R0 reaches the end of the 64-byte chunks
    55  	AND	$0x3f, R2, R2	// R2 = bytes left after the 64-byte chunks (0..63)
    56  	CBZ	R2, equal
    57  chunk16:
    58  	// work with 16-byte chunks
    59  	BIC	$0xf, R2, R3	// R3 = remaining len with the low 4 bits cleared
    60  	CBZ	R3, tail	// fewer than 16 bytes left
    61  	ADD	R3, R0, R6	// end of chunks
    62  chunk16_loop:
    63  	LDP.P	16(R0), (R4, R5)	// 16 bytes of a, then R0 += 16
    64  	LDP.P	16(R1), (R7, R9)	// 16 bytes of b, then R1 += 16
    65  	EOR	R4, R7	// R7 ^= R4; any set bit marks a difference
    66  	CBNZ	R7, not_equal
    67  	EOR	R5, R9	// R9 ^= R5
    68  	CBNZ	R9, not_equal
    69  	CMP	R0, R6
    70  	BNE	chunk16_loop	// loop until R0 reaches the end of the 16-byte chunks
    71  	AND	$0xf, R2, R2	// R2 = bytes left after the 16-byte chunks (0..15)
    72  	CBZ	R2, equal
    73  tail:
    74  	// special compare of tail with length < 16
    75  	TBZ	$3, R2, lt_8	// bit 3 clear: fewer than 8 bytes left
    76  	MOVD	(R0), R4	// 8..15 bytes left: compare the first 8 bytes ...
    77  	MOVD	(R1), R5
    78  	EOR	R4, R5
    79  	CBNZ	R5, not_equal
    80  	SUB	$8, R2, R6	// offset of the last 8 bytes
    81  	MOVD	(R0)(R6), R4	// ... and the last 8 bytes, which may overlap the first 8
    82  	MOVD	(R1)(R6), R5
    83  	EOR	R4, R5
    84  	CBNZ	R5, not_equal
    85  	B	equal
    86  	PCALIGN	$16	// align lt_8 to 16 bytes; any padding sits after the unconditional B above
    87  lt_8:
    88  	TBZ	$2, R2, lt_4	// bit 2 clear: fewer than 4 bytes left
    89  	MOVWU	(R0), R4	// 4..7 bytes left: compare the first 4 bytes ...
    90  	MOVWU	(R1), R5
    91  	EOR	R4, R5
    92  	CBNZ	R5, not_equal
    93  	SUB	$4, R2, R6	// offset of the last 4 bytes
    94  	MOVWU	(R0)(R6), R4	// ... and the last 4 bytes, which may overlap the first 4
    95  	MOVWU	(R1)(R6), R5
    96  	EOR	R4, R5
    97  	CBNZ	R5, not_equal
    98  	B	equal
    99  	PCALIGN	$16	// align lt_4 to 16 bytes; any padding sits after the unconditional B above
   100  lt_4:
   101  	TBZ	$1, R2, lt_2	// bit 1 clear: at most 1 byte left
   102  	MOVHU.P	2(R0), R4	// compare 2 bytes and step both pointers past them
   103  	MOVHU.P	2(R1), R5
   104  	CMP	R4, R5
   105  	BNE	not_equal
   106  lt_2:
   107  	TBZ	$0, R2, equal	// bit 0 clear: no odd byte left to check
   108  one:
   109  	MOVBU	(R0), R4	// compare the single remaining byte
   110  	MOVBU	(R1), R5
   111  	CMP	R4, R5
   112  	BNE	not_equal
   113  equal:
   114  	MOVD	$1, R0	// result: true
   115  	RET
   116  not_equal:
   117  	MOVB	ZR, R0	// result: false
   118  	RET
   119  

View as plain text