Text file src/runtime/memclr_loong64.s

     1  // Copyright 2022 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "textflag.h"
     7  
     8  // Register map
     9  //
    10  // R4: ptr
    11  // R5: n
    12  // R6: ptrend
    13  // R7: tmp
    14  
    15  // Algorithm:
    16  //
    17  // 1. when count <= 64 bytes, memory alignment check is omitted.
    18  // The handling is divided into distinct cases based on the size
    19  // of count: clr_0, clr_1, clr_2, clr_3, clr_4, clr_5through7,
    20  // clr_8, clr_9through16, clr_17through32, and clr_33through64.
    21  //
    22  // 2. when count > 64 bytes, memory alignment check is performed.
    23  // Unaligned bytes are processed first (that is, 8-(ptr&7)), and
    24  // then a 64-byte loop is executed to zero out memory.
    25  // When the number of remaining bytes not cleared is n <= 64 bytes,
    26  // a tail processing is performed, invoking the corresponding case
    27  // based on the size of n.
    28  //
    29  //    ptr           newptr                           ptrend
    30  //     |               |<----count after correction---->|
    31  //     |<-------------count before correction---------->|
    32  //     |<--8-(ptr&7)-->|               |<---64 bytes--->|
    33  //     +------------------------------------------------+
    34  //     |   Head        |      Body     |      Tail      |
    35  //     +---------------+---------------+----------------+
    36  //    newptr = ptr - (ptr & 7) + 8
    37  //    count = count - 8 + (ptr & 7)
    38  
    39  // func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr)
    40  TEXT runtime·memclrNoHeapPointers<ABIInternal>(SB),NOSPLIT,$0-16	// zero n bytes starting at ptr; R4 = ptr, R5 = n (see register map); R0 is the zero register
    41  	BEQ	R5, clr_0	// n == 0: nothing to clear
    42  	ADDV	R4, R5, R6	// R6 = ptrend = ptr + n (one past the last byte)
    43  
    44  tail:	// size dispatch; n != 0 here. Re-entered from body with 1..64 leftover bytes
    45  	// n <= 64 bytes: clear directly, without checking alignment
    46  	SGTU	$2, R5, R7	// R7 = 1 if n < 2 (unsigned), else 0; same pattern below
    47  	BNE	R7, clr_1	// n == 1
    48  	SGTU	$3, R5, R7
    49  	BNE	R7, clr_2	// n == 2
    50  	SGTU	$4, R5, R7
    51  	BNE	R7, clr_3	// n == 3
    52  	SGTU	$5, R5, R7
    53  	BNE	R7, clr_4	// n == 4
    54  	SGTU	$8, R5, R7
    55  	BNE	R7, clr_5through7	// 5 <= n <= 7
    56  	SGTU	$9, R5, R7
    57  	BNE	R7, clr_8	// n == 8
    58  	SGTU	$17, R5, R7
    59  	BNE	R7, clr_9through16	// 9 <= n <= 16
    60  	SGTU	$33, R5, R7
    61  	BNE	R7, clr_17through32	// 17 <= n <= 32
    62  	SGTU	$65, R5, R7
    63  	BNE	R7, clr_33through64	// 33 <= n <= 64
    64  
    65  	// n > 64 bytes: check whether ptr is 8-byte aligned
    66  	AND	$7, R4, R7	// R7 = ptr & 7
    67  	BEQ	R7, body	// already aligned: skip the head fix-up
    68  
    69  head:	// ptr is unaligned here: R7 = ptr & 7 is nonzero
    70  	MOVV	R0, (R4)	// zero 8 bytes at the unaligned ptr; this covers the 8-(ptr&7) head bytes
    71  	SUBV	R7, R4	// ptr -= ptr & 7 (round down to an 8-byte boundary)
    72  	ADDV	R7, R5	// n += ptr & 7
    73  	ADDV	$8, R4	// newptr = ptr + (8 - (ptr & 7))
    74  	SUBV	$8, R5	// newn = n - (8 - (ptr & 7))
    75  	SGTU	$65, R5, R7	// R7 = 1 if newn <= 64
    76  	BNE	R7, clr_33through64	// n was >= 65 and the head removed 1..7 bytes, so newn is 58..64 here
    77  
    78  body:	// 64-byte loop; on every entry ptr is 8-byte aligned and n >= 65
    79  	MOVV	R0, (R4)
    80  	MOVV	R0, 8(R4)
    81  	MOVV	R0, 16(R4)
    82  	MOVV	R0, 24(R4)
    83  	MOVV	R0, 32(R4)
    84  	MOVV	R0, 40(R4)
    85  	MOVV	R0, 48(R4)
    86  	MOVV	R0, 56(R4)
    87  	ADDV	$-64, R5	// n -= 64
    88  	ADDV	$64, R4	// ptr += 64
    89  	SGTU	$65, R5, R7	// R7 = 1 if n <= 64
    90  	BEQ	R7, body	// more than 64 bytes left: keep looping
    91  	BEQ	R5, clr_0	// nothing left: done
    92  	JMP	tail	// 1..64 bytes left: reuse the small-size dispatch
    93  
    94  clr_0:
    95  	RET
    96  clr_1:
    97  	MOVB	R0, (R4)	// 1 byte
    98  	RET
    99  clr_2:
   100  	MOVH	R0, (R4)	// 2 bytes
   101  	RET
   102  clr_3:
   103  	MOVH	R0, (R4)	// bytes 0-1
   104  	MOVB	R0, 2(R4)	// byte 2
   105  	RET
   106  clr_4:
   107  	MOVW	R0, (R4)	// 4 bytes
   108  	RET
   109  clr_5through7:
   110  	MOVW	R0, (R4)	// first 4 bytes
   111  	MOVW	R0, -4(R6)	// last 4 bytes, addressed from ptrend; overlaps the first store since n < 8
   112  	RET
   113  clr_8:
   114  	MOVV	R0, (R4)	// 8 bytes
   115  	RET
   116  clr_9through16:
   117  	MOVV	R0, (R4)	// first 8 bytes
   118  	MOVV	R0, -8(R6)	// last 8 bytes, addressed from ptrend; overlaps the first store when n < 16
   119  	RET
   120  clr_17through32:
   121  	MOVV	R0, (R4)	// first 16 bytes: ptr+0 .. ptr+15
   122  	MOVV	R0, 8(R4)
   123  	MOVV	R0, -16(R6)	// last 16 bytes: ptrend-16 .. ptrend-1; overlaps the front stores when n < 32
   124  	MOVV	R0, -8(R6)
   125  	RET
   126  clr_33through64:
   127  	MOVV	R0, (R4)	// first 32 bytes: ptr+0 .. ptr+31
   128  	MOVV	R0, 8(R4)
   129  	MOVV	R0, 16(R4)
   130  	MOVV	R0, 24(R4)
   131  	MOVV	R0, -32(R6)	// last 32 bytes: ptrend-32 .. ptrend-1; overlaps the front stores when n < 64
   132  	MOVV	R0, -24(R6)
   133  	MOVV	R0, -16(R6)
   134  	MOVV	R0, -8(R6)
   135  	RET
   136  

View as plain text