Text file src/runtime/memmove_loong64.s

     1  // Copyright 2022 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "textflag.h"
     6  
     7  // See memmove Go doc for important implementation constraints.
     8  
     9  // Register map
    10  //
    11  // to		R4
    12  // from		R5
    13  // n(aka count)	R6
    14  // to-end	R7
    15  // from-end	R8
    16  // data		R11-R18
    17  // tmp		R9, R10
    18  
    19  // Algorithm:
    20  //
    21  // Memory alignment check is only performed for copy size greater
    22  // than 64 bytes to minimize overhead.
    23  //
    24  // When copy size <= 64 bytes, go to label tail, which selects the
    25  // appropriate case according to the copy size and copies directly.
    26  // Based on the common memory access instructions of loong64, the
    27  // currently implemented cases are:
    28  // move_0, move_1, move_2, move_3, move_4, move_5through7, move_8,
    29  // move_9through16, move_17through32, move_33through64
    30  //
    31  // when copy size > 64 bytes, use the destination-aligned copying,
    32  // adopt the following strategy to copy in 3 parts:
    33  // 1. Head: do the memory alignment
    34  // 2. Body: a 64-byte loop structure
    35  // 3. Tail: processing of the remaining part (<= 64 bytes)
    36  //
    37  // forward:
    38  //
    39  //    Dst           NewDst                           Dstend
    40  //     |               |<----count after correction---->|
    41  //     |<-------------count before correction---------->|
    42  //     |<--8-(Dst&7)-->|               |<---64 bytes--->|
    43  //     +------------------------------------------------+
    44  //     |   Head        |      Body     |      Tail      |
    45  //     +---------------+---------------+----------------+
    46  //    NewDst = Dst - (Dst & 7) + 8
    47  //    count = count - 8 + (Dst & 7)
    48  //    Src = Src - (Dst & 7) + 8
    49  //
    50  // backward:
    51  //
    52  //    Dst                             NewDstend          Dstend
    53  //     |<-----count after correction------>|                |
    54  //     |<------------count before correction--------------->|
    55  //     |<---64 bytes--->|                  |<-(Dstend&7)+8->|
    56  //     +----------------------------------------------------+
    57  //     |   Tail         |      Body        |      Head      |
    58  //     +----------------+------------------+----------------+
    59  //    NewDstend = Dstend - (Dstend & 7) - 8   (head is skipped if Dstend & 7 == 0)
    60  //    count = count - (Dstend & 7) - 8
    61  //    Srcend = Srcend - (Dstend & 7) - 8
    62  
    63  // func memmove(to, from unsafe.Pointer, n uintptr)
    64  TEXT runtime·memmove<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-24	// copy n (R6) bytes from from (R5) to to (R4)
    65  	BEQ	R4, R5, move_0	// to == from: nothing to copy
    66  	BEQ	R6, move_0	// n == 0: nothing to copy
    67  
    68  	ADDV	R4, R6, R7 // to-end pointer
    69  	ADDV	R5, R6, R8 // from-end pointer
    70  
    71  tail:
    72  	// Dispatch on the count in R6: sizes <= 64 bytes are copied directly by a move_* case, with no alignment check.
    73  
    74  	// < 2 bytes
    75  	SGTU	$2, R6, R9	// R9 = 1 if count < 2 (unsigned compare)
    76  	BNE	R9, move_1	// count == 0 never reaches tail, so this means count == 1
    77  
    78  	// < 3 bytes
    79  	SGTU	$3, R6, R9
    80  	BNE	R9, move_2
    81  
    82  	// < 4 bytes
    83  	SGTU	$4, R6, R9
    84  	BNE	R9, move_3
    85  
    86  	// < 5 bytes
    87  	SGTU	$5, R6, R9
    88  	BNE	R9, move_4
    89  
    90  	// >= 5 bytes and < 8 bytes
    91  	SGTU	$8, R6, R9
    92  	BNE	R9, move_5through7
    93  
    94  	// < 9 bytes
    95  	SGTU	$9, R6, R9
    96  	BNE	R9, move_8
    97  
    98  	// >= 9 bytes and < 17 bytes
    99  	SGTU	$17, R6, R9
   100  	BNE	R9, move_9through16
   101  
   102  	// >= 17 bytes and < 33 bytes
   103  	SGTU	$33, R6, R9
   104  	BNE	R9, move_17through32
   105  
   106  	// >= 33 bytes and < 65 bytes
   107  	SGTU	$65, R6, R9
   108  	BNE	R9, move_33through64
   109  
   110  	// if (dst > src) && (dst < src + count), regarded as memory
   111  	// overlap, jump to backward
   112  	// else, jump to forward
   113  	BGEU	R5, R4, forward	// src >= dst: copy forward
   114  	ADDV	R5, R6, R10	// R10 = src + count
   115  	BLTU	R4, R10, backward	// dst < src + count: copy backward
   116  
   117  forward:
   118  	AND	$7, R4, R9	// dst & 7
   119  	BEQ	R9, body	// dst is already 8-byte aligned: skip the head
   120  head:
   121  	MOVV	$8, R10
   122  	SUBV	R9, R10		// head = 8 - (dst & 7)
   123  	MOVB	(R5), R11	// byte loop start: load one byte from src
   124  	SUBV	$1, R10		// one fewer head byte remaining
   125  	ADDV	$1, R5		// src++
   126  	MOVB	R11, (R4)	// store the byte to dst
   127  	ADDV	$1, R4		// dst++
   128  	BNE	R10, -5(PC)	// repeat from the MOVB load (5 instructions back) until R10 == 0
   129  	ADDV	R9, R6
   130  	ADDV	$-8, R6		// newcount = count + (dst & 7) - 8
   131  	// if newcount < 65 bytes, move_33through64 is enough to copy the rest
   132  	SGTU	$65, R6, R9
   133  	BNE	R9, move_33through64
   134  
   135  body:
   136  	MOVV	(R5), R11	// load 64 bytes from src into R11-R18 before storing anything
   137  	MOVV	8(R5), R12
   138  	MOVV	16(R5), R13
   139  	MOVV	24(R5), R14
   140  	MOVV	32(R5), R15
   141  	MOVV	40(R5), R16
   142  	MOVV	48(R5), R17
   143  	MOVV	56(R5), R18
   144  	MOVV	R11, (R4)	// store the 64 bytes to dst
   145  	MOVV	R12, 8(R4)
   146  	MOVV	R13, 16(R4)
   147  	MOVV	R14, 24(R4)
   148  	MOVV	R15, 32(R4)
   149  	MOVV	R16, 40(R4)
   150  	MOVV	R17, 48(R4)
   151  	MOVV	R18, 56(R4)
   152  	ADDV	$-64, R6	// count -= 64
   153  	ADDV	$64, R4		// dst += 64
   154  	ADDV	$64, R5		// src += 64
   155  	SGTU	$64, R6, R9	// R9 = 1 if the remaining count < 64
   156  	// if the remaining part >= 64 bytes, jmp to body
   157  	BEQ	R9, body
   158  	// if the remaining part == 0 bytes, use move_0 to return
   159  	BEQ	R6, move_0
   160  	// if the remaining part in (0, 63] bytes, jmp to tail
   161  	JMP	tail
   162  
   163  // The backward copy algorithm is the same as the forward copy,
   164  // except for the direction.
   165  backward:
   166  	AND	$7, R7, R9	 // dstend & 7
   167  	BEQ	R9, b_body	// dstend is already 8-byte aligned: skip the head
   168  b_head:
   169  	MOVV	-8(R8), R11	// load the last 8 bytes of src
   170  	SUBV	R9, R6		// newcount = count - (dstend & 7)
   171  	SUBV	R9, R8		// newsrcend = srcend - (dstend & 7)
   172  	MOVV	-8(R8), R12	// load the 8 src bytes that end at newsrcend
   173   	MOVV	R11, -8(R7)	// store the last 8 bytes of dst (dstend not yet aligned)
   174  	SUBV	R9, R7		// newdstend = dstend - (dstend & 7)
   175   	MOVV	R12, -8(R7)	// store the 8 bytes that end at the aligned newdstend
   176  	SUBV	$8, R6		// the head also covered that aligned word: count -= 8
   177  	SUBV	$8, R7		// dstend -= 8
   178  	SUBV	$8, R8		// srcend -= 8
   179  	SGTU    $65, R6, R9	// if newcount < 65 bytes, move_33through64 finishes the copy
   180  	BNE     R9, move_33through64
   181  
   182  b_body:
   183  	MOVV	-8(R8), R11	// load the 64 bytes ending at srcend into R11-R18 before storing anything
   184  	MOVV	-16(R8), R12
   185  	MOVV	-24(R8), R13
   186  	MOVV	-32(R8), R14
   187  	MOVV	-40(R8), R15
   188  	MOVV	-48(R8), R16
   189  	MOVV	-56(R8), R17
   190  	MOVV	-64(R8), R18
   191  	MOVV	R11, -8(R7)	// store the 64 bytes ending at dstend
   192  	MOVV	R12, -16(R7)
   193  	MOVV	R13, -24(R7)
   194  	MOVV	R14, -32(R7)
   195  	MOVV	R15, -40(R7)
   196  	MOVV	R16, -48(R7)
   197  	MOVV	R17, -56(R7)
   198  	MOVV	R18, -64(R7)
   199  	ADDV	$-64, R6	// count -= 64
   200  	ADDV	$-64, R7	// dstend -= 64
   201  	ADDV	$-64, R8	// srcend -= 64
   202  	SGTU	$64, R6, R9	// R9 = 1 if the remaining count < 64
   203  	BEQ	R9, b_body	// remaining >= 64 bytes: another 64-byte iteration
   204  	BEQ	R6, move_0	// remaining == 0 bytes: return
   205  	JMP	tail		// remaining in (0, 63] bytes: finish in tail
   206  
   207  move_0:
   208  	RET
   209  
   210  move_1:
   211  	MOVB	(R5), R11	// copy the single byte
   212  	MOVB	R11, (R4)
   213  	RET
   214  move_2:
   215  	MOVH	(R5), R11	// copy 2 bytes
   216  	MOVH	R11, (R4)
   217  	RET
   218  move_3:
   219  	MOVH	(R5), R11	// first 2 bytes
   220  	MOVB	-1(R8), R12	// last byte, addressed from from-end
   221  	MOVH	R11, (R4)	// both loads are done before any store
   222  	MOVB	R12, -1(R7)
   223  	RET
   224  move_4:
   225  	MOVW	(R5), R11	// copy 4 bytes
   226  	MOVW	R11, (R4)
   227  	RET
   228  move_5through7:
   229  	MOVW	(R5), R11	// first 4 bytes
   230  	MOVW	-4(R8), R12	// last 4 bytes; the two words overlap since count < 8
   231  	MOVW	R11, (R4)	// both loads are done before any store
   232  	MOVW	R12, -4(R7)
   233  	RET
   234  move_8:
   235  	MOVV	(R5), R11	// copy 8 bytes
   236  	MOVV	R11, (R4)
   237  	RET
   238  move_9through16:
   239  	MOVV	(R5), R11	// first 8 bytes
   240  	MOVV	-8(R8), R12	// last 8 bytes; the two words overlap when count < 16
   241  	MOVV	R11, (R4)	// both loads are done before any store
   242  	MOVV	R12, -8(R7)
   243  	RET
   244  move_17through32:
   245  	MOVV	(R5), R11	// first 16 bytes
   246  	MOVV	8(R5), R12
   247  	MOVV	-16(R8), R13	// last 16 bytes; the halves overlap when count < 32
   248  	MOVV	-8(R8), R14
   249  	MOVV	R11, (R4)	// all loads are done before any store
   250  	MOVV	R12, 8(R4)
   251  	MOVV	R13, -16(R7)
   252  	MOVV	R14, -8(R7)
   253  	RET
   254  move_33through64:
   255  	MOVV	(R5), R11	// first 32 bytes
   256  	MOVV	8(R5), R12
   257  	MOVV	16(R5), R13
   258  	MOVV	24(R5), R14
   259  	MOVV	-32(R8), R15	// last 32 bytes; the halves overlap when count < 64
   260  	MOVV	-24(R8), R16
   261  	MOVV	-16(R8), R17
   262  	MOVV	-8(R8), R18
   263  	MOVV	R11, (R4)	// all loads are done before any store
   264  	MOVV	R12, 8(R4)
   265  	MOVV	R13, 16(R4)
   266  	MOVV	R14, 24(R4)
   267  	MOVV	R15, -32(R7)
   268  	MOVV	R16, -24(R7)
   269  	MOVV	R17, -16(R7)
   270  	MOVV	R18, -8(R7)
   271  	RET
   272  

View as plain text