src/runtime/memmove_loong64.s
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "textflag.h"

// See memmove Go doc for important implementation constraints.

// Register map
//
// to            R4
// from          R5
// n(aka count)  R6
// to-end        R7
// from-end      R8
// data          R11-R18
// tmp           R9

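// Note: R4, R5 and R6 hold the to, from and n arguments directly; the
// function is declared <ABIInternal> below and never loads them from the
// frame, which is what the register map above reflects.
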
// Algorithm:
//
// The memory alignment check is only performed for copies larger than
// 64 bytes, to minimize overhead.
//
// When the copy size is <= 64 bytes, jump to the tail label, select the
// appropriate case according to the copy size, and copy directly.
// Based on the common memory access instructions of loong64, the
// currently implemented cases (sketched in Go terms below) are:
// move_0, move_1, move_2, move_3, move_4, move_5through7, move_8,
// move_9through16, move_17through32, move_33through64
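//
// A Go-level sketch of that dispatch, for illustration only (the actual
// code selects the case with SGTU/BNE threshold checks on n, and n >= 1
// whenever the tail label is reached, since n == 0 is filtered out via
// move_0):
//
//	switch {
//	case n < 2:  // n == 1         -> move_1
//	case n < 3:  // n == 2         -> move_2
//	case n < 4:  // n == 3         -> move_3
//	case n < 5:  // n == 4         -> move_4
//	case n < 8:  // 5 <= n <= 7    -> move_5through7
//	case n < 9:  // n == 8         -> move_8
//	case n < 17: // 9 <= n <= 16   -> move_9through16
//	case n < 33: // 17 <= n <= 32  -> move_17through32
//	case n < 65: // 33 <= n <= 64  -> move_33through64
//	default:     // n >= 65        -> forward or backward copy
//	}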
//
// When the copy size is > 64 bytes, align the copy on the destination
// and use the following strategy to copy in 3 parts:
// 1. Head: copy a few bytes to make the destination 8-byte aligned
// 2. Body: a loop that copies 64 bytes per iteration
// 3. Tail: processing of the remaining part (<= 64 bytes)
//
// forward:
//
//    Dst            NewDst                          Dstend
//     |               |<----count after correction---->|
//     |<-------------count before correction---------->|
//     |<--8-(Dst&7)-->|               |<---64 bytes--->|
//     +------------------------------------------------+
//     |      Head     |      Body     |      Tail      |
//     +---------------+---------------+----------------+
// NewDst = Dst - (Dst & 7) + 8
// count = count - 8 + (Dst & 7)
// Src = Src - (Dst & 7) + 8
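//
// In Go terms the forward correction amounts to (illustration only;
// "head" is the number of bytes the head loop copies one at a time):
//
//	head := 8 - (dst & 7)     // 1..7; the head is skipped entirely
//	                          // when dst is already 8-byte aligned
//	newDst := dst + head      // == Dst - (Dst & 7) + 8
//	newSrc := src + head      // == Src - (Dst & 7) + 8
//	newCount := count - head  // == count - 8 + (Dst & 7)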
//
// backward:
//
//    Dst                              NewDstend         Dstend
//     |<-----count after correction------>|               |
//     |<------------count before correction--------------->|
//     |<---64 bytes--->|                  |<---Dstend&7--->|
//     +----------------------------------------------------+
//     |      Tail      |       Body       |      Head      |
//     +----------------+------------------+----------------+
// NewDstend = Dstend - (Dstend & 7)
// count = count - (Dstend & 7)
// Srcend = Srcend - (Dstend & 7)
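//
// In Go terms the backward correction amounts to (illustration only):
//
//	tail := dstend & 7          // unaligned bytes at the very end
//	newDstend := dstend - tail  // 8-byte aligned
//	newSrcend := srcend - tail
//	newCount := count - tail
//
// Note that b_head below additionally peels off one aligned 8-byte
// block, so it ends up subtracting (dstend & 7) + 8 from count and from
// both end pointers before entering b_body.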

// func memmove(to, from unsafe.Pointer, n uintptr)
TEXT runtime·memmove<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-24
	BEQ	R4, R5, move_0
	BEQ	R6, move_0

	ADDV	R4, R6, R7	// to-end pointer
	ADDV	R5, R6, R8	// from-end pointer

tail:
	// copy size <= 64 bytes: copy directly, without checking alignment
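	// Dispatch idiom used below: SGTU $k, R6, R9 sets R9 to 1 when
	// n < k (unsigned) and to 0 otherwise, and BNE R9 branches when R9
	// is non-zero, i.e. when the size test succeeded.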

	// < 2 bytes
	SGTU	$2, R6, R9
	BNE	R9, move_1

	// < 3 bytes
	SGTU	$3, R6, R9
	BNE	R9, move_2

	// < 4 bytes
	SGTU	$4, R6, R9
	BNE	R9, move_3

	// < 5 bytes
	SGTU	$5, R6, R9
	BNE	R9, move_4

	// >= 5 bytes and < 8 bytes
	SGTU	$8, R6, R9
	BNE	R9, move_5through7

	// < 9 bytes
	SGTU	$9, R6, R9
	BNE	R9, move_8

	// >= 9 bytes and < 17 bytes
	SGTU	$17, R6, R9
	BNE	R9, move_9through16

	// >= 17 bytes and < 33 bytes
	SGTU	$33, R6, R9
	BNE	R9, move_17through32

	// >= 33 bytes and < 65 bytes
	SGTU	$65, R6, R9
	BNE	R9, move_33through64

	// If (dst > src) && (dst < src + count), the ranges overlap in a
	// way that requires copying backward; otherwise copy forward.
	BGEU	R5, R4, forward
	ADDV	R5, R6, R10
	BLTU	R4, R10, backward
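
	// A Go-level model of the overlap check above (illustration only):
	//
	//	if uintptr(to) > uintptr(from) && uintptr(to) < uintptr(from)+n {
	//		// to starts inside [from, from+n): must copy backward
	//	} else {
	//		// copy forward
	//	}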

forward:
	AND	$7, R4, R9	// dst & 7
	BEQ	R9, body
head:
	MOVV	$8, R10
	SUBV	R9, R10	// head = 8 - (dst & 7)
	MOVB	(R5), R11
	SUBV	$1, R10
	ADDV	$1, R5
	MOVB	R11, (R4)
	ADDV	$1, R4
	BNE	R10, -5(PC)
	ADDV	R9, R6
	ADDV	$-8, R6	// newcount = count + (dst & 7) - 8
	// if newcount < 65 bytes, move_33through64 is enough to finish the copy
	SGTU	$65, R6, R9
	BNE	R9, move_33through64
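
	// Go-level sketch of the head above (illustration only): copy the
	// 1..7 bytes needed to make the destination 8-byte aligned, one
	// byte per iteration, then shrink the count accordingly.
	//
	//	head := 8 - (uintptr(to) & 7)
	//	for i := uintptr(0); i < head; i++ {
	//		*(*byte)(unsafe.Add(to, i)) = *(*byte)(unsafe.Add(from, i))
	//	}
	//	to, from, n = unsafe.Add(to, head), unsafe.Add(from, head), n-head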

body:
	MOVV	(R5), R11
	MOVV	8(R5), R12
	MOVV	16(R5), R13
	MOVV	24(R5), R14
	MOVV	32(R5), R15
	MOVV	40(R5), R16
	MOVV	48(R5), R17
	MOVV	56(R5), R18
	MOVV	R11, (R4)
	MOVV	R12, 8(R4)
	MOVV	R13, 16(R4)
	MOVV	R14, 24(R4)
	MOVV	R15, 32(R4)
	MOVV	R16, 40(R4)
	MOVV	R17, 48(R4)
	MOVV	R18, 56(R4)
	ADDV	$-64, R6
	ADDV	$64, R4
	ADDV	$64, R5
	SGTU	$64, R6, R9
	// if the remaining part >= 64 bytes, jump to body
	BEQ	R9, body
	// if the remaining part == 0 bytes, use move_0 to return
	BEQ	R6, move_0
	// if the remaining part is in (0, 63] bytes, jump to tail
	JMP	tail
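
	// Go-level sketch of the main loop above (illustration only): each
	// iteration performs eight 8-byte loads into R11-R18 and then eight
	// 8-byte stores, moving 64 bytes per pass; whatever is left (at
	// most 63 bytes here) is finished by the shared tail dispatch.
	//
	//	for n >= 64 {
	//		block := *(*[8]uint64)(from) // eight loads (R11-R18)
	//		*(*[8]uint64)(to) = block    // eight stores
	//		to, from, n = unsafe.Add(to, 64), unsafe.Add(from, 64), n-64
	//	}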

// The backward copy algorithm is the same as the forward copy,
// except for the direction.
backward:
	AND	$7, R7, R9	// dstend & 7
	BEQ	R9, b_body
b_head:
	MOVV	-8(R8), R11
	SUBV	R9, R6	// newcount = count - (dstend & 7)
	SUBV	R9, R8	// newsrcend = srcend - (dstend & 7)
	MOVV	-8(R8), R12
	MOVV	R11, -8(R7)
	SUBV	R9, R7	// newdstend = dstend - (dstend & 7)
	MOVV	R12, -8(R7)
	SUBV	$8, R6
	SUBV	$8, R7
	SUBV	$8, R8
	SGTU	$65, R6, R9
	BNE	R9, move_33through64
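
	// What b_head moves (derived from the code above): one unaligned
	// 8-byte chunk ending at the original dstend (R11), which covers
	// the (dstend & 7) trailing bytes, plus one aligned 8-byte block
	// ending at the new, aligned dstend (R12). Both loads happen before
	// either store, so overlapping source bytes are read before they
	// can be overwritten. Net effect: count, dstend and srcend all drop
	// by (dstend & 7) + 8 before b_body.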

b_body:
	MOVV	-8(R8), R11
	MOVV	-16(R8), R12
	MOVV	-24(R8), R13
	MOVV	-32(R8), R14
	MOVV	-40(R8), R15
	MOVV	-48(R8), R16
	MOVV	-56(R8), R17
	MOVV	-64(R8), R18
	MOVV	R11, -8(R7)
	MOVV	R12, -16(R7)
	MOVV	R13, -24(R7)
	MOVV	R14, -32(R7)
	MOVV	R15, -40(R7)
	MOVV	R16, -48(R7)
	MOVV	R17, -56(R7)
	MOVV	R18, -64(R7)
	ADDV	$-64, R6
	ADDV	$-64, R7
	ADDV	$-64, R8
	SGTU	$64, R6, R9
	BEQ	R9, b_body
	BEQ	R6, move_0
	JMP	tail
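
	// When b_body exits with 0 < n <= 63, the bytes still to copy are
	// the prefix starting at the original to/from pointers, so the same
	// tail dispatch as the forward path finishes the job: R4/R5 still
	// point at the start of the buffers and R7/R8 have been walked back
	// to start + n, which is exactly what the move_* cases expect.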

move_0:
	RET

move_1:
	MOVB	(R5), R11
	MOVB	R11, (R4)
	RET
move_2:
	MOVH	(R5), R11
	MOVH	R11, (R4)
	RET
move_3:
	MOVH	(R5), R11
	MOVB	-1(R8), R12
	MOVH	R11, (R4)
	MOVB	R12, -1(R7)
	RET
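
	// move_3, move_5through7, move_9through16, move_17through32 and
	// move_33through64 all use the same trick: load a chunk from the
	// start of the range and a chunk ending at its end into registers
	// before storing either, so every size between the two fixed chunk
	// sizes is covered by two possibly overlapping accesses, and no
	// store can clobber a byte that has not been read yet.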
move_4:
	MOVW	(R5), R11
	MOVW	R11, (R4)
	RET
move_5through7:
	MOVW	(R5), R11
	MOVW	-4(R8), R12
	MOVW	R11, (R4)
	MOVW	R12, -4(R7)
	RET
move_8:
	MOVV	(R5), R11
	MOVV	R11, (R4)
	RET
move_9through16:
	MOVV	(R5), R11
	MOVV	-8(R8), R12
	MOVV	R11, (R4)
	MOVV	R12, -8(R7)
	RET
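
	// For example, move_9through16 in Go terms (illustration only; the
	// two 8-byte accesses overlap whenever n < 16, and both loads are
	// done before either store):
	//
	//	first := *(*uint64)(from)
	//	last := *(*uint64)(unsafe.Add(from, n-8))
	//	*(*uint64)(to) = first
	//	*(*uint64)(unsafe.Add(to, n-8)) = last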
move_17through32:
	MOVV	(R5), R11
	MOVV	8(R5), R12
	MOVV	-16(R8), R13
	MOVV	-8(R8), R14
	MOVV	R11, (R4)
	MOVV	R12, 8(R4)
	MOVV	R13, -16(R7)
	MOVV	R14, -8(R7)
	RET
move_33through64:
	MOVV	(R5), R11
	MOVV	8(R5), R12
	MOVV	16(R5), R13
	MOVV	24(R5), R14
	MOVV	-32(R8), R15
	MOVV	-24(R8), R16
	MOVV	-16(R8), R17
	MOVV	-8(R8), R18
	MOVV	R11, (R4)
	MOVV	R12, 8(R4)
	MOVV	R13, 16(R4)
	MOVV	R14, 24(R4)
	MOVV	R15, -32(R7)
	MOVV	R16, -24(R7)
	MOVV	R17, -16(R7)
	MOVV	R18, -8(R7)
	RET