// src/runtime/memmove_riscv64.s
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "textflag.h"

// See memmove Go doc for important implementation constraints.

// void runtime·memmove(void*, void*, uintptr)
//
// ABIInternal register arguments:
//   X10 = to   (destination pointer)
//   X11 = from (source pointer)
//   X12 = n    (byte count)
// Scratch: X5, X6 (alignment checks), X9 (loop-size threshold),
//          X14-X21 (data being copied).
//
// Overlap is handled by direction: if to > from, the regions may overlap
// such that a forward copy would clobber unread source bytes, so the copy
// runs backward from the end of both buffers. Otherwise it runs forward.
// Each direction has tiered loops (64/32/16/8 bytes) for mutually aligned
// pointers, an 8-byte-at-a-time byte-wise loop for mixed alignment, and
// 4/1-byte tails.
TEXT runtime·memmove<ABIInternal>(SB),NOSPLIT,$-0-24
	// X10 = to
	// X11 = from
	// X12 = n
	BEQ	X10, X11, done
	BEQZ	X12, done

	// If the destination is ahead of the source, start at the end of the
	// buffer and go backward.
	BGTU	X10, X11, backward

	// If less than 8 bytes, do single byte copies.
	MOV	$8, X9
	BLT	X12, X9, f_loop4_check

	// Check alignment - if alignment differs we have to do one byte at a time.
	AND	$7, X10, X5
	AND	$7, X11, X6
	BNE	X5, X6, f_loop8_unaligned_check
	BEQZ	X5, f_loop_check

	// Move one byte at a time until we reach 8 byte alignment.
	SUB	X5, X9, X5	// X5 = 8 - (to & 7) = bytes needed to align
	SUB	X5, X12, X12	// account for the alignment bytes up front
f_align:
	SUB	$1, X5
	MOVB	0(X11), X14
	MOVB	X14, 0(X10)
	ADD	$1, X10
	ADD	$1, X11
	BNEZ	X5, f_align

f_loop_check:
	// Both pointers are 8-byte aligned: pick the widest loop that fits.
	// X9 holds the size threshold for the loop currently in use.
	MOV	$16, X9
	BLT	X12, X9, f_loop8_check
	MOV	$32, X9
	BLT	X12, X9, f_loop16_check
	MOV	$64, X9
	BLT	X12, X9, f_loop32_check
f_loop64:
	MOV	0(X11), X14
	MOV	8(X11), X15
	MOV	16(X11), X16
	MOV	24(X11), X17
	MOV	32(X11), X18
	MOV	40(X11), X19
	MOV	48(X11), X20
	MOV	56(X11), X21
	MOV	X14, 0(X10)
	MOV	X15, 8(X10)
	MOV	X16, 16(X10)
	MOV	X17, 24(X10)
	MOV	X18, 32(X10)
	MOV	X19, 40(X10)
	MOV	X20, 48(X10)
	MOV	X21, 56(X10)
	ADD	$64, X10
	ADD	$64, X11
	SUB	$64, X12
	BGE	X12, X9, f_loop64
	BEQZ	X12, done

f_loop32_check:
	MOV	$32, X9
	BLT	X12, X9, f_loop16_check
f_loop32:
	MOV	0(X11), X14
	MOV	8(X11), X15
	MOV	16(X11), X16
	MOV	24(X11), X17
	MOV	X14, 0(X10)
	MOV	X15, 8(X10)
	MOV	X16, 16(X10)
	MOV	X17, 24(X10)
	ADD	$32, X10
	ADD	$32, X11
	SUB	$32, X12
	BGE	X12, X9, f_loop32
	BEQZ	X12, done

f_loop16_check:
	MOV	$16, X9
	BLT	X12, X9, f_loop8_check
f_loop16:
	MOV	0(X11), X14
	MOV	8(X11), X15
	MOV	X14, 0(X10)
	MOV	X15, 8(X10)
	ADD	$16, X10
	ADD	$16, X11
	SUB	$16, X12
	BGE	X12, X9, f_loop16
	BEQZ	X12, done

f_loop8_check:
	MOV	$8, X9
	BLT	X12, X9, f_loop4_check
f_loop8:
	MOV	0(X11), X14
	MOV	X14, 0(X10)
	ADD	$8, X10
	ADD	$8, X11
	SUB	$8, X12
	BGE	X12, X9, f_loop8
	BEQZ	X12, done
	JMP	f_loop4_check

f_loop8_unaligned_check:
	// Pointers have different low bits: wide loads/stores would fault or
	// be slow, so move 8 bytes per iteration one byte at a time.
	MOV	$8, X9
	BLT	X12, X9, f_loop4_check
f_loop8_unaligned:
	MOVB	0(X11), X14
	MOVB	1(X11), X15
	MOVB	2(X11), X16
	MOVB	3(X11), X17
	MOVB	4(X11), X18
	MOVB	5(X11), X19
	MOVB	6(X11), X20
	MOVB	7(X11), X21
	MOVB	X14, 0(X10)
	MOVB	X15, 1(X10)
	MOVB	X16, 2(X10)
	MOVB	X17, 3(X10)
	MOVB	X18, 4(X10)
	MOVB	X19, 5(X10)
	MOVB	X20, 6(X10)
	MOVB	X21, 7(X10)
	ADD	$8, X10
	ADD	$8, X11
	SUB	$8, X12
	BGE	X12, X9, f_loop8_unaligned

f_loop4_check:
	MOV	$4, X9
	BLT	X12, X9, f_loop1
f_loop4:
	MOVB	0(X11), X14
	MOVB	1(X11), X15
	MOVB	2(X11), X16
	MOVB	3(X11), X17
	MOVB	X14, 0(X10)
	MOVB	X15, 1(X10)
	MOVB	X16, 2(X10)
	MOVB	X17, 3(X10)
	ADD	$4, X10
	ADD	$4, X11
	SUB	$4, X12
	BGE	X12, X9, f_loop4

f_loop1:
	// Final tail: copy the remaining 0-3 bytes one at a time.
	BEQZ	X12, done
	MOVB	0(X11), X14
	MOVB	X14, 0(X10)
	ADD	$1, X10
	ADD	$1, X11
	SUB	$1, X12
	JMP	f_loop1

backward:
	// to > from: copy from the end so overlapping source bytes are read
	// before they are overwritten. Point both registers one past the end.
	ADD	X10, X12, X10
	ADD	X11, X12, X11

	// If less than 8 bytes, do single byte copies.
	MOV	$8, X9
	BLT	X12, X9, b_loop4_check

	// Check alignment - if alignment differs we have to do one byte at a time.
	AND	$7, X10, X5
	AND	$7, X11, X6
	BNE	X5, X6, b_loop8_unaligned_check
	BEQZ	X5, b_loop_check

	// Move one byte at a time until we reach 8 byte alignment.
	// Going backward, (to & 7) is itself the byte count to alignment.
	SUB	X5, X12, X12
b_align:
	SUB	$1, X5
	SUB	$1, X10
	SUB	$1, X11
	MOVB	0(X11), X14
	MOVB	X14, 0(X10)
	BNEZ	X5, b_align

b_loop_check:
	// Mirror of f_loop_check: tiered descending-address copy loops.
	MOV	$16, X9
	BLT	X12, X9, b_loop8_check
	MOV	$32, X9
	BLT	X12, X9, b_loop16_check
	MOV	$64, X9
	BLT	X12, X9, b_loop32_check
b_loop64:
	SUB	$64, X10
	SUB	$64, X11
	MOV	0(X11), X14
	MOV	8(X11), X15
	MOV	16(X11), X16
	MOV	24(X11), X17
	MOV	32(X11), X18
	MOV	40(X11), X19
	MOV	48(X11), X20
	MOV	56(X11), X21
	MOV	X14, 0(X10)
	MOV	X15, 8(X10)
	MOV	X16, 16(X10)
	MOV	X17, 24(X10)
	MOV	X18, 32(X10)
	MOV	X19, 40(X10)
	MOV	X20, 48(X10)
	MOV	X21, 56(X10)
	SUB	$64, X12
	BGE	X12, X9, b_loop64
	BEQZ	X12, done

b_loop32_check:
	MOV	$32, X9
	BLT	X12, X9, b_loop16_check
b_loop32:
	SUB	$32, X10
	SUB	$32, X11
	MOV	0(X11), X14
	MOV	8(X11), X15
	MOV	16(X11), X16
	MOV	24(X11), X17
	MOV	X14, 0(X10)
	MOV	X15, 8(X10)
	MOV	X16, 16(X10)
	MOV	X17, 24(X10)
	SUB	$32, X12
	BGE	X12, X9, b_loop32
	BEQZ	X12, done

b_loop16_check:
	MOV	$16, X9
	BLT	X12, X9, b_loop8_check
b_loop16:
	SUB	$16, X10
	SUB	$16, X11
	MOV	0(X11), X14
	MOV	8(X11), X15
	MOV	X14, 0(X10)
	MOV	X15, 8(X10)
	SUB	$16, X12
	BGE	X12, X9, b_loop16
	BEQZ	X12, done

b_loop8_check:
	MOV	$8, X9
	BLT	X12, X9, b_loop4_check
b_loop8:
	SUB	$8, X10
	SUB	$8, X11
	MOV	0(X11), X14
	MOV	X14, 0(X10)
	SUB	$8, X12
	BGE	X12, X9, b_loop8
	BEQZ	X12, done
	JMP	b_loop4_check

b_loop8_unaligned_check:
	// Mixed alignment going backward: byte-wise, 8 bytes per iteration.
	MOV	$8, X9
	BLT	X12, X9, b_loop4_check
b_loop8_unaligned:
	SUB	$8, X10
	SUB	$8, X11
	MOVB	0(X11), X14
	MOVB	1(X11), X15
	MOVB	2(X11), X16
	MOVB	3(X11), X17
	MOVB	4(X11), X18
	MOVB	5(X11), X19
	MOVB	6(X11), X20
	MOVB	7(X11), X21
	MOVB	X14, 0(X10)
	MOVB	X15, 1(X10)
	MOVB	X16, 2(X10)
	MOVB	X17, 3(X10)
	MOVB	X18, 4(X10)
	MOVB	X19, 5(X10)
	MOVB	X20, 6(X10)
	MOVB	X21, 7(X10)
	SUB	$8, X12
	BGE	X12, X9, b_loop8_unaligned

b_loop4_check:
	MOV	$4, X9
	BLT	X12, X9, b_loop1
b_loop4:
	SUB	$4, X10
	SUB	$4, X11
	MOVB	0(X11), X14
	MOVB	1(X11), X15
	MOVB	2(X11), X16
	MOVB	3(X11), X17
	MOVB	X14, 0(X10)
	MOVB	X15, 1(X10)
	MOVB	X16, 2(X10)
	MOVB	X17, 3(X10)
	SUB	$4, X12
	BGE	X12, X9, b_loop4

b_loop1:
	// Final tail: copy the remaining 0-3 bytes one at a time.
	BEQZ	X12, done
	SUB	$1, X10
	SUB	$1, X11
	MOVB	0(X11), X14
	MOVB	X14, 0(X10)
	SUB	$1, X12
	JMP	b_loop1

done:
	RET