Text file
src/runtime/memclr_loong64.s
1 // Copyright 2022 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "go_asm.h"
6 #include "textflag.h"
7
8 // Register map
9 //
10 // R4: ptr
11 // R5: n
12 // R6: ptrend
13 // R7: tmp
14
15 // Algorithm:
16 //
17 // 1. when count <= 64 bytes, memory alignment check is omitted.
18 // The handling is divided into distinct cases based on the size
19 // of count: clr_0, clr_1, clr_2, clr_3, clr_4, clr_5through7,
20 // clr_8, clr_9through16, clr_17through32, and clr_33through64.
21 //
22 // 2. when count > 64 bytes, memory alignment check is performed.
23 // Unaligned bytes are processed first (that is, 8-(ptr&7)), and
24 // then a 64-byte loop is executed to zero out memory.
25 // When the number of remaining bytes not cleared is n <= 64 bytes,
26 // a tail processing is performed, invoking the corresponding case
27 // based on the size of n.
28 //
29 //    ptr           newptr                           ptrend
30 //     |               |<----count after correction---->|
31 //     |<-------------count before correction---------->|
32 //     |<--8-(ptr&7)-->|               |<---64 bytes--->|
33 //     +------------------------------------------------+
34 //     |   Head        |      Body     |      Tail      |
35 //     +---------------+---------------+----------------+
36 // newptr = ptr - (ptr & 7) + 8
37 // count = count - 8 + (ptr & 7)
38 // memclrNoHeapPointers stores zeros (register R0) to the n bytes starting at ptr.
39 // func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr)
40 TEXT runtime·memclrNoHeapPointers<ABIInternal>(SB),NOSPLIT,$0-16 // arguments are used directly from registers: R4 = ptr, R5 = n
41 BEQ R5, clr_0 // n == 0: nothing to clear
42 ADDV R4, R5, R6 // R6 = ptrend = ptr + n; R6 is never modified below, so end-relative stores stay valid
43
44 tail: // reached at function entry, and again from body with the leftover byte count in R5
45 // n <= 64 bytes: clear directly without checking alignment. SGTU $k, R5, R7 sets R7 = (R5 < k), unsigned.
46 SGTU $2, R5, R7 // R7 = (n < 2)
47 BNE R7, clr_1 // n == 1 (n == 0 was handled before tail)
48 SGTU $3, R5, R7 // R7 = (n < 3)
49 BNE R7, clr_2 // n == 2
50 SGTU $4, R5, R7 // R7 = (n < 4)
51 BNE R7, clr_3 // n == 3
52 SGTU $5, R5, R7 // R7 = (n < 5)
53 BNE R7, clr_4 // n == 4
54 SGTU $8, R5, R7 // R7 = (n < 8)
55 BNE R7, clr_5through7 // 5 <= n <= 7
56 SGTU $9, R5, R7 // R7 = (n < 9)
57 BNE R7, clr_8 // n == 8
58 SGTU $17, R5, R7 // R7 = (n < 17)
59 BNE R7, clr_9through16 // 9 <= n <= 16
60 SGTU $33, R5, R7 // R7 = (n < 33)
61 BNE R7, clr_17through32 // 17 <= n <= 32
62 SGTU $65, R5, R7 // R7 = (n < 65)
63 BNE R7, clr_33through64 // 33 <= n <= 64
64
65 // n > 64 bytes: check whether ptr is 8-byte aligned
66 AND $7, R4, R7 // R7 = ptr & 7
67 BEQ R7, body // already aligned: go straight to the 64-byte loop
68
69 head: // ptr is unaligned and n > 64
70 MOVV R0, (R4) // one unaligned 8-byte store covers the 8-(ptr&7) head bytes; the excess is cleared again later
71 SUBV R7, R4 // ptr -= ptr & 7 (round down to an 8-byte boundary)
72 ADDV R7, R5 // n += ptr & 7
73 ADDV $8, R4 // newptr = ptr + (8 - (ptr & 7))
74 SUBV $8, R5 // newn = n - (8 - (ptr & 7))
75 SGTU $65, R5, R7 // R7 = (newn < 65); R7 no longer holds ptr & 7
76 BNE R7, clr_33through64 // newn >= 65 - 7 = 58 here, so the 33..64 case covers it
77
78 body: // every entry has R4 8-byte aligned and R5 >= 65
79 MOVV R0, (R4) // clear 64 bytes with eight 8-byte stores
80 MOVV R0, 8(R4)
81 MOVV R0, 16(R4)
82 MOVV R0, 24(R4)
83 MOVV R0, 32(R4)
84 MOVV R0, 40(R4)
85 MOVV R0, 48(R4)
86 MOVV R0, 56(R4)
87 ADDV $-64, R5 // n -= 64
88 ADDV $64, R4 // ptr += 64
89 SGTU $65, R5, R7 // R7 = (n < 65)
90 BEQ R7, body // n >= 65: run another full 64-byte iteration
91 BEQ R5, clr_0 // NOTE(review): R5 was >= 65 before the subtraction, so it looks to be >= 1 here and this check seems purely defensive - confirm
92 JMP tail // 1..64 bytes left: reuse the small-size dispatch
93
94 clr_0: // nothing (more) to clear
95 RET
96 clr_1: // one byte store
97 MOVB R0, (R4)
98 RET
99 clr_2: // one 2-byte store
100 MOVH R0, (R4)
101 RET
102 clr_3: // 2-byte store followed by a 1-byte store
103 MOVH R0, (R4) // bytes 0..1
104 MOVB R0, 2(R4) // byte 2
105 RET
106 clr_4: // one 4-byte store
107 MOVW R0, (R4)
108 RET
109 clr_5through7: // two 4-byte stores anchored at the start and at the end
110 MOVW R0, (R4) // first 4 bytes
111 MOVW R0, -4(R6) // last 4 bytes, addressed from ptrend; overlaps the first store since n < 8
112 RET
113 clr_8: // one 8-byte store
114 MOVV R0, (R4)
115 RET
116 clr_9through16: // two 8-byte stores anchored at the start and at the end
117 MOVV R0, (R4) // first 8 bytes
118 MOVV R0, -8(R6) // last 8 bytes, addressed from ptrend; overlaps the first store when n < 16
119 RET
120 clr_17through32: // first 16 bytes from R4, last 16 bytes from ptrend
121 MOVV R0, (R4)
122 MOVV R0, 8(R4)
123 MOVV R0, -16(R6) // the end-anchored stores overlap the start-anchored ones when n < 32
124 MOVV R0, -8(R6)
125 RET
126 clr_33through64: // first 32 bytes from R4, last 32 bytes from ptrend; also the target of the head fix-up
127 MOVV R0, (R4)
128 MOVV R0, 8(R4)
129 MOVV R0, 16(R4)
130 MOVV R0, 24(R4)
131 MOVV R0, -32(R6) // the end-anchored stores overlap the start-anchored ones when n < 64
132 MOVV R0, -24(R6)
133 MOVV R0, -16(R6)
134 MOVV R0, -8(R6)
135 RET
136
View as plain text