Text file
src/runtime/memclr_ppc64x.s
1 // Copyright 2014 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 //go:build ppc64 || ppc64le
6
7 #include "textflag.h"
8
9 // See memclrNoHeapPointers Go doc for important implementation constraints.
10 // Clears n bytes starting at ptr. Every scalar store below writes R0, so R0 is assumed to hold zero on entry (NOTE(review): confirm against the Go ppc64 register conventions).
11 // func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr)
12 TEXT runtime·memclrNoHeapPointers<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-16
13 // R3 = ptr; advanced past each region as it is cleared
14 // R4 = n; counts the bytes still to clear on the bulk paths
15 // Strategy: n < 8 goes straight to the byte tail (nozerolarge); n < 512 uses 16 byte vector stores (under512); otherwise ptr is brought to 128 byte alignment and DCBZ clears 128 bytes at a time (zero512).
16 // Determine if there are doublewords to clear
17 check:
18 ANDCC $7, R4, R5 // R5: leftover bytes to clear
19 SRD $3, R4, R6 // R6: double words to clear
20 CMP R6, $0, CR1 // CR1[EQ] set if no double words
21
22 BC 12, 6, nozerolarge // only single bytes: taken when CR1[EQ] is set, i.e. n < 8
23 CMP R4, $512
24 BLT under512 // special case for < 512
25 ANDCC $127, R3, R8 // check for 128 alignment of address
26 BEQ zero512setup // already 128 byte aligned, go straight to the DCBZ setup
27
28 ANDCC $7, R3, R15
29 BEQ zero512xsetup // at least 8 byte aligned
30
31 // zero bytes up to 8 byte alignment
32 // Each step below tests one low bit of ptr and, if it is set, stores that many zero bytes and adjusts ptr and count.
33 ANDCC $1, R3, R15 // check for byte alignment
34 BEQ byte2
35 MOVB R0, 0(R3) // zero 1 byte
36 ADD $1, R3 // bump ptr by 1
37 ADD $-1, R4 // dec count by 1
38
39 byte2:
40 ANDCC $2, R3, R15 // check for 2 byte alignment
41 BEQ byte4
42 MOVH R0, 0(R3) // zero 2 bytes
43 ADD $2, R3 // bump ptr by 2
44 ADD $-2, R4 // dec count by 2
45
46 byte4:
47 ANDCC $4, R3, R15 // check for 4 byte alignment
48 BEQ zero512xsetup
49 MOVW R0, 0(R3) // zero 4 bytes
50 ADD $4, R3 // bump ptr by 4
51 ADD $-4, R4 // dec count by 4
52 BR zero512xsetup // ptr should now be 8 byte aligned
53
54 under512: // reached from check when n >= 8 and n compared below 512; R6 still holds n/8
55 SRDCC $3, R6, R7 // 64 byte chunks? R7 = R6/8 = n/64, and CR0 records whether it is zero
56 XXLXOR VS32, VS32, VS32 // clear VS32 (V0); placed between SRDCC and BEQ, which still tests the SRDCC result
57 BEQ lt64gt8 // no full 64 byte chunk to clear
58
59 // Prepare to clear 64 bytes at a time.
60 // Both entries to zero64setup (fall-through from under512 and the branch from smaller) arrive with R7 = number of 64 byte chunks and VS32 already zeroed.
61 zero64setup:
62 DCBTST (R3) // prepare data cache
63 MOVD R7, CTR // number of 64 byte chunks
64 MOVD $16, R8 // R8, R16, R17: offsets of the 2nd, 3rd and 4th 16 byte store in each chunk
65 MOVD $32, R16
66 MOVD $48, R17
67
68 zero64:
69 STXVD2X VS32, (R3+R0) // store 16 bytes
70 STXVD2X VS32, (R3+R8) // store 16 bytes at offset 16
71 STXVD2X VS32, (R3+R16) // store 16 bytes at offset 32
72 STXVD2X VS32, (R3+R17) // store 16 bytes at offset 48
73 ADD $64, R3 // bump ptr past the chunk
74 ADD $-64, R4 // dec count by 64
75 BDNZ zero64 // dec ctr, br zero64 if ctr not 0
76 SRDCC $3, R4, R6 // remaining doublewords
77 BEQ nozerolarge // fewer than 8 bytes left, only the byte tail remains
78
79 lt64gt8: // finish the remainder in 32, 16 and 8 byte steps; R4 is the byte count left and VS32 is zero
80 CMP R4, $32
81 BLT lt32gt8
82 MOVD $16, R8 // offset of the second 16 byte store
83 STXVD2X VS32, (R3+R0) // clear 32 bytes with two 16 byte stores
84 STXVD2X VS32, (R3+R8)
85 ADD $-32, R4
86 ADD $32, R3
87 lt32gt8:
88 CMP R4, $16
89 BLT lt16gt8
90 STXVD2X VS32, (R3+R0) // clear 16 bytes
91 ADD $16, R3
92 ADD $-16, R4
93 lt16gt8:
94 #ifdef GOPPC64_power10
95 SLD $56, R4, R7 // shift the remaining byte count into the top byte of R7, where STXVL takes its length
96 STXVL V0, R3, R7 // store-with-length of the zero vector clears all remaining bytes at once
97 RET
98 #else
99 CMP R4, $8
100 BLT nozerolarge
101 MOVD R0, 0(R3) // clear one doubleword
102 ADD $8, R3
103 ADD $-8, R4
104 #endif
105 nozerolarge: // byte tail: only R4 & 7 bytes are cleared from here on
106 ANDCC $7, R4, R5 // any remaining bytes
107 BC 4, 1, LR // ble lr: return when R5 is zero
108 #ifdef GOPPC64_power10
109 XXLXOR VS32, VS32, VS32 // clear VS32 (V0); needed here because check branches to nozerolarge before any vector clear
110 SLD $56, R5, R7 // shift the byte count into the top byte of R7 for STXVL
111 STXVL V0, R3, R7 // clear the last R5 bytes with one store-with-length
112 RET
113 #else
114 CMP R5, $4
115 BLT next2
116 MOVW R0, 0(R3) // clear 4 bytes
117 ADD $4, R3
118 ADD $-4, R5 // the tail count lives in R5, not R4
119 next2:
120 CMP R5, $2
121 BLT next1
122 MOVH R0, 0(R3) // clear 2 bytes
123 ADD $2, R3
124 ADD $-2, R5
125 next1:
126 CMP R5, $0
127 BC 12, 2, LR // beqlr
128 MOVB R0, 0(R3) // clear the final byte
129 RET
130 #endif
131
132 zero512xsetup: // 512 chunk with extra needed
133 ANDCC $8, R3, R11 // ptr is 8 byte aligned here; test bit 3 to see whether it is also 16 byte aligned
134 BEQ zero512setup16
135 MOVD R0, 0(R3) // clear 8 bytes
136 ADD $8, R3 // update ptr to next 8
137 ADD $-8, R4 // dec count by 8
138
139 zero512setup16: // ptr is 16 byte aligned from here on
140 ANDCC $127, R3, R14 // < 128 byte alignment
141 BEQ zero512setup // handle 128 byte alignment
142 MOVD $128, R15
143 SUB R14, R15, R14 // find increment to 128 alignment: R14 = 128 - (ptr & 127)
144 SRD $4, R14, R15 // number of 16 byte chunks
145 MOVD R15, CTR // loop counter of 16 bytes
146 XXLXOR VS32, VS32, VS32 // clear VS32 (V0)
147
148 zero512preloop: // clear up to 128 alignment
149 STXVD2X VS32, (R3+R0) // clear 16 bytes
150 ADD $16, R3 // update ptr
151 ADD $-16, R4 // dec count
152 BDNZ zero512preloop
153
154 zero512setup: // setup for dcbz loop
155 CMP R4, $512 // check if at least 512
156 BLT remain // the alignment steps may have left fewer than 512 bytes
157 SRD $9, R4, R8 // loop count for 512 chunks
158 MOVD R8, CTR // set up counter
159 MOVD $128, R9 // index regs for 128 bytes
160 MOVD $256, R10
161 MOVD $384, R11
162 PCALIGN $16
163 zero512:
164 DCBZ (R3+R0) // clear first chunk
165 DCBZ (R3+R9) // clear second chunk
166 DCBZ (R3+R10) // clear third chunk
167 DCBZ (R3+R11) // clear fourth chunk
168 ADD $512, R3 // bump ptr past the four chunks
169 BDNZ zero512
170 ANDCC $511, R4 // R4 is not decremented inside the loop; keep only the bytes left after the 512 byte chunks
171
172 remain:
173 CMP R4, $128 // check if 128 byte chunks left
174 BLT smaller
175 DCBZ (R3+R0) // clear 128
176 ADD $128, R3
177 ADD $-128, R4
178 BR remain
179
180 smaller:
181 ANDCC $127, R4, R7 // find leftovers
182 BEQ done
183 CMP R7, $64 // more than 64, do 64 at a time
184 XXLXOR VS32, VS32, VS32 // zero VS32 for the vector stores in zero64 and lt64gt8; sits between CMP and BLT, which still tests the CMP result
185 BLT lt64gt8 // less than 64
186 SRD $6, R7, R7 // set up counter for 64
187 BR zero64setup
188
189 done:
190 RET
191
View as plain text