1 // Copyright 2014 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 //go:build ppc64 || ppc64le
6
7 #include "textflag.h"
8
// The paper linked below describes how C/C++-like memory models
// are enforced on POWER. It gives the context needed to understand
// why the unusual-looking instruction sequences in this file work.
14 //
15 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
16
// uint32 ·Load(uint32 volatile* ptr)
//
// Reads the 32-bit word at ptr (zero-extended) and returns it.
// The load is preceded by SYNC and followed by a compare of the
// loaded value with itself, a conditional branch whose target is
// the very next instruction, and ISYNC. See the paper referenced
// at the top of this file for background on this sequence.
TEXT ·Load(SB),NOSPLIT|NOFRAME,$-8-12
	MOVD	ptr+0(FP), R4		// R4 = ptr
	SYNC
	MOVWZ	(R4), R5		// R5 = *ptr
	CMPW	R5, R5, CR7		// CR7 is computed from the loaded value
	BC	4, 30, 1(PC)		// bne- cr7, next instruction
	ISYNC
	MOVW	R5, ret+8(FP)
	RET
27
// uint8 ·Load8(uint8 volatile* ptr)
//
// Byte-sized variant of the SYNC / load / compare / branch / ISYNC
// sequence: reads the byte at ptr (zero-extended) and returns it.
TEXT ·Load8(SB),NOSPLIT|NOFRAME,$-8-9
	MOVD	ptr+0(FP), R4		// R4 = ptr
	SYNC
	MOVBZ	(R4), R5		// R5 = *ptr
	CMP	R5, R5, CR7		// CR7 is computed from the loaded value
	BC	4, 30, 1(PC)		// bne- cr7, next instruction
	ISYNC
	MOVB	R5, ret+8(FP)
	RET
38
// uint64 ·Load64(uint64 volatile* ptr)
//
// Doubleword variant of the SYNC / load / compare / branch / ISYNC
// sequence: reads the 64-bit value at ptr and returns it.
TEXT ·Load64(SB),NOSPLIT|NOFRAME,$-8-16
	MOVD	ptr+0(FP), R4		// R4 = ptr
	SYNC
	MOVD	(R4), R5		// R5 = *ptr
	CMP	R5, R5, CR7		// CR7 is computed from the loaded value
	BC	4, 30, 1(PC)		// bne- cr7, next instruction
	ISYNC
	MOVD	R5, ret+8(FP)
	RET
49
// void *·Loadp(void *volatile *ptr)
//
// Pointer-sized load: reads the 64-bit value stored at ptr and
// returns it, using the same SYNC / load / compare / branch / ISYNC
// instruction sequence as the 64-bit integer load.
TEXT ·Loadp(SB),NOSPLIT|NOFRAME,$-8-16
	MOVD	ptr+0(FP), R4		// R4 = ptr
	SYNC
	MOVD	(R4), R5		// R5 = *ptr
	CMP	R5, R5, CR7		// CR7 is computed from the loaded value
	BC	4, 30, 1(PC)		// bne- cr7, next instruction
	ISYNC
	MOVD	R5, ret+8(FP)
	RET
60
// uint32 ·LoadAcq(uint32 volatile* ptr)
//
// Reads the 32-bit word at ptr (zero-extended) and returns it.
// Unlike ·Load there is no SYNC ahead of the load; only the
// compare / branch / ISYNC sequence follows it.
TEXT ·LoadAcq(SB),NOSPLIT|NOFRAME,$-8-12
	MOVD	ptr+0(FP), R4		// R4 = ptr
	MOVWZ	(R4), R5		// R5 = *ptr
	CMPW	R5, R5, CR7		// CR7 is computed from the loaded value
	BC	4, 30, 1(PC)		// bne- cr7, next instruction
	ISYNC
	MOVW	R5, ret+8(FP)
	RET
70
// uint64 ·LoadAcq64(uint64 volatile* ptr)
//
// Reads the 64-bit value at ptr and returns it. No SYNC precedes
// the load; only the compare / branch / ISYNC sequence follows it.
TEXT ·LoadAcq64(SB),NOSPLIT|NOFRAME,$-8-16
	MOVD	ptr+0(FP), R4		// R4 = ptr
	MOVD	(R4), R5		// R5 = *ptr
	CMP	R5, R5, CR7		// CR7 is computed from the loaded value
	BC	4, 30, 1(PC)		// bne- cr7, next instruction
	ISYNC
	MOVD	R5, ret+8(FP)
	RET
80
// bool ·Cas(uint32 *ptr, uint32 old, uint32 new)
// Atomically:
//	if (*ptr == old) {
//		*ptr = new;
//		return 1;
//	} else {
//		return 0;
//	}
//
// LWSYNC is issued before the reservation loop and again on both
// the success exit and the failure exit.
TEXT ·Cas(SB), NOSPLIT, $0-17
	MOVD	ptr+0(FP), R3
	MOVWZ	old+8(FP), R4
	MOVWZ	new+12(FP), R5
	LWSYNC
retry:
	LWAR	(R3), R6		// R6 = *ptr, taking a reservation
	CMPW	R6, R4
	BNE	mismatch		// current value differs from old
	STWCCC	R5, (R3)		// conditionally store new
	BNE	retry			// store did not go through: start over
	MOVD	$1, R7
	LWSYNC
	MOVB	R7, ret+16(FP)		// return 1
	RET
mismatch:
	LWSYNC
	MOVB	R0, ret+16(FP)		// return 0 (value taken from R0)
	RET
107
// bool ·Cas64(uint64 *ptr, uint64 old, uint64 new)
// Atomically:
//	if (*ptr == old) {
//		*ptr = new;
//		return 1;
//	} else {
//		return 0;
//	}
//
// LWSYNC is issued before the reservation loop and again on both
// the success exit and the failure exit.
TEXT ·Cas64(SB), NOSPLIT, $0-25
	MOVD	ptr+0(FP), R3
	MOVD	old+8(FP), R4
	MOVD	new+16(FP), R5
	LWSYNC
retry:
	LDAR	(R3), R6		// R6 = *ptr, taking a reservation
	CMP	R6, R4
	BNE	mismatch		// current value differs from old
	STDCCC	R5, (R3)		// conditionally store new
	BNE	retry			// store did not go through: start over
	MOVD	$1, R7
	LWSYNC
	MOVB	R7, ret+24(FP)		// return 1
	RET
mismatch:
	LWSYNC
	MOVB	R0, ret+24(FP)		// return 0 (value taken from R0)
	RET
135
// ·CasRel(ptr, old, new): 32-bit compare-and-swap.
// Takes a pointer at ptr+0, 32-bit old and new values at old+8 and
// new+12, and writes a one-byte result (1 on swap, 0 otherwise) to
// ret+16. LWSYNC is issued before the reservation loop only; no
// barrier follows the loop on either exit. The reserving load
// carries an explicit hint operand of 0.
TEXT ·CasRel(SB), NOSPLIT, $0-17
	MOVD	ptr+0(FP), R3
	MOVWZ	old+8(FP), R4
	MOVWZ	new+12(FP), R5
	LWSYNC
retry:
	LWAR	(R3), $0, R6		// 0 = Mutex release hint
	CMPW	R6, R4
	BNE	mismatch		// current value differs from old
	STWCCC	R5, (R3)		// conditionally store new
	BNE	retry			// store did not go through: start over
	MOVD	$1, R7
	MOVB	R7, ret+16(FP)		// return 1
	RET
mismatch:
	MOVB	R0, ret+16(FP)		// return 0 (value taken from R0)
	RET
153
// ·Casint32 has no body of its own: it tail-jumps to ·Cas.
TEXT ·Casint32(SB), NOSPLIT, $0-17
	JMP	·Cas(SB)
156
// ·Casint64 has no body of its own: it tail-jumps to ·Cas64.
TEXT ·Casint64(SB), NOSPLIT, $0-25
	JMP	·Cas64(SB)
159
// ·Casuintptr has no body of its own: it tail-jumps to ·Cas64.
TEXT ·Casuintptr(SB), NOSPLIT, $0-25
	JMP	·Cas64(SB)
162
// ·Loaduintptr has no body of its own: it tail-jumps to ·Load64.
TEXT ·Loaduintptr(SB), NOSPLIT|NOFRAME, $0-16
	JMP	·Load64(SB)
165
// ·LoadAcquintptr has no body of its own: it tail-jumps to ·LoadAcq64.
TEXT ·LoadAcquintptr(SB), NOSPLIT|NOFRAME, $0-16
	JMP	·LoadAcq64(SB)
168
// ·Loaduint has no body of its own: it tail-jumps to ·Load64.
TEXT ·Loaduint(SB), NOSPLIT|NOFRAME, $0-16
	JMP	·Load64(SB)
171
// ·Storeint32 has no body of its own: it tail-jumps to ·Store.
TEXT ·Storeint32(SB), NOSPLIT, $0-12
	JMP	·Store(SB)
174
// ·Storeint64 has no body of its own: it tail-jumps to ·Store64.
TEXT ·Storeint64(SB), NOSPLIT, $0-16
	JMP	·Store64(SB)
177
// ·Storeuintptr has no body of its own: it tail-jumps to ·Store64.
TEXT ·Storeuintptr(SB), NOSPLIT, $0-16
	JMP	·Store64(SB)
180
// ·StoreReluintptr has no body of its own: it tail-jumps to ·StoreRel64.
TEXT ·StoreReluintptr(SB), NOSPLIT, $0-16
	JMP	·StoreRel64(SB)
183
// ·Xadduintptr has no body of its own: it tail-jumps to ·Xadd64.
TEXT ·Xadduintptr(SB), NOSPLIT, $0-24
	JMP	·Xadd64(SB)
186
// ·Loadint32 has no body of its own: it tail-jumps to ·Load.
TEXT ·Loadint32(SB), NOSPLIT, $0-12
	JMP	·Load(SB)
189
// ·Loadint64 has no body of its own: it tail-jumps to ·Load64.
TEXT ·Loadint64(SB), NOSPLIT, $0-16
	JMP	·Load64(SB)
192
// ·Xaddint32 has no body of its own: it tail-jumps to ·Xadd.
TEXT ·Xaddint32(SB), NOSPLIT, $0-20
	JMP	·Xadd(SB)
195
// ·Xaddint64 has no body of its own: it tail-jumps to ·Xadd64.
TEXT ·Xaddint64(SB), NOSPLIT, $0-24
	JMP	·Xadd64(SB)
198
// bool ·Casp1(void **val, void *old, void *new)
// Atomically:
//	if (*val == old) {
//		*val = new;
//		return 1;
//	} else {
//		return 0;
//	}
//
// Implemented as a tail-jump to ·Cas64.
TEXT ·Casp1(SB), NOSPLIT, $0-25
	JMP	·Cas64(SB)
208
// uint32 ·Xadd(uint32 volatile *ptr, int32 delta)
// Atomically:
//	*ptr += delta;
//	return *ptr;
//
// LWSYNC is issued once, ahead of the reservation loop.
TEXT ·Xadd(SB), NOSPLIT, $0-20
	MOVD	ptr+0(FP), R4
	MOVW	delta+8(FP), R5
	LWSYNC
retry:
	LWAR	(R4), R3		// R3 = *ptr, taking a reservation
	ADD	R5, R3, R3		// R3 += delta
	STWCCC	R3, (R4)		// conditionally store the sum
	BNE	retry			// store did not go through: start over
	MOVW	R3, ret+16(FP)		// return the updated value
	RET
223
// uint64 ·Xadd64(uint64 volatile *ptr, int64 delta)
// Atomically:
//	*ptr += delta;
//	return *ptr;
//
// LWSYNC is issued once, ahead of the reservation loop.
TEXT ·Xadd64(SB), NOSPLIT, $0-24
	MOVD	ptr+0(FP), R4
	MOVD	delta+8(FP), R5
	LWSYNC
retry:
	LDAR	(R4), R3		// R3 = *ptr, taking a reservation
	ADD	R5, R3, R3		// R3 += delta
	STDCCC	R3, (R4)		// conditionally store the sum
	BNE	retry			// store did not go through: start over
	MOVD	R3, ret+16(FP)		// return the updated value
	RET
238
// uint32 ·Xchg(ptr *uint32, new uint32)
// Atomically:
//	old := *ptr;
//	*ptr = new;
//	return old;
//
// LWSYNC precedes the reservation loop and ISYNC follows it.
TEXT ·Xchg(SB), NOSPLIT, $0-20
	MOVD	ptr+0(FP), R4
	MOVW	new+8(FP), R5
	LWSYNC
retry:
	LWAR	(R4), R3		// R3 = old value, taking a reservation
	STWCCC	R5, (R4)		// conditionally store new
	BNE	retry			// store did not go through: start over
	ISYNC
	MOVW	R3, ret+16(FP)		// return the old value
	RET
254
// uint64 ·Xchg64(ptr *uint64, new uint64)
// Atomically:
//	old := *ptr;
//	*ptr = new;
//	return old;
//
// LWSYNC precedes the reservation loop and ISYNC follows it.
TEXT ·Xchg64(SB), NOSPLIT, $0-24
	MOVD	ptr+0(FP), R4
	MOVD	new+8(FP), R5
	LWSYNC
retry:
	LDAR	(R4), R3		// R3 = old value, taking a reservation
	STDCCC	R5, (R4)		// conditionally store new
	BNE	retry			// store did not go through: start over
	ISYNC
	MOVD	R3, ret+16(FP)		// return the old value
	RET
270
// ·Xchgint32 has no body of its own: it tail-jumps to ·Xchg.
TEXT ·Xchgint32(SB), NOSPLIT, $0-20
	JMP	·Xchg(SB)
273
// ·Xchgint64 has no body of its own: it tail-jumps to ·Xchg64.
TEXT ·Xchgint64(SB), NOSPLIT, $0-24
	JMP	·Xchg64(SB)
276
// ·Xchguintptr has no body of its own: it tail-jumps to ·Xchg64.
TEXT ·Xchguintptr(SB), NOSPLIT, $0-24
	JMP	·Xchg64(SB)
279
// ·StorepNoWB has no body of its own: it tail-jumps to ·Store64.
TEXT ·StorepNoWB(SB), NOSPLIT, $0-16
	JMP	·Store64(SB)
282
// ·Store(ptr, val): writes the 32-bit val to the word at ptr.
// SYNC is issued immediately before the store.
TEXT ·Store(SB), NOSPLIT, $0-12
	MOVW	val+8(FP), R4		// R4 = val
	MOVD	ptr+0(FP), R3		// R3 = ptr
	SYNC
	MOVW	R4, (R3)		// *ptr = val
	RET
289
// ·Store8(ptr, val): writes the byte val to the byte at ptr.
// SYNC is issued immediately before the store.
TEXT ·Store8(SB), NOSPLIT, $0-9
	MOVB	val+8(FP), R4		// R4 = val
	MOVD	ptr+0(FP), R3		// R3 = ptr
	SYNC
	MOVB	R4, (R3)		// *ptr = val
	RET
296
// ·Store64(ptr, val): writes the 64-bit val to the doubleword at ptr.
// SYNC is issued immediately before the store.
TEXT ·Store64(SB), NOSPLIT, $0-16
	MOVD	val+8(FP), R4		// R4 = val
	MOVD	ptr+0(FP), R3		// R3 = ptr
	SYNC
	MOVD	R4, (R3)		// *ptr = val
	RET
303
// ·StoreRel(ptr, val): writes the 32-bit val to the word at ptr.
// LWSYNC (rather than SYNC) is issued immediately before the store.
TEXT ·StoreRel(SB), NOSPLIT, $0-12
	MOVW	val+8(FP), R4		// R4 = val
	MOVD	ptr+0(FP), R3		// R3 = ptr
	LWSYNC
	MOVW	R4, (R3)		// *ptr = val
	RET
310
// ·StoreRel64(ptr, val): writes the 64-bit val to the doubleword at
// ptr. LWSYNC (rather than SYNC) is issued immediately before the
// store.
TEXT ·StoreRel64(SB), NOSPLIT, $0-16
	MOVD	val+8(FP), R4		// R4 = val
	MOVD	ptr+0(FP), R3		// R3 = ptr
	LWSYNC
	MOVD	R4, (R3)		// *ptr = val
	RET
317
// void ·Or8(byte volatile* ptr, byte val)
// Atomically:
//	*ptr |= val;
//
// LWSYNC is issued once, ahead of the reservation loop.
TEXT ·Or8(SB), NOSPLIT, $0-9
	MOVD	ptr+0(FP), R3
	MOVBZ	val+8(FP), R4
	LWSYNC
retry:
	LBAR	(R3), R6		// R6 = *ptr, taking a reservation
	OR	R4, R6, R6		// R6 |= val
	STBCCC	R6, (R3)		// conditionally store the result
	BNE	retry			// store did not go through: start over
	RET
329
// void ·And8(byte volatile* ptr, byte val)
// Atomically:
//	*ptr &= val;
//
// LWSYNC is issued once, ahead of the reservation loop.
TEXT ·And8(SB), NOSPLIT, $0-9
	MOVD	ptr+0(FP), R3
	MOVBZ	val+8(FP), R4
	LWSYNC
retry:
	LBAR	(R3), R6		// R6 = *ptr, taking a reservation
	AND	R4, R6, R6		// R6 &= val
	STBCCC	R6, (R3)		// conditionally store the result
	BNE	retry			// store did not go through: start over
	RET
341
// func Or(addr *uint32, v uint32)
// Atomically ORs the 32-bit argument into the word addressed by the
// pointer argument. Nothing is returned.
// LWSYNC is issued once, ahead of the reservation loop.
TEXT ·Or(SB), NOSPLIT, $0-12
	MOVD	ptr+0(FP), R3
	MOVW	val+8(FP), R4
	LWSYNC
retry:
	LWAR	(R3), R6		// R6 = current word, taking a reservation
	OR	R4, R6, R6		// R6 |= val
	STWCCC	R6, (R3)		// conditionally store the result
	BNE	retry			// store did not go through: start over
	RET
353
// func And(addr *uint32, v uint32)
// Atomically ANDs the 32-bit argument into the word addressed by the
// pointer argument. Nothing is returned.
// LWSYNC is issued once, ahead of the reservation loop.
TEXT ·And(SB), NOSPLIT, $0-12
	MOVD	ptr+0(FP), R3
	MOVW	val+8(FP), R4
	LWSYNC
retry:
	LWAR	(R3), R6		// R6 = current word, taking a reservation
	AND	R4, R6, R6		// R6 &= val
	STWCCC	R6, (R3)		// conditionally store the result
	BNE	retry			// store did not go through: start over
	RET
365
// func Or32(addr *uint32, v uint32) old uint32
// Atomically ORs the 32-bit argument into the word addressed by the
// pointer argument and returns the value the word held beforehand.
// LWSYNC is issued once, ahead of the reservation loop.
TEXT ·Or32(SB), NOSPLIT, $0-20
	MOVD	ptr+0(FP), R3
	MOVW	val+8(FP), R4
	LWSYNC
retry:
	LWAR	(R3), R5		// R5 = old word, taking a reservation
	OR	R4, R5, R6		// R6 = old | val
	STWCCC	R6, (R3)		// conditionally store the result
	BNE	retry			// store did not go through: start over
	MOVW	R5, ret+16(FP)		// return the old word
	RET
378
// func And32(addr *uint32, v uint32) old uint32
// Atomically ANDs the 32-bit argument into the word addressed by the
// pointer argument and returns the value the word held beforehand.
// LWSYNC is issued once, ahead of the reservation loop.
TEXT ·And32(SB), NOSPLIT, $0-20
	MOVD	ptr+0(FP), R3
	MOVW	val+8(FP), R4
	LWSYNC
retry:
	LWAR	(R3), R5		// R5 = old word, taking a reservation
	AND	R4, R5, R6		// R6 = old & val
	STWCCC	R6, (R3)		// conditionally store the result
	BNE	retry			// store did not go through: start over
	MOVW	R5, ret+16(FP)		// return the old word
	RET
391
// func Or64(addr *uint64, v uint64) old uint64
// Atomically ORs the 64-bit argument into the doubleword addressed by
// the pointer argument and returns the value it held beforehand.
// LWSYNC is issued once, ahead of the reservation loop.
TEXT ·Or64(SB), NOSPLIT, $0-24
	MOVD	ptr+0(FP), R3
	MOVD	val+8(FP), R4
	LWSYNC
retry:
	LDAR	(R3), R5		// R5 = old value, taking a reservation
	OR	R4, R5, R6		// R6 = old | val
	STDCCC	R6, (R3)		// conditionally store the result
	BNE	retry			// store did not go through: start over
	MOVD	R5, ret+16(FP)		// return the old value
	RET
404
// func And64(addr *uint64, v uint64) old uint64
// Atomically ANDs the 64-bit argument into the doubleword addressed
// by the pointer argument and returns the value it held beforehand.
// LWSYNC is issued once, ahead of the reservation loop.
TEXT ·And64(SB), NOSPLIT, $0-24
	MOVD	ptr+0(FP), R3
	MOVD	val+8(FP), R4
	LWSYNC
retry:
	LDAR	(R3), R5		// R5 = old value, taking a reservation
	AND	R4, R5, R6		// R6 = old & val
	STDCCC	R6, (R3)		// conditionally store the result
	BNE	retry			// store did not go through: start over
	MOVD	R5, ret+16(FP)		// return the old value
	RET
417
// func Anduintptr(addr *uintptr, v uintptr) old uintptr
// No body of its own: tail-jumps to ·And64.
TEXT ·Anduintptr(SB), NOSPLIT, $0-24
	JMP	·And64(SB)
421
// func Oruintptr(addr *uintptr, v uintptr) old uintptr
// No body of its own: tail-jumps to ·Or64.
TEXT ·Oruintptr(SB), NOSPLIT, $0-24
	JMP	·Or64(SB)
425
View as plain text