1 // Copyright 2014 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 //go:build ppc64 || ppc64le
6
7 #include "textflag.h"
8
9 // For more details about how various memory models are
10 // enforced on POWER, the following paper provides more
11 // details about how they enforce C/C++ like models. This
12 // gives context about why the strange looking code
13 // sequences below work.
14 //
15 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
16
17 // uint32 ·Load(uint32 volatile* ptr)
18 TEXT ·Load(SB),NOSPLIT|NOFRAME,$-8-12
19 MOVD ptr+0(FP), R3
20 SYNC
21 MOVWZ 0(R3), R3
22 CMPW R3, R3, CR7
23 BC 4, 30, 1(PC) // bne- cr7,0x4
24 ISYNC
25 MOVW R3, ret+8(FP)
26 RET
27
28 // uint8 ·Load8(uint8 volatile* ptr)
29 TEXT ·Load8(SB),NOSPLIT|NOFRAME,$-8-9
30 MOVD ptr+0(FP), R3
31 SYNC
32 MOVBZ 0(R3), R3
33 CMP R3, R3, CR7
34 BC 4, 30, 1(PC) // bne- cr7,0x4
35 ISYNC
36 MOVB R3, ret+8(FP)
37 RET
38
39 // uint64 ·Load64(uint64 volatile* ptr)
40 TEXT ·Load64(SB),NOSPLIT|NOFRAME,$-8-16
41 MOVD ptr+0(FP), R3
42 SYNC
43 MOVD 0(R3), R3
44 CMP R3, R3, CR7
45 BC 4, 30, 1(PC) // bne- cr7,0x4
46 ISYNC
47 MOVD R3, ret+8(FP)
48 RET
49
50 // void *·Loadp(void *volatile *ptr)
51 TEXT ·Loadp(SB),NOSPLIT|NOFRAME,$-8-16
52 MOVD ptr+0(FP), R3
53 SYNC
54 MOVD 0(R3), R3
55 CMP R3, R3, CR7
56 BC 4, 30, 1(PC) // bne- cr7,0x4
57 ISYNC
58 MOVD R3, ret+8(FP)
59 RET
60
61 // uint32 ·LoadAcq(uint32 volatile* ptr)
62 TEXT ·LoadAcq(SB),NOSPLIT|NOFRAME,$-8-12
63 MOVD ptr+0(FP), R3
64 MOVWZ 0(R3), R3
65 CMPW R3, R3, CR7
66 BC 4, 30, 1(PC) // bne- cr7, 0x4
67 ISYNC
68 MOVW R3, ret+8(FP)
69 RET
70
71 // uint64 ·LoadAcq64(uint64 volatile* ptr)
72 TEXT ·LoadAcq64(SB),NOSPLIT|NOFRAME,$-8-16
73 MOVD ptr+0(FP), R3
74 MOVD 0(R3), R3
75 CMP R3, R3, CR7
76 BC 4, 30, 1(PC) // bne- cr7, 0x4
77 ISYNC
78 MOVD R3, ret+8(FP)
79 RET
80
81 // bool cas(uint32 *ptr, uint32 old, uint32 new)
82 // Atomically:
83 // if(*val == old){
84 // *val = new;
85 // return 1;
86 // } else
87 // return 0;
88 TEXT ·Cas(SB), NOSPLIT, $0-17
89 MOVD ptr+0(FP), R3
90 MOVWZ old+8(FP), R4
91 MOVWZ new+12(FP), R5
92 LWSYNC
93 cas_again:
94 LWAR (R3), R6
95 CMPW R6, R4
96 BNE cas_fail
97 STWCCC R5, (R3)
98 BNE cas_again
99 MOVD $1, R3
100 LWSYNC
101 MOVB R3, ret+16(FP)
102 RET
103 cas_fail:
104 LWSYNC
105 MOVB R0, ret+16(FP)
106 RET
107
108 // bool ·Cas64(uint64 *ptr, uint64 old, uint64 new)
109 // Atomically:
110 // if(*val == old){
111 // *val = new;
112 // return 1;
113 // } else {
114 // return 0;
115 // }
116 TEXT ·Cas64(SB), NOSPLIT, $0-25
117 MOVD ptr+0(FP), R3
118 MOVD old+8(FP), R4
119 MOVD new+16(FP), R5
120 LWSYNC
121 cas64_again:
122 LDAR (R3), R6
123 CMP R6, R4
124 BNE cas64_fail
125 STDCCC R5, (R3)
126 BNE cas64_again
127 MOVD $1, R3
128 LWSYNC
129 MOVB R3, ret+24(FP)
130 RET
131 cas64_fail:
132 LWSYNC
133 MOVB R0, ret+24(FP)
134 RET
135
136 TEXT ·CasRel(SB), NOSPLIT, $0-17
137 MOVD ptr+0(FP), R3
138 MOVWZ old+8(FP), R4
139 MOVWZ new+12(FP), R5
140 LWSYNC
141 cas_again:
142 LWAR (R3), $0, R6 // 0 = Mutex release hint
143 CMPW R6, R4
144 BNE cas_fail
145 STWCCC R5, (R3)
146 BNE cas_again
147 MOVD $1, R3
148 MOVB R3, ret+16(FP)
149 RET
150 cas_fail:
151 MOVB R0, ret+16(FP)
152 RET
153
154 TEXT ·Casint32(SB), NOSPLIT, $0-17
155 BR ·Cas(SB)
156
157 TEXT ·Casint64(SB), NOSPLIT, $0-25
158 BR ·Cas64(SB)
159
160 TEXT ·Casuintptr(SB), NOSPLIT, $0-25
161 BR ·Cas64(SB)
162
163 TEXT ·Loaduintptr(SB), NOSPLIT|NOFRAME, $0-16
164 BR ·Load64(SB)
165
166 TEXT ·LoadAcquintptr(SB), NOSPLIT|NOFRAME, $0-16
167 BR ·LoadAcq64(SB)
168
169 TEXT ·Loaduint(SB), NOSPLIT|NOFRAME, $0-16
170 BR ·Load64(SB)
171
172 TEXT ·Storeint32(SB), NOSPLIT, $0-12
173 BR ·Store(SB)
174
175 TEXT ·Storeint64(SB), NOSPLIT, $0-16
176 BR ·Store64(SB)
177
178 TEXT ·Storeuintptr(SB), NOSPLIT, $0-16
179 BR ·Store64(SB)
180
181 TEXT ·StoreReluintptr(SB), NOSPLIT, $0-16
182 BR ·StoreRel64(SB)
183
184 TEXT ·Xadduintptr(SB), NOSPLIT, $0-24
185 BR ·Xadd64(SB)
186
187 TEXT ·Loadint32(SB), NOSPLIT, $0-12
188 BR ·Load(SB)
189
190 TEXT ·Loadint64(SB), NOSPLIT, $0-16
191 BR ·Load64(SB)
192
193 TEXT ·Xaddint32(SB), NOSPLIT, $0-20
194 BR ·Xadd(SB)
195
196 TEXT ·Xaddint64(SB), NOSPLIT, $0-24
197 BR ·Xadd64(SB)
198
199 // bool casp(void **val, void *old, void *new)
200 // Atomically:
201 // if(*val == old){
202 // *val = new;
203 // return 1;
204 // } else
205 // return 0;
206 TEXT ·Casp1(SB), NOSPLIT, $0-25
207 BR ·Cas64(SB)
208
209 // uint32 xadd(uint32 volatile *ptr, int32 delta)
210 // Atomically:
211 // *val += delta;
212 // return *val;
213 TEXT ·Xadd(SB), NOSPLIT, $0-20
214 MOVD ptr+0(FP), R4
215 MOVW delta+8(FP), R5
216 LWSYNC
217 LWAR (R4), R3
218 ADD R5, R3
219 STWCCC R3, (R4)
220 BNE -3(PC)
221 MOVW R3, ret+16(FP)
222 RET
223
224 // uint64 Xadd64(uint64 volatile *val, int64 delta)
225 // Atomically:
226 // *val += delta;
227 // return *val;
228 TEXT ·Xadd64(SB), NOSPLIT, $0-24
229 MOVD ptr+0(FP), R4
230 MOVD delta+8(FP), R5
231 LWSYNC
232 LDAR (R4), R3
233 ADD R5, R3
234 STDCCC R3, (R4)
235 BNE -3(PC)
236 MOVD R3, ret+16(FP)
237 RET
238
239 // uint8 Xchg(ptr *uint8, new uint8)
240 // Atomically:
241 // old := *ptr;
242 // *ptr = new;
243 // return old;
244 TEXT ·Xchg8(SB), NOSPLIT, $0-17
245 MOVD ptr+0(FP), R4
246 MOVB new+8(FP), R5
247 LWSYNC
248 LBAR (R4), R3
249 STBCCC R5, (R4)
250 BNE -2(PC)
251 ISYNC
252 MOVB R3, ret+16(FP)
253 RET
254
255 // uint32 Xchg(ptr *uint32, new uint32)
256 // Atomically:
257 // old := *ptr;
258 // *ptr = new;
259 // return old;
260 TEXT ·Xchg(SB), NOSPLIT, $0-20
261 MOVD ptr+0(FP), R4
262 MOVW new+8(FP), R5
263 LWSYNC
264 LWAR (R4), R3
265 STWCCC R5, (R4)
266 BNE -2(PC)
267 ISYNC
268 MOVW R3, ret+16(FP)
269 RET
270
271 // uint64 Xchg64(ptr *uint64, new uint64)
272 // Atomically:
273 // old := *ptr;
274 // *ptr = new;
275 // return old;
276 TEXT ·Xchg64(SB), NOSPLIT, $0-24
277 MOVD ptr+0(FP), R4
278 MOVD new+8(FP), R5
279 LWSYNC
280 LDAR (R4), R3
281 STDCCC R5, (R4)
282 BNE -2(PC)
283 ISYNC
284 MOVD R3, ret+16(FP)
285 RET
286
287 TEXT ·Xchgint32(SB), NOSPLIT, $0-20
288 BR ·Xchg(SB)
289
290 TEXT ·Xchgint64(SB), NOSPLIT, $0-24
291 BR ·Xchg64(SB)
292
293 TEXT ·Xchguintptr(SB), NOSPLIT, $0-24
294 BR ·Xchg64(SB)
295
296 TEXT ·StorepNoWB(SB), NOSPLIT, $0-16
297 BR ·Store64(SB)
298
299 TEXT ·Store(SB), NOSPLIT, $0-12
300 MOVD ptr+0(FP), R3
301 MOVW val+8(FP), R4
302 SYNC
303 MOVW R4, 0(R3)
304 RET
305
306 TEXT ·Store8(SB), NOSPLIT, $0-9
307 MOVD ptr+0(FP), R3
308 MOVB val+8(FP), R4
309 SYNC
310 MOVB R4, 0(R3)
311 RET
312
313 TEXT ·Store64(SB), NOSPLIT, $0-16
314 MOVD ptr+0(FP), R3
315 MOVD val+8(FP), R4
316 SYNC
317 MOVD R4, 0(R3)
318 RET
319
320 TEXT ·StoreRel(SB), NOSPLIT, $0-12
321 MOVD ptr+0(FP), R3
322 MOVW val+8(FP), R4
323 LWSYNC
324 MOVW R4, 0(R3)
325 RET
326
327 TEXT ·StoreRel64(SB), NOSPLIT, $0-16
328 MOVD ptr+0(FP), R3
329 MOVD val+8(FP), R4
330 LWSYNC
331 MOVD R4, 0(R3)
332 RET
333
334 // void ·Or8(byte volatile*, byte);
335 TEXT ·Or8(SB), NOSPLIT, $0-9
336 MOVD ptr+0(FP), R3
337 MOVBZ val+8(FP), R4
338 LWSYNC
339 again:
340 LBAR (R3), R6
341 OR R4, R6
342 STBCCC R6, (R3)
343 BNE again
344 RET
345
346 // void ·And8(byte volatile*, byte);
347 TEXT ·And8(SB), NOSPLIT, $0-9
348 MOVD ptr+0(FP), R3
349 MOVBZ val+8(FP), R4
350 LWSYNC
351 again:
352 LBAR (R3), R6
353 AND R4, R6
354 STBCCC R6, (R3)
355 BNE again
356 RET
357
358 // func Or(addr *uint32, v uint32)
359 TEXT ·Or(SB), NOSPLIT, $0-12
360 MOVD ptr+0(FP), R3
361 MOVW val+8(FP), R4
362 LWSYNC
363 again:
364 LWAR (R3), R6
365 OR R4, R6
366 STWCCC R6, (R3)
367 BNE again
368 RET
369
370 // func And(addr *uint32, v uint32)
371 TEXT ·And(SB), NOSPLIT, $0-12
372 MOVD ptr+0(FP), R3
373 MOVW val+8(FP), R4
374 LWSYNC
375 again:
376 LWAR (R3),R6
377 AND R4, R6
378 STWCCC R6, (R3)
379 BNE again
380 RET
381
382 // func Or32(addr *uint32, v uint32) old uint32
383 TEXT ·Or32(SB), NOSPLIT, $0-20
384 MOVD ptr+0(FP), R3
385 MOVW val+8(FP), R4
386 LWSYNC
387 again:
388 LWAR (R3), R6
389 OR R4, R6, R7
390 STWCCC R7, (R3)
391 BNE again
392 MOVW R6, ret+16(FP)
393 RET
394
395 // func And32(addr *uint32, v uint32) old uint32
396 TEXT ·And32(SB), NOSPLIT, $0-20
397 MOVD ptr+0(FP), R3
398 MOVW val+8(FP), R4
399 LWSYNC
400 again:
401 LWAR (R3),R6
402 AND R4, R6, R7
403 STWCCC R7, (R3)
404 BNE again
405 MOVW R6, ret+16(FP)
406 RET
407
408 // func Or64(addr *uint64, v uint64) old uint64
409 TEXT ·Or64(SB), NOSPLIT, $0-24
410 MOVD ptr+0(FP), R3
411 MOVD val+8(FP), R4
412 LWSYNC
413 again:
414 LDAR (R3), R6
415 OR R4, R6, R7
416 STDCCC R7, (R3)
417 BNE again
418 MOVD R6, ret+16(FP)
419 RET
420
421 // func And64(addr *uint64, v uint64) old uint64
422 TEXT ·And64(SB), NOSPLIT, $0-24
423 MOVD ptr+0(FP), R3
424 MOVD val+8(FP), R4
425 LWSYNC
426 again:
427 LDAR (R3),R6
428 AND R4, R6, R7
429 STDCCC R7, (R3)
430 BNE again
431 MOVD R6, ret+16(FP)
432 RET
433
434 // func Anduintptr(addr *uintptr, v uintptr) old uintptr
435 TEXT ·Anduintptr(SB), NOSPLIT, $0-24
436 JMP ·And64(SB)
437
438 // func Oruintptr(addr *uintptr, v uintptr) old uintptr
439 TEXT ·Oruintptr(SB), NOSPLIT, $0-24
440 JMP ·Or64(SB)
441
View as plain text