src/runtime/race_arm64.s
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build race

#include "go_asm.h"
#include "funcdata.h"
#include "textflag.h"
#include "tls_arm64.h"
#include "cgo/abi_arm64.h"

// The following thunks allow calling the gcc-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and the scheduler.
// Third, in the long term it will allow removing the cyclic runtime/race dependency on cmd/go.

// A brief recap of the arm64 calling convention.
// Arguments are passed in R0...R7; the rest go on the stack.
// Callee-saved registers are: R19...R28.
// Temporary registers are: R9...R15.
// SP must be 16-byte aligned.
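//
// For example, the four-argument tsan call used by racereadpc below,
//
//	__tsan_read_pc(thr, addr, callpc, pc)
//
// expects thr in R0, addr in R1, callpc in R2 and pc in R3.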

// When calling racecalladdr, R9 is the call target address.

// The race ctx, ThreadState *thr below, is passed in R0 and loaded in racecalladdr.

// Darwin may return an unaligned thread pointer. Align it. (See tls_arm64.s)
// No-op on other OSes.
#ifdef TLS_darwin
#define TP_ALIGN AND $~7, R0
#else
#define TP_ALIGN
#endif

// Load g from TLS. (See tls_arm64.s)
#define load_g \
	MRS_TPIDR_R0 \
	TP_ALIGN \
	MOVD runtime·tls_g(SB), R11 \
	MOVD (R0)(R11), g
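
// In Go-like pseudocode, load_g amounts to (illustrative sketch; tls_g
// holds the TLS offset of g):
//
//	tp := tpidr_el0   // thread pointer (TP_ALIGNed on darwin)
//	g = *(tp + tls_g) // load the current g out of TLS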

// func runtime·raceread(addr uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would make the caller's PC ineffective.
TEXT	runtime·raceread<ABIInternal>(SB), NOSPLIT, $0-8
	MOVD	R0, R1	// addr
	MOVD	LR, R2
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVD	$__tsan_read(SB), R9
	JMP	racecalladdr<>(SB)
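
// Illustrative sketch of the instrumentation: under -race the compiler
// rewrites a load such as
//
//	x := *p
//
// into roughly (hedged, simplified):
//
//	runtime.raceread(uintptr(unsafe.Pointer(p)))
//	x := *p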

// func runtime·RaceRead(addr uintptr)
TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads caller pc.
	JMP	runtime·raceread(SB)

// func runtime·racereadpc(void *addr, void *callpc, void *pc)
TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	callpc+8(FP), R2
	MOVD	pc+16(FP), R3
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVD	$__tsan_read_pc(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would make the caller's PC ineffective.
TEXT	runtime·racewrite<ABIInternal>(SB), NOSPLIT, $0-8
	MOVD	R0, R1	// addr
	MOVD	LR, R2
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVD	$__tsan_write(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceWrite(addr uintptr)
TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads caller pc.
	JMP	runtime·racewrite(SB)

// func runtime·racewritepc(void *addr, void *callpc, void *pc)
TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	callpc+8(FP), R2
	MOVD	pc+16(FP), R3
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVD	$__tsan_write_pc(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would make the caller's PC ineffective.
TEXT	runtime·racereadrange<ABIInternal>(SB), NOSPLIT, $0-16
	MOVD	R1, R2	// size
	MOVD	R0, R1	// addr
	MOVD	LR, R3
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_read_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceReadRange(addr, size uintptr)
TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads caller pc.
	JMP	runtime·racereadrange(SB)

// func runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	pc+16(FP), R3
	ADD	$4, R3	// pc is function start, tsan wants return address.
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_read_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would make the caller's PC ineffective.
TEXT	runtime·racewriterange<ABIInternal>(SB), NOSPLIT, $0-16
	MOVD	R1, R2	// size
	MOVD	R0, R1	// addr
	MOVD	LR, R3
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_write_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceWriteRange(addr, size uintptr)
TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads caller pc.
	JMP	runtime·racewriterange(SB)

// func runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	pc+16(FP), R3
	ADD	$4, R3	// pc is function start, tsan wants return address.
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_write_range(SB), R9
	JMP	racecalladdr<>(SB)

// If addr (R1) is out of range, do nothing.
// Otherwise, set up the goroutine context and invoke racecall. Other arguments are already set.
TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	MOVD	runtime·racearenastart(SB), R10
	CMP	R10, R1
	BLT	data
	MOVD	runtime·racearenaend(SB), R10
	CMP	R10, R1
	BLT	call
data:
	MOVD	runtime·racedatastart(SB), R10
	CMP	R10, R1
	BLT	ret
	MOVD	runtime·racedataend(SB), R10
	CMP	R10, R1
	BGE	ret	// racedataend is exclusive, so addr >= racedataend is out of range
call:
	JMP	racecall<>(SB)
ret:
	RET
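
// The branch ladder above is equivalent to this Go-like sketch
// (illustrative only):
//
//	if (addr >= racearenastart && addr < racearenaend) ||
//		(addr >= racedatastart && addr < racedataend) {
//		racecall(f, racectx, ...) // tail call; f and args are already in registers
//	}
//	// otherwise: not a shadowed address, do nothing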

// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
TEXT	runtime·racefuncenter<ABIInternal>(SB), NOSPLIT, $0-8
	MOVD	R0, R9	// callpc
	JMP	racefuncenter<>(SB)

// Common code for racefuncenter.
// R9 = caller's return address
TEXT	racefuncenter<>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0	// goroutine racectx
	MOVD	R9, R1
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVD	$__tsan_func_enter(SB), R9
	BL	racecall<>(SB)
	RET

// func runtime·racefuncexit()
// Called from instrumented code.
TEXT	runtime·racefuncexit<ABIInternal>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0	// race context
	// void __tsan_func_exit(ThreadState *thr);
	MOVD	$__tsan_func_exit(SB), R9
	JMP	racecall<>(SB)

// Atomic operations for the sync/atomic package.
// R3 = addr of the arguments passed to this function; after two BLs it
// can be fetched at 40(RSP) in racecallatomic.
// R0, R1, R2 are set in racecallatomic.
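//
// Illustrative sketch (the 40-byte figure follows from the two
// intervening call frames noted above; treat the exact layout as an
// assumption):
//
//	addr := *(*uintptr)(RSP + 40) // first argument of the sync/atomic op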

// Load
TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT, $0-12
	GO_ARGS
	MOVD	$__tsan_go_atomic32_load(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT, $0-16
	GO_ARGS
	MOVD	$__tsan_go_atomic64_load(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·LoadInt32(SB)

TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

// Store
TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT, $0-12
	GO_ARGS
	MOVD	$__tsan_go_atomic32_store(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT, $0-16
	GO_ARGS
	MOVD	$__tsan_go_atomic64_store(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·StoreInt32(SB)

TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

// Swap
TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVD	$__tsan_go_atomic32_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVD	$__tsan_go_atomic64_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·SwapInt32(SB)

TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

// Add
TEXT	sync∕atomic·AddInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVD	$__tsan_go_atomic32_fetch_add(SB), R9
	BL	racecallatomic<>(SB)
	MOVW	add+8(FP), R0	// convert fetch_add to add_fetch
	MOVW	ret+16(FP), R1
	ADD	R0, R1, R0
	MOVW	R0, ret+16(FP)
	RET
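
// __tsan_go_atomic32_fetch_add returns the old value, but Go's
// sync/atomic.AddInt32 must return the new one, hence the fixup above.
// In Go-like pseudocode (illustrative):
//
//	*ret = oldValue + delta // tsan left oldValue in the ret slot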

TEXT	sync∕atomic·AddInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVD	$__tsan_go_atomic64_fetch_add(SB), R9
	BL	racecallatomic<>(SB)
	MOVD	add+8(FP), R0	// convert fetch_add to add_fetch
	MOVD	ret+16(FP), R1
	ADD	R0, R1, R0
	MOVD	R0, ret+16(FP)
	RET

TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·AddInt32(SB)

TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

// And
TEXT	sync∕atomic·AndInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVD	$__tsan_go_atomic32_fetch_and(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·AndInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVD	$__tsan_go_atomic64_fetch_and(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·AndUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·AndInt32(SB)

TEXT	sync∕atomic·AndUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AndInt64(SB)

TEXT	sync∕atomic·AndUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AndInt64(SB)

// Or
TEXT	sync∕atomic·OrInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVD	$__tsan_go_atomic32_fetch_or(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·OrInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVD	$__tsan_go_atomic64_fetch_or(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·OrUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·OrInt32(SB)

TEXT	sync∕atomic·OrUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·OrInt64(SB)

TEXT	sync∕atomic·OrUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·OrInt64(SB)

// CompareAndSwap
TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-17
	GO_ARGS
	MOVD	$__tsan_go_atomic32_compare_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-25
	GO_ARGS
	MOVD	$__tsan_go_atomic64_compare_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-17
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt32(SB)

TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

// Generic atomic operation implementation.
// R9 = addr of target function
TEXT	racecallatomic<>(SB), NOSPLIT, $0
	// Set up these registers:
	// R0 = *ThreadState
	// R1 = caller pc
	// R2 = pc
	// R3 = addr of incoming arg list

	// Trigger SIGSEGV early.
	MOVD	40(RSP), R3	// 1st arg is addr; after two BLs it is at 40(RSP)
	MOVB	(R3), R13	// segv here if addr is bad
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	MOVD	runtime·racearenastart(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_data
	MOVD	runtime·racearenaend(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_ok
racecallatomic_data:
	MOVD	runtime·racedatastart(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_ignore
	MOVD	runtime·racedataend(SB), R10
	CMP	R10, R3
	BGE	racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range, call the atomic function.
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	MOVD	16(RSP), R1	// caller pc
	MOVD	R9, R2	// pc
	ADD	$40, RSP, R3
	JMP	racecall<>(SB)	// does not return
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause a crash.
	MOVD	R9, R21	// remember the original function
	MOVD	$__tsan_go_ignore_sync_begin(SB), R9
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	BL	racecall<>(SB)
	MOVD	R21, R9	// restore the original function
	// Call the atomic function.
	// racecall will call LLVM race code which might clobber R28 (g).
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	MOVD	16(RSP), R1	// caller pc
	MOVD	R9, R2	// pc
	ADD	$40, RSP, R3	// arguments
	BL	racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVD	$__tsan_go_ignore_sync_end(SB), R9
	MOVD	g_racectx(g), R0	// goroutine context
	BL	racecall<>(SB)
	RET
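
// Overall flow of racecallatomic in Go-like pseudocode (illustrative
// sketch; f is the __tsan_go_atomic* function loaded into R9):
//
//	if addrInRaceRange(addr) { // hypothetical helper for the range check above
//		f(racectx, callerpc, pc, args)
//	} else {
//		__tsan_go_ignore_sync_begin(racectx)
//		f(racectx, callerpc, pc, args)
//		__tsan_go_ignore_sync_end(racectx)
//	}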

// func runtime·racecall(void(*f)(...), ...)
// Calls C function f from race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
TEXT	runtime·racecall(SB), NOSPLIT, $0-0
	MOVD	fn+0(FP), R9
	MOVD	arg0+8(FP), R0
	MOVD	arg1+16(FP), R1
	MOVD	arg2+24(FP), R2
	MOVD	arg3+32(FP), R3
	JMP	racecall<>(SB)
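
// On the Go side this corresponds to a declaration roughly like the
// following (a sketch; see runtime/race.go for the actual declaration
// and call sites):
//
//	//go:noescape
//	func racecall(fn *byte, arg0, arg1, arg2, arg3 uintptr)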

// Switches SP to g0 stack and calls (R9). Arguments already set.
// Clobbers R19, R20.
TEXT	racecall<>(SB), NOSPLIT|NOFRAME, $0-0
	MOVD	g_m(g), R10
	// Switch to g0 stack.
	MOVD	RSP, R19	// callee-saved, preserved across the CALL
	MOVD	R30, R20	// callee-saved, preserved across the CALL
	MOVD	m_g0(R10), R11
	CMP	R11, g
	BEQ	call	// already on g0
	MOVD	(g_sched+gobuf_sp)(R11), R12
	MOVD	R12, RSP
call:
	// Decrement SP past where the frame pointer is saved in the Go arm64
	// ABI (one word below the stack pointer) so the race detector library
	// code doesn't clobber it.
	SUB	$16, RSP
	BL	R9
	MOVD	R19, RSP
	JMP	(R20)
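
// Go-like sketch of the stack switch above (illustrative):
//
//	sp, lr := RSP, LR // stashed in callee-saved R19/R20
//	if g != g.m.g0 {
//		RSP = g.m.g0.sched.sp // run the C code on the system stack
//	}
//	RSP -= 16  // keep the saved-FP slot out of the C code's way
//	f()        // the race runtime function whose address is in R9
//	RSP = sp   // restore the Go stack and return via lr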

// C->Go callback thunk that allows calling runtime·racesymbolize from C code.
// A direct Go->C race call has only switched SP; finish the g->g0 switch by setting the correct g.
// The overall effect of the Go->C->Go call chain is similar to that of mcall.
// R0 contains the command code. R1 contains the command-specific context.
// See racecallback for command codes.
TEXT	runtime·racecallbackthunk(SB), NOSPLIT|NOFRAME, $0
	// Handle command raceGetProcCmd (0) here.
	// First, the code below assumes that we are on curg, while raceGetProcCmd
	// can be executed on g0. Second, it is called frequently, so it will
	// benefit from this fast path.
	CBNZ	R0, rest
	MOVD	g, R13
#ifdef TLS_darwin
	MOVD	R27, R12	// save R27 a.k.a. REGTMP (callee-saved in C); load_g clobbers it
#endif
	load_g
#ifdef TLS_darwin
	MOVD	R12, R27
#endif
	MOVD	g_m(g), R0
	MOVD	m_p(R0), R0
	MOVD	p_raceprocctx(R0), R0
	MOVD	R0, (R1)
	MOVD	R13, g
	JMP	(LR)
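
// Fast-path sketch in Go-like pseudocode (illustrative; ctx is the
// command-specific context passed in R1):
//
//	*(*uintptr)(ctx) = g.m.p.raceprocctx
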
rest:
	// Save callee-saved registers (Go code won't respect that).
	// 8(RSP) and 16(RSP) are for args passed through racecallback.
	SUB	$176, RSP
	MOVD	LR, 0(RSP)

	SAVE_R19_TO_R28(8*3)
	SAVE_F8_TO_F15(8*13)
	MOVD	R29, (8*21)(RSP)
	// Set g = g0.
	// load_g will clobber R0, so save it first.
	MOVD	R0, R13
	load_g
	// Restore R0.
	MOVD	R13, R0
	MOVD	g_m(g), R13
	MOVD	m_g0(R13), R14
	CMP	R14, g
	BEQ	noswitch	// branch if already on g0
	MOVD	R14, g

	MOVD	R0, 8(RSP)	// func arg
	MOVD	R1, 16(RSP)	// func arg
	BL	runtime·racecallback(SB)

	// All registers are smashed after Go code, reload.
	MOVD	g_m(g), R13
	MOVD	m_curg(R13), g	// g = m->curg
ret:
	// Restore callee-saved registers.
	MOVD	0(RSP), LR
	MOVD	(8*21)(RSP), R29
	RESTORE_F8_TO_F15(8*13)
	RESTORE_R19_TO_R28(8*3)
	ADD	$176, RSP
	JMP	(LR)

noswitch:
	// Already on g0.
	MOVD	R0, 8(RSP)	// func arg
	MOVD	R1, 16(RSP)	// func arg
	BL	runtime·racecallback(SB)
	JMP	ret

#ifndef TLSG_IS_VARIABLE
// tls_g, g value for each thread in TLS
GLOBL runtime·tls_g+0(SB), TLSBSS+DUPOK, $8
#endif