Text file
src/runtime/race_amd64.s
1 // Copyright 2013 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 //go:build race
6
7 #include "go_asm.h"
8 #include "go_tls.h"
9 #include "funcdata.h"
10 #include "textflag.h"
11 #include "cgo/abi_amd64.h"
12
13 // The following thunks allow calling the gcc-compiled race runtime directly
14 // from Go code without going all the way through cgo.
15 // First, it's much faster (up to 50% speedup for real Go programs).
16 // Second, it eliminates race-related special cases from cgocall and scheduler.
// Third, in the long term it will allow removing the cyclic runtime/race dependency on cmd/go.
18
// Calling-convention recap for the C side (the gcc-compiled race runtime).
//
// Everything except Windows:
//	arguments:    DI, SI, DX, CX, R8, R9, the rest is on stack
//	callee-saved: BX, BP, R12-R15
//	SP must be 16-byte aligned
//
// Windows:
//	arguments:    CX, DX, R8, R9, the rest is on stack
//	callee-saved: BX, BP, DI, SI, R12-R15
//	SP must be 16-byte aligned
//	Windows also requires "stack-backing" for the 4 register arguments:
//	https://learn.microsoft.com/en-us/cpp/build/x64-calling-convention
//	We do not do this, because it seems to be intended for
//	vararg/unprototyped functions, and the gcc-compiled race runtime
//	does not try to use that space.
//
// RARG0-RARG3 name the first four C argument registers of the target OS.

#ifdef GOOS_windows
#define RARG0 CX
#define RARG1 DX
#define RARG2 R8
#define RARG3 R9
#else
#define RARG0 DI
#define RARG1 SI
#define RARG2 DX
#define RARG3 CX
#endif
42
// func runtime·raceread(addr uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would render runtime.getcallerpc ineffective.
// In: AX = addr, (SP) = return address into the instrumented caller.
TEXT	runtime·raceread<ABIInternal>(SB), NOSPLIT, $0-8
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVQ	(SP), RARG2	// pc
	MOVQ	AX, RARG1	// addr; must be copied out before AX is reused below
	MOVQ	$__tsan_read(SB), AX
	JMP	racecalladdr<>(SB)
53
// func runtime·RaceRead(addr uintptr)
// Exported entry point; it is only a jump to raceread.
TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
	// Tail call (JMP, not CALL): raceread reads the caller pc.
	JMP	runtime·raceread(SB)
58
// void runtime·racereadpc(void *addr, void *callpc, void *pc)
// Reports a read with explicitly supplied pcs instead of the return address.
TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3	// pc is function start, tsan wants return address
	MOVQ	callpc+8(FP), RARG2
	MOVQ	addr+0(FP), RARG1
	MOVQ	$__tsan_read_pc(SB), AX
	JMP	racecalladdr<>(SB)
68
// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would render runtime.getcallerpc ineffective.
// In: AX = addr, (SP) = return address into the instrumented caller.
TEXT	runtime·racewrite<ABIInternal>(SB), NOSPLIT, $0-8
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVQ	(SP), RARG2	// pc
	MOVQ	AX, RARG1	// addr; must be copied out before AX is reused below
	MOVQ	$__tsan_write(SB), AX
	JMP	racecalladdr<>(SB)
79
// func runtime·RaceWrite(addr uintptr)
// Exported entry point; it is only a jump to racewrite.
TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
	// Tail call (JMP, not CALL): racewrite reads the caller pc.
	JMP	runtime·racewrite(SB)
84
// void runtime·racewritepc(void *addr, void *callpc, void *pc)
// Reports a write with explicitly supplied pcs instead of the return address.
TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3	// pc is function start, tsan wants return address
	MOVQ	callpc+8(FP), RARG2
	MOVQ	addr+0(FP), RARG1
	MOVQ	$__tsan_write_pc(SB), AX
	JMP	racecalladdr<>(SB)
94
// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would render runtime.getcallerpc ineffective.
// In: AX = addr, BX = size, (SP) = return address into the instrumented caller.
TEXT	runtime·racereadrange<ABIInternal>(SB), NOSPLIT, $0-16
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	(SP), RARG3	// pc
	MOVQ	BX, RARG2	// size
	MOVQ	AX, RARG1	// addr; must be copied out before AX is reused below
	MOVQ	$__tsan_read_range(SB), AX
	JMP	racecalladdr<>(SB)
106
// func runtime·RaceReadRange(addr, size uintptr)
// Exported entry point; it is only a jump to racereadrange.
TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// Tail call (JMP, not CALL): racereadrange reads the caller pc.
	JMP	runtime·racereadrange(SB)
111
// void runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
// Range read with an explicitly supplied pc instead of the return address.
TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3	// pc is function start, tsan wants return address
	MOVQ	size+8(FP), RARG2
	MOVQ	addr+0(FP), RARG1
	MOVQ	$__tsan_read_range(SB), AX
	JMP	racecalladdr<>(SB)
121
// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would render runtime.getcallerpc ineffective.
// In: AX = addr, BX = size, (SP) = return address into the instrumented caller.
TEXT	runtime·racewriterange<ABIInternal>(SB), NOSPLIT, $0-16
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	(SP), RARG3	// pc
	MOVQ	BX, RARG2	// size
	MOVQ	AX, RARG1	// addr; must be copied out before AX is reused below
	MOVQ	$__tsan_write_range(SB), AX
	JMP	racecalladdr<>(SB)
133
// func runtime·RaceWriteRange(addr, size uintptr)
// Exported entry point; it is only a jump to racewriterange.
TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// Tail call (JMP, not CALL): racewriterange reads the caller pc.
	JMP	runtime·racewriterange(SB)
138
// void runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
// Range write with an explicitly supplied pc instead of the return address.
TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3	// pc is function start, tsan wants return address
	MOVQ	size+8(FP), RARG2
	MOVQ	addr+0(FP), RARG1
	MOVQ	$__tsan_write_range(SB), AX
	JMP	racecalladdr<>(SB)
148
// racecalladdr<> filters memory-access reports by address.
// In: AX = tsan function to call, RARG1 = accessed address; the remaining
// C arguments were already set up by the caller. RARG0 is loaded here.
// If RARG1 lies in [racearenastart, racearenaend) or in
// [racedatastart, racedataend), tail-call racecall<>; otherwise do nothing.
TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	// Arena window: [racearenastart, racearenaend).
	CMPQ	RARG1, runtime·racearenastart(SB)
	JB	check_data
	CMPQ	RARG1, runtime·racearenaend(SB)
	JB	in_range
check_data:
	// Data window: [racedatastart, racedataend).
	CMPQ	RARG1, runtime·racedatastart(SB)
	JB	out_of_range
	CMPQ	RARG1, runtime·racedataend(SB)
	JAE	out_of_range
in_range:
	MOVQ	AX, AX		// w/o this 6a miscompiles this function
	JMP	racecall<>(SB)
out_of_range:
	RET
168
// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
// Loads the stack argument into R11, the register racefuncenter<> expects.
TEXT	runtime·racefuncenter(SB), NOSPLIT, $0-8
	MOVQ	callpc+0(FP), R11	// caller's return address
	JMP	racefuncenter<>(SB)
174
// Common code for racefuncenter.
// In: R11 = caller's return address, DX = function entry context (closures).
// DX is parked in BX across the C call and restored before returning.
TEXT	racefuncenter<>(SB), NOSPLIT|NOFRAME, $0-0
	// Save DX before any argument register is written: on Windows RARG1 is DX.
	MOVQ	DX, BX		// save function entry context (for closures)
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVQ	R11, RARG1	// pc
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	$__tsan_func_enter(SB), AX
	// racecall<> preserves BX
	CALL	racecall<>(SB)
	MOVQ	BX, DX	// restore function entry context
	RET
187
// func runtime·racefuncexit()
// Called from instrumented code.
TEXT	runtime·racefuncexit(SB), NOSPLIT, $0-0
	// void __tsan_func_exit(ThreadState *thr);
	MOVQ	$__tsan_func_exit(SB), AX
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	JMP	racecall<>(SB)
195
196 // Atomic operations for sync/atomic package.
197
198 // Load
// LoadInt32 forwards to __tsan_go_atomic32_load through racecallatomic<>.
// The function is NOFRAME and uses CALL, so racecallatomic<> finds its own
// return pc at (SP), this function's caller pc at 8(SP) and the Go
// arguments starting at 16(SP).
TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT|NOFRAME, $0-12
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_load(SB), AX	// target for racecallatomic<>
	CALL	racecallatomic<>(SB)
	RET
204
// LoadInt64 forwards to __tsan_go_atomic64_load through racecallatomic<>.
TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT|NOFRAME, $0-16
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_load(SB), AX	// target for racecallatomic<>
	CALL	racecallatomic<>(SB)
	RET
210
// LoadUint32 has the same frame layout as LoadInt32 ($0-12) and jumps to it.
TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·LoadInt32(SB)
214
// LoadUint64 has the same frame layout as LoadInt64 ($0-16) and jumps to it.
TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)
218
// LoadUintptr has the same frame layout as LoadInt64 ($0-16) and jumps to it.
TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)
222
// LoadPointer has the same frame layout as LoadInt64 ($0-16) and jumps to it.
TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)
226
227 // Store
// StoreInt32 forwards to __tsan_go_atomic32_store through racecallatomic<>.
// NOFRAME plus CALL keeps the stack layout racecallatomic<> reads
// (pc at (SP), caller pc at 8(SP), arguments from 16(SP)).
TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT|NOFRAME, $0-12
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_store(SB), AX	// target for racecallatomic<>
	CALL	racecallatomic<>(SB)
	RET
233
// StoreInt64 forwards to __tsan_go_atomic64_store through racecallatomic<>.
TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT|NOFRAME, $0-16
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_store(SB), AX	// target for racecallatomic<>
	CALL	racecallatomic<>(SB)
	RET
239
// StoreUint32 has the same frame layout as StoreInt32 ($0-12) and jumps to it.
TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·StoreInt32(SB)
243
// StoreUint64 has the same frame layout as StoreInt64 ($0-16) and jumps to it.
TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)
247
// StoreUintptr has the same frame layout as StoreInt64 ($0-16) and jumps to it.
TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)
251
252 // Swap
// SwapInt32 forwards to __tsan_go_atomic32_exchange through racecallatomic<>.
// NOFRAME plus CALL keeps the stack layout racecallatomic<> reads
// (pc at (SP), caller pc at 8(SP), arguments from 16(SP)).
TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT|NOFRAME, $0-20
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_exchange(SB), AX	// target for racecallatomic<>
	CALL	racecallatomic<>(SB)
	RET
258
// SwapInt64 forwards to __tsan_go_atomic64_exchange through racecallatomic<>.
TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT|NOFRAME, $0-24
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_exchange(SB), AX	// target for racecallatomic<>
	CALL	racecallatomic<>(SB)
	RET
264
// SwapUint32 has the same frame layout as SwapInt32 ($0-20) and jumps to it.
TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·SwapInt32(SB)
268
// SwapUint64 has the same frame layout as SwapInt64 ($0-24) and jumps to it.
TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)
272
// SwapUintptr has the same frame layout as SwapInt64 ($0-24) and jumps to it.
TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)
276
277 // Add
// AddInt32 forwards to __tsan_go_atomic32_fetch_add through racecallatomic<>,
// then adds the delta to the result slot so that the caller gets the new
// value (add_fetch) rather than the fetch_add result.
TEXT	sync∕atomic·AddInt32(SB), NOSPLIT|NOFRAME, $0-20
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_fetch_add(SB), AX	// target for racecallatomic<>
	CALL	racecallatomic<>(SB)
	// convert fetch_add to add_fetch: ret += add
	MOVL	add+8(FP), AX
	ADDL	AX, ret+16(FP)
	RET
285
// AddInt64 forwards to __tsan_go_atomic64_fetch_add through racecallatomic<>,
// then adds the delta to the result slot so that the caller gets the new
// value (add_fetch) rather than the fetch_add result.
TEXT	sync∕atomic·AddInt64(SB), NOSPLIT|NOFRAME, $0-24
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_fetch_add(SB), AX	// target for racecallatomic<>
	CALL	racecallatomic<>(SB)
	// convert fetch_add to add_fetch: ret += add
	MOVQ	add+8(FP), AX
	ADDQ	AX, ret+16(FP)
	RET
293
// AddUint32 has the same frame layout as AddInt32 ($0-20) and jumps to it.
TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·AddInt32(SB)
297
// AddUint64 has the same frame layout as AddInt64 ($0-24) and jumps to it.
TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)
301
// AddUintptr has the same frame layout as AddInt64 ($0-24) and jumps to it.
TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)
305
306 // And
// AndInt32 forwards to __tsan_go_atomic32_fetch_and through racecallatomic<>.
// Unlike the Add entry points, no adjustment is applied to the result.
TEXT	sync∕atomic·AndInt32(SB), NOSPLIT|NOFRAME, $0-20
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_fetch_and(SB), AX	// target for racecallatomic<>
	CALL	racecallatomic<>(SB)
	RET
312
// AndInt64 forwards to __tsan_go_atomic64_fetch_and through racecallatomic<>.
// Unlike the Add entry points, no adjustment is applied to the result.
TEXT	sync∕atomic·AndInt64(SB), NOSPLIT|NOFRAME, $0-24
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_fetch_and(SB), AX	// target for racecallatomic<>
	CALL	racecallatomic<>(SB)
	RET
318
// AndUint32 has the same frame layout as AndInt32 ($0-20) and jumps to it.
TEXT	sync∕atomic·AndUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·AndInt32(SB)
322
// AndUint64 has the same frame layout as AndInt64 ($0-24) and jumps to it.
TEXT	sync∕atomic·AndUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AndInt64(SB)
326
// AndUintptr has the same frame layout as AndInt64 ($0-24) and jumps to it.
TEXT	sync∕atomic·AndUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AndInt64(SB)
330
331 // Or
// OrInt32 forwards to __tsan_go_atomic32_fetch_or through racecallatomic<>.
// Unlike the Add entry points, no adjustment is applied to the result.
TEXT	sync∕atomic·OrInt32(SB), NOSPLIT|NOFRAME, $0-20
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_fetch_or(SB), AX	// target for racecallatomic<>
	CALL	racecallatomic<>(SB)
	RET
337
// OrInt64 forwards to __tsan_go_atomic64_fetch_or through racecallatomic<>.
// Unlike the Add entry points, no adjustment is applied to the result.
TEXT	sync∕atomic·OrInt64(SB), NOSPLIT|NOFRAME, $0-24
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_fetch_or(SB), AX	// target for racecallatomic<>
	CALL	racecallatomic<>(SB)
	RET
343
// OrUint32 has the same frame layout as OrInt32 ($0-20) and jumps to it.
TEXT	sync∕atomic·OrUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·OrInt32(SB)
347
// OrUint64 has the same frame layout as OrInt64 ($0-24) and jumps to it.
TEXT	sync∕atomic·OrUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·OrInt64(SB)
351
// OrUintptr has the same frame layout as OrInt64 ($0-24) and jumps to it.
TEXT	sync∕atomic·OrUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·OrInt64(SB)
355
356
357 // CompareAndSwap
// CompareAndSwapInt32 forwards to __tsan_go_atomic32_compare_exchange
// through racecallatomic<>. NOFRAME plus CALL keeps the stack layout
// racecallatomic<> reads (pc at (SP), caller pc at 8(SP), arguments from 16(SP)).
TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT|NOFRAME, $0-17
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_compare_exchange(SB), AX	// target for racecallatomic<>
	CALL	racecallatomic<>(SB)
	RET
363
// CompareAndSwapInt64 forwards to __tsan_go_atomic64_compare_exchange
// through racecallatomic<>.
TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT|NOFRAME, $0-25
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_compare_exchange(SB), AX	// target for racecallatomic<>
	CALL	racecallatomic<>(SB)
	RET
369
// CompareAndSwapUint32 has the same frame layout as CompareAndSwapInt32
// ($0-17) and jumps to it.
TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-17
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt32(SB)
373
// CompareAndSwapUint64 has the same frame layout as CompareAndSwapInt64
// ($0-25) and jumps to it.
TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)
377
// CompareAndSwapUintptr has the same frame layout as CompareAndSwapInt64
// ($0-25) and jumps to it.
TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)
381
// Generic atomic operation implementation.
// In: AX = tsan atomic function to call.
// Stack on entry (the sync/atomic entry points are NOFRAME and CALL here):
//	(SP)   return pc inside the sync/atomic entry point ("pc")
//	8(SP)  return pc inside that entry point's caller ("caller pc")
//	16(SP) start of the Go arguments; the first one is the target address
// Uses R12 and R13 as scratch, and BX on the out-of-range path.
TEXT	racecallatomic<>(SB), NOSPLIT|NOFRAME, $0-0
	// Trigger SIGSEGV early.
	MOVQ	16(SP), R12	// R12 = address operated on
	MOVBLZX	(R12), R13	// probe load; the value is not used
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ	R12, runtime·racearenastart(SB)
	JB	check_data
	CMPQ	R12, runtime·racearenaend(SB)
	JB	in_range
check_data:
	CMPQ	R12, runtime·racedatastart(SB)
	JB	ignore_sync
	CMPQ	R12, runtime·racedataend(SB)
	JAE	ignore_sync
in_range:
	// Addr is within the good range, call the atomic function.
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	8(SP), RARG1	// caller pc
	MOVQ	(SP), RARG2	// pc
	LEAQ	16(SP), RARG3	// arguments
	JMP	racecall<>(SB)	// does not return
ignore_sync:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause crash.
	MOVQ	AX, BX	// remember the original function
	MOVQ	$__tsan_go_ignore_sync_begin(SB), AX
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	CALL	racecall<>(SB)
	MOVQ	BX, AX	// restore the original function
	// Call the atomic function.
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	8(SP), RARG1	// caller pc
	MOVQ	(SP), RARG2	// pc
	LEAQ	16(SP), RARG3	// arguments
	CALL	racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVQ	$__tsan_go_ignore_sync_end(SB), AX
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	JMP	racecall<>(SB)
424
// void runtime·racecall(void(*f)(...), ...)
// Calls C function f from race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
// The stack arguments are moved into RARG0-RARG3 and f into AX, then
// control is handed to racecall<>.
TEXT	runtime·racecall(SB), NOSPLIT, $0-0
	MOVQ	arg3+32(FP), RARG3
	MOVQ	arg2+24(FP), RARG2
	MOVQ	arg1+16(FP), RARG1
	MOVQ	arg0+8(FP), RARG0
	MOVQ	fn+0(FP), AX
	JMP	racecall<>(SB)
435
// Switches SP to g0 stack and calls (AX). Arguments already set.
// In: AX = C function, RARG0-RARG3 = its arguments, R14 = current g.
// The incoming SP is parked in R12 and restored after the call.
// Uses R10, R12 and R13 as scratch.
TEXT	racecall<>(SB), NOSPLIT|NOFRAME, $0-0
	MOVQ	SP, R12		// callee-saved, preserved across the CALL
	MOVQ	g_m(R14), R13
	// Switch to g0 stack.
	MOVQ	m_g0(R13), R10
	CMPQ	R10, R14
	JE	on_g0	// already on g0
	MOVQ	(g_sched+gobuf_sp)(R10), SP
on_g0:
	ANDQ	$~15, SP	// alignment for gcc ABI
	CALL	AX
	MOVQ	R12, SP	// back to the stack we arrived on
	// Back to Go world, set special registers.
	// The g register (R14) is preserved in C.
	XORPS	X15, X15
	RET
453
// C->Go callback thunk that allows calling runtime·racesymbolize from C code.
// Direct Go->C race call has only switched SP, finish g->g0 switch by setting correct g.
// The overall effect of Go->C->Go call chain is similar to that of mcall.
// RARG0 contains command code. RARG1 contains command-specific context.
// See racecallback for command codes.
TEXT	runtime·racecallbackthunk(SB), NOSPLIT|NOFRAME, $0-0
	// Handle command raceGetProcCmd (0) here.
	// First, code below assumes that we are on curg, while raceGetProcCmd
	// can be executed on g0. Second, it is called frequently, so will
	// benefit from this fast path.
	CMPQ	RARG0, $0
	JNE	rest
	// RARG0 is reused as scratch: g -> g.m -> m.p -> p.raceprocctx.
	get_tls(RARG0)
	MOVQ	g(RARG0), RARG0
	MOVQ	g_m(RARG0), RARG0
	MOVQ	m_p(RARG0), RARG0
	MOVQ	p_raceprocctx(RARG0), RARG0
	MOVQ	RARG0, (RARG1)	// store the result through the context pointer
	RET

rest:
	// Transition from C ABI to Go ABI.
	PUSH_REGS_HOST_TO_ABI0()
	// Set g = g0.
	get_tls(R12)
	MOVQ	g(R12), R14	// R14 = current g
	MOVQ	g_m(R14), R13	// R13 = g.m
	MOVQ	m_g0(R13), R15	// R15 = m.g0
	// Compare the current g (R14) with g0 (R15). Comparing m (R13) with g0
	// can never be equal, which would make noswitch unreachable and let the
	// path below overwrite g with m.curg even when the thunk was entered on g0.
	CMPQ	R14, R15
	JEQ	noswitch	// branch if already on g0
	MOVQ	R15, g(R12)	// g = m->g0
	MOVQ	R15, R14	// set g register
	PUSHQ	RARG1	// func arg
	PUSHQ	RARG0	// func arg
	CALL	runtime·racecallback(SB)
	POPQ	R12	// discard the two pushed arguments
	POPQ	R12
	// All registers are smashed after Go code, reload.
	get_tls(R12)
	MOVQ	g(R12), R13
	MOVQ	g_m(R13), R13
	MOVQ	m_curg(R13), R14
	MOVQ	R14, g(R12)	// g = m->curg
ret:
	POP_REGS_HOST_TO_ABI0()
	RET

noswitch:
	// already on g0: call back without touching g
	PUSHQ	RARG1	// func arg
	PUSHQ	RARG0	// func arg
	CALL	runtime·racecallback(SB)
	POPQ	R12	// discard the two pushed arguments
	POPQ	R12
	JMP	ret
509
View as plain text