// File: src/runtime/race_riscv64.s (Go runtime, riscv64 race-detector thunks)
1 // Copyright 2025 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 //go:build race
6
7 #include "go_asm.h"
8 #include "funcdata.h"
9 #include "textflag.h"
10
11 // The following thunks allow calling the gcc-compiled race runtime directly
12 // from Go code without going all the way through cgo.
13 // First, it's much faster (up to 50% speedup for real Go programs).
14 // Second, it eliminates race-related special cases from cgocall and scheduler.
15 // Third, in long-term it will allow to remove cyclic runtime/race dependency on cmd/go.
16
17 // A brief recap of the riscv C calling convention.
18 // Arguments are passed in X10...X17
19 // Callee-saved registers are: X8, X9, X18..X27
20 // Temporary registers are: X5..X7, X28..X31
21
// When calling racecalladdr, X5 is the call target address and X11 is the
// address being accessed (the thunks below set these up before jumping).
23
24 // The race ctx, ThreadState *thr below, is passed in X10 and loaded in racecalladdr.
25
26 // func runtime·raceread(addr uintptr)
27 // Called from instrumented code.
28 TEXT runtime·raceread<ABIInternal>(SB), NOSPLIT, $0-8
29 // void __tsan_read(ThreadState *thr, void *addr, void *pc);
30 MOV $__tsan_read(SB), X5
31 MOV X10, X11
32 MOV X1, X12
33 JMP racecalladdr<>(SB)
34
35 // func runtime·RaceRead(addr uintptr)
36 TEXT runtime·RaceRead(SB), NOSPLIT, $0-8
37 // This needs to be a tail call, because raceread reads caller pc.
38 JMP runtime·raceread(SB)
39
40 // func runtime·racereadpc(void *addr, void *callpc, void *pc)
41 TEXT runtime·racereadpc(SB), NOSPLIT, $0-24
42 // void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
43 MOV $__tsan_read_pc(SB), X5
44 MOV addr+0(FP), X11
45 MOV callpc+8(FP), X12
46 MOV pc+16(FP), X13
47 JMP racecalladdr<>(SB)
48
49 // func runtime·racewrite(addr uintptr)
50 // Called from instrumented code.
51 TEXT runtime·racewrite<ABIInternal>(SB), NOSPLIT, $0-8
52 // void __tsan_write(ThreadState *thr, void *addr, void *pc);
53 MOV $__tsan_write(SB), X5
54 MOV X10, X11
55 MOV X1, X12
56 JMP racecalladdr<>(SB)
57
58 // func runtime·RaceWrite(addr uintptr)
59 TEXT runtime·RaceWrite(SB), NOSPLIT, $0-8
60 // This needs to be a tail call, because racewrite reads caller pc.
61 JMP runtime·racewrite(SB)
62
63 // func runtime·racewritepc(void *addr, void *callpc, void *pc)
64 TEXT runtime·racewritepc(SB), NOSPLIT, $0-24
65 // void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
66 MOV $__tsan_write_pc(SB), X5
67 MOV addr+0(FP), X11
68 MOV callpc+8(FP), X12
69 MOV pc+16(FP), X13
70 JMP racecalladdr<>(SB)
71
72 // func runtime·racereadrange(addr, size uintptr)
73 // Called from instrumented code.
74 TEXT runtime·racereadrange<ABIInternal>(SB), NOSPLIT, $0-16
75 // void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
76 MOV $__tsan_read_range(SB), X5
77 MOV X11, X12
78 MOV X10, X11
79 MOV X1, X13
80 JMP racecalladdr<>(SB)
81
82 // func runtime·RaceReadRange(addr, size uintptr)
83 TEXT runtime·RaceReadRange(SB), NOSPLIT, $0-16
84 // This needs to be a tail call, because racereadrange reads caller pc.
85 JMP runtime·racereadrange(SB)
86
87 // func runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
88 TEXT runtime·racereadrangepc1(SB), NOSPLIT, $0-24
89 // void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
90 MOV $__tsan_read_range(SB), X5
91 MOV addr+0(FP), X11
92 MOV size+8(FP), X12
93 MOV pc+16(FP), X13
94
95 // pc is an interceptor address, but TSan expects it to point to the
96 // middle of an interceptor (see LLVM's SCOPED_INTERCEPTOR_RAW).
97 ADD $4, X13
98 JMP racecalladdr<>(SB)
99
100 // func runtime·racewriterange(addr, size uintptr)
101 // Called from instrumented code.
102 TEXT runtime·racewriterange<ABIInternal>(SB), NOSPLIT, $0-16
103 // void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
104 MOV $__tsan_write_range(SB), X5
105 MOV X11, X12
106 MOV X10, X11
107 MOV X1, X13
108 JMP racecalladdr<>(SB)
109
110 // func runtime·RaceWriteRange(addr, size uintptr)
111 TEXT runtime·RaceWriteRange(SB), NOSPLIT, $0-16
112 // This needs to be a tail call, because racewriterange reads caller pc.
113 JMP runtime·racewriterange(SB)
114
115 // func runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
116 TEXT runtime·racewriterangepc1(SB), NOSPLIT, $0-24
117 // void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
118 MOV $__tsan_write_range(SB), X5
119 MOV addr+0(FP), X11
120 MOV size+8(FP), X12
121 MOV pc+16(FP), X13
122 // pc is an interceptor address, but TSan expects it to point to the
123 // middle of an interceptor (see LLVM's SCOPED_INTERCEPTOR_RAW).
124 ADD $4, X13
125 JMP racecalladdr<>(SB)
126
127 // If addr (X11) is out of range, do nothing. Otherwise, setup goroutine context and
128 // invoke racecall. Other arguments are already set.
129 TEXT racecalladdr<>(SB), NOSPLIT, $0-0
130 MOV runtime·racearenastart(SB), X7
131 BLT X11, X7, data // Before racearena start?
132 MOV runtime·racearenaend(SB), X7
133 BLT X11, X7, call // Before racearena end?
134 data:
135 MOV runtime·racedatastart(SB), X7
136 BLT X11, X7, ret // Before racedata start?
137 MOV runtime·racedataend(SB), X7
138 BGE X11, X7, ret // At or after racedata end?
139 call:
140 MOV g_racectx(g), X10
141 JMP racecall<>(SB)
142 ret:
143 RET
144
145 // func runtime·racefuncenter(pc uintptr)
146 // Called from instrumented code.
147 TEXT runtime·racefuncenter<ABIInternal>(SB), NOSPLIT, $0-8
148 MOV $__tsan_func_enter(SB), X5
149 MOV X10, X11
150 MOV g_racectx(g), X10
151 JMP racecall<>(SB)
152
153 // Common code for racefuncenter
154 // X1 = caller's return address
155 TEXT racefuncenter<>(SB), NOSPLIT, $0-0
156 // void __tsan_func_enter(ThreadState *thr, void *pc);
157 MOV $__tsan_func_enter(SB), X5
158 MOV g_racectx(g), X10
159 MOV X1, X11
160 JMP racecall<>(SB)
161
162 // func runtime·racefuncexit()
163 // Called from instrumented code.
164 TEXT runtime·racefuncexit<ABIInternal>(SB), NOSPLIT, $0-0
165 // void __tsan_func_exit(ThreadState *thr);
166 MOV $__tsan_func_exit(SB), X5
167 MOV g_racectx(g), X10
168 JMP racecall<>(SB)
169
170 // Atomic operations for sync/atomic package.
171
172 // Load
173
174 TEXT sync∕atomic·LoadInt32(SB), NOSPLIT, $0-12
175 GO_ARGS
176 MOV $__tsan_go_atomic32_load(SB), X5
177 CALL racecallatomic<>(SB)
178 RET
179
180 TEXT sync∕atomic·LoadInt64(SB), NOSPLIT, $0-16
181 GO_ARGS
182 MOV $__tsan_go_atomic64_load(SB), X5
183 CALL racecallatomic<>(SB)
184 RET
185
186 TEXT sync∕atomic·LoadUint32(SB), NOSPLIT, $0-12
187 GO_ARGS
188 JMP sync∕atomic·LoadInt32(SB)
189
190 TEXT sync∕atomic·LoadUint64(SB), NOSPLIT, $0-16
191 GO_ARGS
192 JMP sync∕atomic·LoadInt64(SB)
193
194 TEXT sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-16
195 GO_ARGS
196 JMP sync∕atomic·LoadInt64(SB)
197
198 TEXT sync∕atomic·LoadPointer(SB), NOSPLIT, $0-16
199 GO_ARGS
200 JMP sync∕atomic·LoadInt64(SB)
201
202 // Store
203
204 TEXT sync∕atomic·StoreInt32(SB), NOSPLIT, $0-12
205 GO_ARGS
206 MOV $__tsan_go_atomic32_store(SB), X5
207 CALL racecallatomic<>(SB)
208 RET
209
210 TEXT sync∕atomic·StoreInt64(SB), NOSPLIT, $0-16
211 GO_ARGS
212 MOV $__tsan_go_atomic64_store(SB), X5
213 CALL racecallatomic<>(SB)
214 RET
215
216 TEXT sync∕atomic·StoreUint32(SB), NOSPLIT, $0-12
217 GO_ARGS
218 JMP sync∕atomic·StoreInt32(SB)
219
220 TEXT sync∕atomic·StoreUint64(SB), NOSPLIT, $0-16
221 GO_ARGS
222 JMP sync∕atomic·StoreInt64(SB)
223
224 TEXT sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-16
225 GO_ARGS
226 JMP sync∕atomic·StoreInt64(SB)
227
228 // Swap
229
230 TEXT sync∕atomic·SwapInt32(SB), NOSPLIT, $0-20
231 GO_ARGS
232 MOV $__tsan_go_atomic32_exchange(SB), X5
233 CALL racecallatomic<>(SB)
234 RET
235
236 TEXT sync∕atomic·SwapInt64(SB), NOSPLIT, $0-24
237 GO_ARGS
238 MOV $__tsan_go_atomic64_exchange(SB), X5
239 CALL racecallatomic<>(SB)
240 RET
241
242 TEXT sync∕atomic·SwapUint32(SB), NOSPLIT, $0-20
243 GO_ARGS
244 JMP sync∕atomic·SwapInt32(SB)
245
246 TEXT sync∕atomic·SwapUint64(SB), NOSPLIT, $0-24
247 GO_ARGS
248 JMP sync∕atomic·SwapInt64(SB)
249
250 TEXT sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-24
251 GO_ARGS
252 JMP sync∕atomic·SwapInt64(SB)
253
254 // Add
255
256 TEXT sync∕atomic·AddInt32(SB), NOSPLIT, $0-20
257 GO_ARGS
258 MOV $__tsan_go_atomic32_fetch_add(SB), X5
259 CALL racecallatomic<>(SB)
260 // TSan performed fetch_add, but Go needs add_fetch.
261 MOVW add+8(FP), X5
262 MOVW ret+16(FP), X6
263 ADD X5, X6, X5
264 MOVW X5, ret+16(FP)
265 RET
266
267 TEXT sync∕atomic·AddInt64(SB), NOSPLIT, $0-24
268 GO_ARGS
269 MOV $__tsan_go_atomic64_fetch_add(SB), X5
270 CALL racecallatomic<>(SB)
271 // TSan performed fetch_add, but Go needs add_fetch.
272 MOV add+8(FP), X5
273 MOV ret+16(FP), X6
274 ADD X5, X6, X5
275 MOV X5, ret+16(FP)
276 RET
277
278 TEXT sync∕atomic·AddUint32(SB), NOSPLIT, $0-20
279 GO_ARGS
280 JMP sync∕atomic·AddInt32(SB)
281
282 TEXT sync∕atomic·AddUint64(SB), NOSPLIT, $0-24
283 GO_ARGS
284 JMP sync∕atomic·AddInt64(SB)
285
286 TEXT sync∕atomic·AddUintptr(SB), NOSPLIT, $0-24
287 GO_ARGS
288 JMP sync∕atomic·AddInt64(SB)
289
290 // And
291 TEXT sync∕atomic·AndInt32(SB), NOSPLIT, $0-20
292 GO_ARGS
293 MOV $__tsan_go_atomic32_fetch_and(SB), X5
294 CALL racecallatomic<>(SB)
295 RET
296
297 TEXT sync∕atomic·AndInt64(SB), NOSPLIT, $0-24
298 GO_ARGS
299 MOV $__tsan_go_atomic64_fetch_and(SB), X5
300 CALL racecallatomic<>(SB)
301 RET
302
303 TEXT sync∕atomic·AndUint32(SB), NOSPLIT, $0-20
304 GO_ARGS
305 JMP sync∕atomic·AndInt32(SB)
306
307 TEXT sync∕atomic·AndUint64(SB), NOSPLIT, $0-24
308 GO_ARGS
309 JMP sync∕atomic·AndInt64(SB)
310
311 TEXT sync∕atomic·AndUintptr(SB), NOSPLIT, $0-24
312 GO_ARGS
313 JMP sync∕atomic·AndInt64(SB)
314
315 // Or
316 TEXT sync∕atomic·OrInt32(SB), NOSPLIT, $0-20
317 GO_ARGS
318 MOV $__tsan_go_atomic32_fetch_or(SB), X5
319 CALL racecallatomic<>(SB)
320 RET
321
322 TEXT sync∕atomic·OrInt64(SB), NOSPLIT, $0-24
323 GO_ARGS
324 MOV $__tsan_go_atomic64_fetch_or(SB), X5
325 CALL racecallatomic<>(SB)
326 RET
327
328 TEXT sync∕atomic·OrUint32(SB), NOSPLIT, $0-20
329 GO_ARGS
330 JMP sync∕atomic·OrInt32(SB)
331
332 TEXT sync∕atomic·OrUint64(SB), NOSPLIT, $0-24
333 GO_ARGS
334 JMP sync∕atomic·OrInt64(SB)
335
336 TEXT sync∕atomic·OrUintptr(SB), NOSPLIT, $0-24
337 GO_ARGS
338 JMP sync∕atomic·OrInt64(SB)
339
340 // CompareAndSwap
341
342 TEXT sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-17
343 GO_ARGS
344 MOV $__tsan_go_atomic32_compare_exchange(SB), X5
345 CALL racecallatomic<>(SB)
346 RET
347
348 TEXT sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-25
349 GO_ARGS
350 MOV $__tsan_go_atomic64_compare_exchange(SB), X5
351 CALL racecallatomic<>(SB)
352 RET
353
354 TEXT sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-17
355 GO_ARGS
356 JMP sync∕atomic·CompareAndSwapInt32(SB)
357
358 TEXT sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-25
359 GO_ARGS
360 JMP sync∕atomic·CompareAndSwapInt64(SB)
361
362 TEXT sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-25
363 GO_ARGS
364 JMP sync∕atomic·CompareAndSwapInt64(SB)
365
366 // Generic atomic operation implementation.
367 // X5 = addr of target function
368 TEXT racecallatomic<>(SB), NOSPLIT, $0
369 // Set up these registers
370 // X10 = *ThreadState
371 // X11 = caller pc
372 // X12 = pc
373 // X13 = addr of incoming arg list
374
375 // Trigger SIGSEGV early.
376 MOV 24(X2), X6 // 1st arg is addr. after two times CALL, get it at 24(X2)
377 MOVB (X6), X0 // segv here if addr is bad
378 // Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
379 MOV runtime·racearenastart(SB), X7
380 BLT X6, X7, racecallatomic_data
381 MOV runtime·racearenaend(SB), X7
382 BLT X6, X7, racecallatomic_ok
383 racecallatomic_data:
384 MOV runtime·racedatastart(SB), X7
385 BLT X6, X7, racecallatomic_ignore
386 MOV runtime·racedataend(SB), X7
387 BGE X6, X7, racecallatomic_ignore
388 racecallatomic_ok:
389 // Addr is within the good range, call the atomic function.
390 MOV g_racectx(g), X10 // goroutine context
391 MOV 8(X2), X11 // caller pc
392 MOV X1, X12 // pc
393 ADD $24, X2, X13
394 CALL racecall<>(SB)
395 RET
396 racecallatomic_ignore:
397 // Addr is outside the good range.
398 // Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
399 // An attempt to synchronize on the address would cause crash.
400 MOV X1, X20 // save PC
401 MOV X5, X21 // save target function
402 MOV $__tsan_go_ignore_sync_begin(SB), X5
403 MOV g_racectx(g), X10 // goroutine context
404 CALL racecall<>(SB)
405 MOV X21, X5 // restore the target function
406 // Call the atomic function.
407 MOV g_racectx(g), X10 // goroutine context
408 MOV 8(X2), X11 // caller pc
409 MOV X20, X12 // pc
410 ADD $24, X2, X13 // arguments
411 CALL racecall<>(SB)
412 // Call __tsan_go_ignore_sync_end.
413 MOV $__tsan_go_ignore_sync_end(SB), X5
414 MOV g_racectx(g), X10 // goroutine context
415 CALL racecall<>(SB)
416 RET
417
418 // func runtime·racecall(void(*f)(...), ...)
419 // Calls C function f from race runtime and passes up to 4 arguments to it.
420 // The arguments are never heap-object-preserving pointers, so we pretend there
421 // are no arguments.
422 TEXT runtime·racecall(SB), NOSPLIT, $0-0
423 MOV fn+0(FP), X5
424 MOV arg0+8(FP), X10
425 MOV arg1+16(FP), X11
426 MOV arg2+24(FP), X12
427 MOV arg3+32(FP), X13
428 JMP racecall<>(SB)
429
430 // Switches SP to g0 stack and calls X5. Arguments are already set.
431 TEXT racecall<>(SB), NOSPLIT|NOFRAME, $0-0
432 MOV X1, X18 // Save RA in callee save register
433 MOV X2, X19 // Save SP in callee save register
434 CALL runtime·save_g(SB) // Save g for callbacks
435
436 MOV g_m(g), X6
437
438 // Switch to g0 stack if we aren't already on g0 or gsignal.
439 MOV m_gsignal(X6), X7
440 BEQ X7, g, call
441 MOV m_g0(X6), X7
442 BEQ X7, g, call
443
444 MOV (g_sched+gobuf_sp)(X7), X2 // Switch to g0 stack
445 call:
446 JALR RA, (X5) // Call C function
447 MOV X19, X2 // Restore SP
448 JMP (X18) // Return to Go.
449
450 // C->Go callback thunk that allows to call runtime·racesymbolize from C code.
451 // Direct Go->C race call has only switched SP, finish g->g0 switch by setting correct g.
452 // The overall effect of Go->C->Go call chain is similar to that of mcall.
453 // R0 contains command code. R1 contains command-specific context.
454 // See racecallback for command codes.
455 TEXT runtime·racecallbackthunk(SB), NOSPLIT|NOFRAME, $0
456 // Handle command raceGetProcCmd (0) here.
457 // First, code below assumes that we are on curg, while raceGetProcCmd
458 // can be executed on g0. Second, it is called frequently, so will
459 // benefit from this fast path.
460 BNEZ X10, rest
461 MOV X1, X5
462 MOV g, X6
463 CALL runtime·load_g(SB)
464 MOV g_m(g), X7
465 MOV m_p(X7), X7
466 MOV p_raceprocctx(X7), X7
467 MOV X7, (X11)
468 MOV X6, g
469 JMP (X5)
470 rest:
471 // Save callee-save registers (X8, X9, X18..X27, F8, F9, F18..F27),
472 // since Go code will not respect this.
473 // 8(X2) and 16(X2) are for args passed to racecallback
474 SUB $(27*8), X2
475 MOV X1, (0*8)(X2)
476 MOV X8, (3*8)(X2)
477 MOV X9, (4*8)(X2)
478 MOV X18, (5*8)(X2)
479 MOV X19, (6*8)(X2)
480 MOV X20, (7*8)(X2)
481 MOV X21, (8*8)(X2)
482 MOV X22, (9*8)(X2)
483 MOV X23, (10*8)(X2)
484 MOV X24, (11*8)(X2)
485 MOV X25, (12*8)(X2)
486 MOV X26, (13*8)(X2)
487 MOV g, (14*8)(X2)
488 MOVD F8, (15*8)(X2)
489 MOVD F9, (16*8)(X2)
490 MOVD F18, (17*8)(X2)
491 MOVD F19, (18*8)(X2)
492 MOVD F20, (19*8)(X2)
493 MOVD F21, (20*8)(X2)
494 MOVD F22, (21*8)(X2)
495 MOVD F23, (22*8)(X2)
496 MOVD F24, (23*8)(X2)
497 MOVD F25, (24*8)(X2)
498 MOVD F26, (25*8)(X2)
499 MOVD F27, (26*8)(X2)
500
501 // Set g = g0.
502 CALL runtime·load_g(SB)
503 MOV g_m(g), X5
504 MOV m_g0(X5), X6
505 BEQ X6, g, noswitch // branch if already on g0
506 MOV X6, g
507
508 MOV X10, 8(X2) // func arg
509 MOV X11, 16(X2) // func arg
510 CALL runtime·racecallback(SB)
511
512 // All registers are smashed after Go code, reload.
513 MOV g_m(g), X5
514 MOV m_curg(X5), g // g = m->curg
515 ret:
516 // Restore callee-save registers.
517 MOV (0*8)(X2), X1
518 MOV (3*8)(X2), X8
519 MOV (4*8)(X2), X9
520 MOV (5*8)(X2), X18
521 MOV (6*8)(X2), X19
522 MOV (7*8)(X2), X20
523 MOV (8*8)(X2), X21
524 MOV (9*8)(X2), X22
525 MOV (10*8)(X2), X23
526 MOV (11*8)(X2), X24
527 MOV (12*8)(X2), X25
528 MOV (13*8)(X2), X26
529 MOV (14*8)(X2), g
530 MOVD (15*8)(X2), F8
531 MOVD (16*8)(X2), F9
532 MOVD (17*8)(X2), F18
533 MOVD (18*8)(X2), F19
534 MOVD (19*8)(X2), F20
535 MOVD (20*8)(X2), F21
536 MOVD (21*8)(X2), F22
537 MOVD (22*8)(X2), F23
538 MOVD (23*8)(X2), F24
539 MOVD (24*8)(X2), F25
540 MOVD (25*8)(X2), F26
541 MOVD (26*8)(X2), F27
542
543 ADD $(27*8), X2
544 JMP (X1)
545
546 noswitch:
547 // already on g0
548 MOV X10, 8(X2) // func arg
549 MOV X11, 16(X2) // func arg
550 CALL runtime·racecallback(SB)
551 JMP ret
552