Source file
src/runtime/os_linux.go
1
2
3
4
5 package runtime
6
7 import (
8 "internal/abi"
9 "internal/goarch"
10 "internal/runtime/atomic"
11 "internal/runtime/syscall"
12 "unsafe"
13 )
14
15
16
17
// sigPerThreadSyscall is the signal that syscall_runtime_doAllThreadsSyscall
// sends (via signalM) to every other thread to ask it to run the pending
// per-thread system call. It is the second real-time signal, _SIGRTMIN + 1.
// NOTE(review): presumably chosen to match the signal libc uses for the same
// purpose — confirm.
const sigPerThreadSyscall = _SIGRTMIN + 1
19
// mOS holds the Linux-specific per-thread state used by this file.
type mOS struct {
	// profileTimer is the ID returned by timer_create for this thread's
	// CPU-profiling timer. It is only meaningful while profileTimerValid is
	// true; setThreadCPUProfiler resets it to 0 when the timer is deleted.
	profileTimer int32
	// profileTimerValid reports whether profileTimer names a live timer.
	// It is atomic because validSIGPROF also reads it.
	profileTimerValid atomic.Bool

	// needPerThreadSyscall is set to 1 by syscall_runtime_doAllThreadsSyscall
	// to request that this thread run the pending per-thread syscall, and is
	// cleared back to 0 by runPerThreadSyscall once the thread has done so.
	needPerThreadSyscall atomic.Uint8
}
35
36
// futex is the raw futex(2) system call wrapper; it has no Go body in this
// file. The callers here treat a negative result as failure.
func futex(addr unsafe.Pointer, op int32, val uint32, ts, addr2 unsafe.Pointer, val3 uint32) int32
38
39
40
41
42
43
44
45
46
47
// Futex operation codes used by futexsleep and futexwakeup. The private flag
// is OR-ed into the base FUTEX_WAIT (0) and FUTEX_WAKE (1) operations.
const (
	_FUTEX_PRIVATE_FLAG = 1 << 7
	_FUTEX_WAIT_PRIVATE = _FUTEX_PRIVATE_FLAG | 0
	_FUTEX_WAKE_PRIVATE = _FUTEX_PRIVATE_FLAG | 1
)
53
54
55
56
57
58
59
60
61
62 func futexsleep(addr *uint32, val uint32, ns int64) {
63
64
65
66
67
68 if ns < 0 {
69 futex(unsafe.Pointer(addr), _FUTEX_WAIT_PRIVATE, val, nil, nil, 0)
70 return
71 }
72
73 var ts timespec
74 ts.setNsec(ns)
75 futex(unsafe.Pointer(addr), _FUTEX_WAIT_PRIVATE, val, unsafe.Pointer(&ts), nil, 0)
76 }
77
78
79
80
81 func futexwakeup(addr *uint32, cnt uint32) {
82 ret := futex(unsafe.Pointer(addr), _FUTEX_WAKE_PRIVATE, cnt, nil, nil, 0)
83 if ret >= 0 {
84 return
85 }
86
87
88
89
90 systemstack(func() {
91 print("futexwakeup addr=", addr, " returned ", ret, "\n")
92 })
93
94 *(*int32)(unsafe.Pointer(uintptr(0x1006))) = 0x1006
95 }
96
97 func getproccount() int32 {
98
99
100
101
102
103
104
105 const maxCPUs = 64 * 1024
106 var buf [maxCPUs / 8]byte
107 r := sched_getaffinity(0, unsafe.Sizeof(buf), &buf[0])
108 if r < 0 {
109 return 1
110 }
111 n := int32(0)
112 for _, v := range buf[:r] {
113 for v != 0 {
114 n += int32(v & 1)
115 v >>= 1
116 }
117 }
118 if n == 0 {
119 n = 1
120 }
121 return n
122 }
123
124
// Flag bits for the clone system call, followed by the combination the
// runtime uses when creating threads.
const (
	_CLONE_VM             = 0x100
	_CLONE_FS             = 0x200
	_CLONE_FILES          = 0x400
	_CLONE_SIGHAND        = 0x800
	_CLONE_PTRACE         = 0x2000
	_CLONE_VFORK          = 0x4000
	_CLONE_PARENT         = 0x8000
	_CLONE_THREAD         = 0x10000
	_CLONE_NEWNS          = 0x20000
	_CLONE_SYSVSEM        = 0x40000
	_CLONE_SETTLS         = 0x80000
	_CLONE_PARENT_SETTID  = 0x100000
	_CLONE_CHILD_CLEARTID = 0x200000
	_CLONE_UNTRACED       = 0x800000
	_CLONE_CHILD_SETTID   = 0x1000000
	_CLONE_STOPPED        = 0x2000000
	_CLONE_NEWUTS         = 0x4000000
	_CLONE_NEWIPC         = 0x8000000

	// cloneFlags is passed to clone by newosproc and newosproc0. The new
	// thread shares address space, filesystem information, file descriptors,
	// signal handlers and System V semaphore undo state with its creator, and
	// joins the creator's thread group.
	cloneFlags = _CLONE_VM | _CLONE_FS | _CLONE_FILES |
		_CLONE_SIGHAND | _CLONE_SYSVSEM | _CLONE_THREAD
)
159
160
// clone is the raw clone system call wrapper; it has no Go body in this file.
// Callers here treat a negative result as a negated errno and a non-negative
// result as success.
func clone(flags int32, stk, mp, gp, fn unsafe.Pointer) int32
162
163
164
165
166 func newosproc(mp *m) {
167 stk := unsafe.Pointer(mp.g0.stack.hi)
168
171 if false {
172 print("newosproc stk=", stk, " m=", mp, " g=", mp.g0, " clone=", abi.FuncPCABI0(clone), " id=", mp.id, " ostk=", &mp, "\n")
173 }
174
175
176
177 var oset sigset
178 sigprocmask(_SIG_SETMASK, &sigset_all, &oset)
179 ret := retryOnEAGAIN(func() int32 {
180 r := clone(cloneFlags, stk, unsafe.Pointer(mp), unsafe.Pointer(mp.g0), unsafe.Pointer(abi.FuncPCABI0(mstart)))
181
182
183 if r >= 0 {
184 return 0
185 }
186 return -r
187 })
188 sigprocmask(_SIG_SETMASK, &oset, nil)
189
190 if ret != 0 {
191 print("runtime: failed to create new OS thread (have ", mcount(), " already; errno=", ret, ")\n")
192 if ret == _EAGAIN {
193 println("runtime: may need to increase max user processes (ulimit -u)")
194 }
195 throw("newosproc")
196 }
197 }
198
199
200
201
202 func newosproc0(stacksize uintptr, fn unsafe.Pointer) {
203 stack := sysAlloc(stacksize, &memstats.stacks_sys)
204 if stack == nil {
205 writeErrStr(failallocatestack)
206 exit(1)
207 }
208 ret := clone(cloneFlags, unsafe.Pointer(uintptr(stack)+stacksize), nil, nil, fn)
209 if ret < 0 {
210 writeErrStr(failthreadcreate)
211 exit(1)
212 }
213 }
214
// Auxiliary vector tags (see getauxval(3)). sysauxv consumes _AT_NULL,
// _AT_PAGESZ, _AT_SECURE and _AT_RANDOM directly; the remaining tags are not
// referenced in this file.
const (
	_AT_NULL = 0 // terminates the vector
	_AT_PAGESZ = 6 // value is stored in physPageSize
	_AT_PLATFORM = 15
	_AT_HWCAP = 16
	_AT_SECURE = 23 // value 1 sets secureMode
	_AT_RANDOM = 25 // value is the address of 16 bytes used as startupRand
	_AT_HWCAP2 = 26
)
224
// procAuxv is the NUL-terminated path sysargs opens when the auxiliary vector
// found after envp is empty.
var procAuxv = []byte("/proc/self/auxv\x00")

// addrspace_vec is the one-byte result buffer handed to mincore by sysargs.
var addrspace_vec [1]byte

// mincore is the raw mincore(2) system call wrapper; it has no Go body in
// this file. sysargs treats a zero result as success.
func mincore(addr unsafe.Pointer, n uintptr, dst *byte) int32

// auxvreadbuf receives the contents of /proc/self/auxv in sysargs and then
// backs the auxv slice.
var auxvreadbuf [128]uintptr
232
233 func sysargs(argc int32, argv **byte) {
234 n := argc + 1
235
236
237 for argv_index(argv, n) != nil {
238 n++
239 }
240
241
242 n++
243
244
245 auxvp := (*[1 << 28]uintptr)(add(unsafe.Pointer(argv), uintptr(n)*goarch.PtrSize))
246
247 if pairs := sysauxv(auxvp[:]); pairs != 0 {
248 auxv = auxvp[: pairs*2 : pairs*2]
249 return
250 }
251
252
253
254 fd := open(&procAuxv[0], 0 , 0)
255 if fd < 0 {
256
257
258
259 const size = 256 << 10
260 p, err := mmap(nil, size, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
261 if err != 0 {
262 return
263 }
264 var n uintptr
265 for n = 4 << 10; n < size; n <<= 1 {
266 err := mincore(unsafe.Pointer(uintptr(p)+n), 1, &addrspace_vec[0])
267 if err == 0 {
268 physPageSize = n
269 break
270 }
271 }
272 if physPageSize == 0 {
273 physPageSize = size
274 }
275 munmap(p, size)
276 return
277 }
278
279 n = read(fd, noescape(unsafe.Pointer(&auxvreadbuf[0])), int32(unsafe.Sizeof(auxvreadbuf)))
280 closefd(fd)
281 if n < 0 {
282 return
283 }
284
285
286 auxvreadbuf[len(auxvreadbuf)-2] = _AT_NULL
287 pairs := sysauxv(auxvreadbuf[:])
288 auxv = auxvreadbuf[: pairs*2 : pairs*2]
289 }
290
291
// secureMode is set by sysauxv to whether the _AT_SECURE auxv entry equals 1.
var secureMode bool
293
294 func sysauxv(auxv []uintptr) (pairs int) {
295 var i int
296 for ; auxv[i] != _AT_NULL; i += 2 {
297 tag, val := auxv[i], auxv[i+1]
298 switch tag {
299 case _AT_RANDOM:
300
301
302 startupRand = (*[16]byte)(unsafe.Pointer(val))[:]
303
304 case _AT_PAGESZ:
305 physPageSize = val
306
307 case _AT_SECURE:
308 secureMode = val == 1
309 }
310
311 archauxv(tag, val)
312 vdsoauxv(tag, val)
313 }
314 return i / 2
315 }
316
// sysTHPSizePath is the NUL-terminated sysfs path that getHugePageSize reads.
var sysTHPSizePath = []byte("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size\x00")
318
319 func getHugePageSize() uintptr {
320 var numbuf [20]byte
321 fd := open(&sysTHPSizePath[0], 0 , 0)
322 if fd < 0 {
323 return 0
324 }
325 ptr := noescape(unsafe.Pointer(&numbuf[0]))
326 n := read(fd, ptr, int32(len(numbuf)))
327 closefd(fd)
328 if n <= 0 {
329 return 0
330 }
331 n--
332 v, ok := atoi(slicebytetostringtmp((*byte)(ptr), int(n)))
333 if !ok || v < 0 {
334 v = 0
335 }
336 if v&(v-1) != 0 {
337
338 return 0
339 }
340 return uintptr(v)
341 }
342
// osinit records the CPU count and the transparent huge page size, then runs
// the architecture-specific initialization.
func osinit() {
	ncpu = getproccount()
	physHugePageSize = getHugePageSize()
	osArchInit()
}
348
// urandom_dev is the NUL-terminated path of the device readRandom reads from.
var urandom_dev = []byte("/dev/urandom\x00")
350
351 func readRandom(r []byte) int {
352 fd := open(&urandom_dev[0], 0 , 0)
353 n := read(fd, unsafe.Pointer(&r[0]), int32(len(r)))
354 closefd(fd)
355 return int(n)
356 }
357
// goenvs delegates to the shared Unix implementation, goenvs_unix.
func goenvs() {
	goenvs_unix()
}
361
362
363
364
365
366
367
// libpreinit installs the signal handlers by calling initsig with its
// argument set to true.
// NOTE(review): presumably the early-initialization hook for library build
// modes — confirm against the callers.
func libpreinit() {
	initsig(true)
}
371
372
373
374 func mpreinit(mp *m) {
375 mp.gsignal = malg(32 * 1024)
376 mp.gsignal.m = mp
377 }
378
// gettid returns the calling thread's kernel thread ID; it has no Go body in
// this file.
func gettid() uint32
380
381
382
383 func minit() {
384 minitSignals()
385
386
387
388
389 getg().m.procid = uint64(gettid())
390 }
391
392
393
394
// unminit undoes minit: it tears down the thread's signal state and clears
// the recorded thread ID.
func unminit() {
	unminitSignals()
	getg().m.procid = 0
}
399
400
401
// mdestroy is a no-op on Linux: there is nothing to release for mp here.
func mdestroy(mp *m) {
}
404
405
406
407
408
// The declarations below have no Go body in this file.

// sigreturn__sigaction is installed as sa_restorer by setsig on 386 and amd64.
func sigreturn__sigaction()

// sigtramp is the signal entry point setsig installs in place of sighandler
// when cgo is not in use.
func sigtramp()

// cgoSigtramp is the signal entry point setsig installs in place of
// sighandler when cgo is in use.
func cgoSigtramp()

// sigaltstack wraps the sigaltstack(2) system call.
func sigaltstack(new, old *stackt)

// setitimer wraps the setitimer(2) system call.
func setitimer(mode int32, new, old *itimerval)

// timer_create wraps timer_create(2). Callers here treat a non-zero result as
// failure.
func timer_create(clockid int32, sevp *sigevent, timerid *int32) int32

// timer_settime wraps timer_settime(2). Callers here treat a non-zero result
// as a negated errno.
func timer_settime(timerid int32, flags int32, new, old *itimerspec) int32

// timer_delete wraps timer_delete(2). Callers here treat a non-zero result as
// a negated errno.
func timer_delete(timerid int32) int32

// rtsigprocmask wraps the rt_sigprocmask system call; size is the size of the
// signal set in bytes.
func rtsigprocmask(how int32, new, old *sigset, size int32)
430
431
432
433 func sigprocmask(how int32, new, old *sigset) {
434 rtsigprocmask(how, new, old, int32(unsafe.Sizeof(*new)))
435 }
436
// The declarations below have no Go body in this file.

// raise and raiseproc deliver signal sig.
// NOTE(review): presumably to the calling thread and to the whole process
// respectively — confirm against the implementations.
func raise(sig uint32)
func raiseproc(sig uint32)

// sched_getaffinity wraps sched_getaffinity(2). getproccount treats a negative
// result as failure and otherwise uses it as the number of bytes written to
// buf.
func sched_getaffinity(pid, len uintptr, buf *byte) int32

// osyield yields the processor; the wait loops in
// syscall_runtime_doAllThreadsSyscall call it while spinning.
func osyield()
443
444
// osyield_no_g simply calls osyield.
// NOTE(review): presumably exists for callers running without a g — confirm.
func osyield_no_g() {
	osyield()
}
448
// pipe2 wraps pipe2(2), returning the read and write descriptors and an
// errno; it has no Go body in this file.
func pipe2(flags int32) (r, w int32, errno int32)
450
451
452 func fcntl(fd, cmd, arg int32) (ret int32, errno int32) {
453 r, _, err := syscall.Syscall6(syscall.SYS_FCNTL, uintptr(fd), uintptr(cmd), uintptr(arg), 0, 0, 0)
454 return int32(r), int32(err)
455 }
456
// Size limits in bytes; not referenced elsewhere in this file.
// NOTE(review): presumably the kernel's maximum siginfo and sigevent sizes,
// used to pad the corresponding Go structs — confirm against their
// definitions.
const (
	_si_max_size = 128
	_sigev_max_size = 64
)
461
462
463
464 func setsig(i uint32, fn uintptr) {
465 var sa sigactiont
466 sa.sa_flags = _SA_SIGINFO | _SA_ONSTACK | _SA_RESTORER | _SA_RESTART
467 sigfillset(&sa.sa_mask)
468
469
470
471 if GOARCH == "386" || GOARCH == "amd64" {
472 sa.sa_restorer = abi.FuncPCABI0(sigreturn__sigaction)
473 }
474 if fn == abi.FuncPCABIInternal(sighandler) {
475 if iscgo {
476 fn = abi.FuncPCABI0(cgoSigtramp)
477 } else {
478 fn = abi.FuncPCABI0(sigtramp)
479 }
480 }
481 sa.sa_handler = fn
482 sigaction(i, &sa, nil)
483 }
484
485
486
487 func setsigstack(i uint32) {
488 var sa sigactiont
489 sigaction(i, nil, &sa)
490 if sa.sa_flags&_SA_ONSTACK != 0 {
491 return
492 }
493 sa.sa_flags |= _SA_ONSTACK
494 sigaction(i, &sa, nil)
495 }
496
497
498
499 func getsig(i uint32) uintptr {
500 var sa sigactiont
501 sigaction(i, nil, &sa)
502 return sa.sa_handler
503 }
504
505
506
507
508 func setSignalstackSP(s *stackt, sp uintptr) {
509 *(*uintptr)(unsafe.Pointer(&s.ss_sp)) = sp
510 }
511
512
// fixsigcode is a no-op on Linux: the signal code in c is left as is.
func (c *sigctxt) fixsigcode(sig uint32) {
}
515
516
517
518
519 func sysSigaction(sig uint32, new, old *sigactiont) {
520 if rt_sigaction(uintptr(sig), new, old, unsafe.Sizeof(sigactiont{}.sa_mask)) != 0 {
521
522
523
524
525
526
527
528
529
530
531
532 if sig != 32 && sig != 33 && sig != 64 {
533
534 systemstack(func() {
535 throw("sigaction failed")
536 })
537 }
538 }
539 }
540
541
542
543
// The declarations below have no Go body in this file.

// rt_sigaction wraps the rt_sigaction system call; size is the size of the
// signal mask in bytes. sysSigaction treats a non-zero result as failure.
func rt_sigaction(sig uintptr, new, old *sigactiont, size uintptr) int32

// getpid returns the process ID; signalM passes it to tgkill as the thread
// group ID.
func getpid() int

// tgkill sends signal sig to thread tid in thread group tgid.
func tgkill(tgid, tid, sig int)
548
549
550 func signalM(mp *m, sig int) {
551 tgkill(getpid(), int(mp.procid), sig)
552 }
553
554
555
556
557
558
559
560
561 func validSIGPROF(mp *m, c *sigctxt) bool {
562 code := int32(c.sigcode())
563 setitimer := code == _SI_KERNEL
564 timer_create := code == _SI_TIMER
565
566 if !(setitimer || timer_create) {
567
568
569
570 return true
571 }
572
573 if mp == nil {
574
575
576
577
578
579
580
581
582
583
584
585
586 return setitimer
587 }
588
589
590
591 if mp.profileTimerValid.Load() {
592
593
594
595
596
597 return timer_create
598 }
599
600
601 return setitimer
602 }
603
// setProcessCPUProfiler forwards hz unchanged to setProcessCPUProfilerTimer.
func setProcessCPUProfiler(hz int32) {
	setProcessCPUProfilerTimer(hz)
}
607
608 func setThreadCPUProfiler(hz int32) {
609 mp := getg().m
610 mp.profilehz = hz
611
612
613 if mp.profileTimerValid.Load() {
614 timerid := mp.profileTimer
615 mp.profileTimerValid.Store(false)
616 mp.profileTimer = 0
617
618 ret := timer_delete(timerid)
619 if ret != 0 {
620 print("runtime: failed to disable profiling timer; timer_delete(", timerid, ") errno=", -ret, "\n")
621 throw("timer_delete")
622 }
623 }
624
625 if hz == 0 {
626
627 return
628 }
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649 spec := new(itimerspec)
650 spec.it_value.setNsec(1 + int64(cheaprandn(uint32(1e9/hz))))
651 spec.it_interval.setNsec(1e9 / int64(hz))
652
653 var timerid int32
654 var sevp sigevent
655 sevp.notify = _SIGEV_THREAD_ID
656 sevp.signo = _SIGPROF
657 sevp.sigev_notify_thread_id = int32(mp.procid)
658 ret := timer_create(_CLOCK_THREAD_CPUTIME_ID, &sevp, &timerid)
659 if ret != 0 {
660
661
662 return
663 }
664
665 ret = timer_settime(timerid, 0, spec, nil)
666 if ret != 0 {
667 print("runtime: failed to configure profiling timer; timer_settime(", timerid,
668 ", 0, {interval: {",
669 spec.it_interval.tv_sec, "s + ", spec.it_interval.tv_nsec, "ns} value: {",
670 spec.it_value.tv_sec, "s + ", spec.it_value.tv_nsec, "ns}}, nil) errno=", -ret, "\n")
671 throw("timer_settime")
672 }
673
674 mp.profileTimer = timerid
675 mp.profileTimerValid.Store(true)
676 }
677
678
679
// perThreadSyscallArgs describes a system call that
// syscall_runtime_doAllThreadsSyscall wants every thread to execute: the trap
// number, its six arguments, and the results (r1, r2) each thread is expected
// to obtain.
type perThreadSyscallArgs struct {
	trap                   uintptr // system call number
	a1, a2, a3, a4, a5, a6 uintptr // system call arguments
	r1, r2                 uintptr // results obtained by the initiating thread
}
691
692
693
694
695
696
// perThreadSyscall holds the system call currently being broadcast to all
// threads. syscall_runtime_doAllThreadsSyscall fills it in before signalling
// the other threads and resets it to the zero value once they have all
// finished; runPerThreadSyscall reads it.
var perThreadSyscall perThreadSyscallArgs
698
699
700
701
702
703
704
705
706
// syscall_runtime_doAllThreadsSyscall executes the system call trap with
// arguments a1..a6 on every thread known to the runtime.
//
// The call is made on the calling thread first. If it fails there, its
// results are returned immediately and no other thread runs it. Otherwise
// every other M is signalled with sigPerThreadSyscall and runs the same call
// through runPerThreadSyscall, which aborts the program if any thread's
// result differs. The values returned are those of the calling thread.
//
// It panics when cgo is enabled.
func syscall_runtime_doAllThreadsSyscall(trap, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, err uintptr) {
	if iscgo {
		// Not supported for cgo programs.
		panic("doAllThreadsSyscall not supported with cgo enabled")
	}

	// Stop the world for the whole broadcast.
	stw := stopTheWorld(stwAllThreadsSyscall)

	// NOTE(review): presumably keeps new Ms from being created while allm is
	// walked below — confirm against the users of allocmLock.
	allocmLock.lock()

	// Pin this goroutine to its M; the matching releasem calls are below.
	acquirem()

	// Run the call on this thread first. Only a successful call is
	// propagated to the other threads.
	r1, r2, errno := syscall.Syscall6(trap, a1, a2, a3, a4, a5, a6)
	if GOARCH == "ppc64" || GOARCH == "ppc64le" {
		// r2 is not compared on ppc64; force it to zero.
		r2 = 0
	}
	if errno != 0 {
		// Undo the three steps above in reverse order.
		releasem(getg().m)
		allocmLock.unlock()
		startTheWorld(stw)
		return r1, r2, errno
	}

	// Publish the call and its expected results for runPerThreadSyscall.
	perThreadSyscall = perThreadSyscallArgs{
		trap: trap,
		a1: a1,
		a2: a2,
		a3: a3,
		a4: a4,
		a5: a5,
		a6: a6,
		r1: r1,
		r2: r2,
	}

	// Wait until every M has recorded a thread ID (minit stores procid), so
	// that each one can be signalled.
	for mp := allm; mp != nil; mp = mp.alllink {
		for atomic.Load64(&mp.procid) == 0 {
			// procid not recorded yet; yield and retry.
			osyield()
		}
	}

	// Ask every other thread to run the call: set its flag, then signal it.
	gp := getg()
	tid := gp.m.procid
	for mp := allm; mp != nil; mp = mp.alllink {
		if atomic.Load64(&mp.procid) == tid {
			// This thread has already made the call.
			continue
		}
		mp.needPerThreadSyscall.Store(1)
		signalM(mp, sigPerThreadSyscall)
	}

	// Wait for each signalled thread to clear its flag.
	for mp := allm; mp != nil; mp = mp.alllink {
		if mp.procid == tid {
			continue
		}
		for mp.needPerThreadSyscall.Load() != 0 {
			osyield()
		}
	}

	// The broadcast is complete; clear the published call.
	perThreadSyscall = perThreadSyscallArgs{}

	releasem(getg().m)
	allocmLock.unlock()
	startTheWorld(stw)

	return r1, r2, errno
}
853
854
855
856
857
858
859
860 func runPerThreadSyscall() {
861 gp := getg()
862 if gp.m.needPerThreadSyscall.Load() == 0 {
863 return
864 }
865
866 args := perThreadSyscall
867 r1, r2, errno := syscall.Syscall6(args.trap, args.a1, args.a2, args.a3, args.a4, args.a5, args.a6)
868 if GOARCH == "ppc64" || GOARCH == "ppc64le" {
869
870 r2 = 0
871 }
872 if errno != 0 || r1 != args.r1 || r2 != args.r2 {
873 print("trap:", args.trap, ", a123456=[", args.a1, ",", args.a2, ",", args.a3, ",", args.a4, ",", args.a5, ",", args.a6, "]\n")
874 print("results: got {r1=", r1, ",r2=", r2, ",errno=", errno, "}, want {r1=", args.r1, ",r2=", args.r2, ",errno=0}\n")
875 fatal("AllThreadsSyscall6 results differ between threads; runtime corrupted")
876 }
877
878 gp.m.needPerThreadSyscall.Store(0)
879 }
880
// Signal code values compared against sigctxt.sigcode by sigFromUser
// (_SI_USER, _SI_TKILL) and sigFromSeccomp (_SYS_SECCOMP).
const (
	_SI_USER = 0
	_SI_TKILL = -6
	_SYS_SECCOMP = 1
)
886
887
888
889
890
891 func (c *sigctxt) sigFromUser() bool {
892 code := int32(c.sigcode())
893 return code == _SI_USER || code == _SI_TKILL
894 }
895
896
897
898
899 func (c *sigctxt) sigFromSeccomp() bool {
900 code := int32(c.sigcode())
901 return code == _SYS_SECCOMP
902 }
903
904
905 func mprotect(addr unsafe.Pointer, n uintptr, prot int32) (ret int32, errno int32) {
906 r, _, err := syscall.Syscall6(syscall.SYS_MPROTECT, uintptr(addr), n, uintptr(prot), 0, 0, 0)
907 return int32(r), int32(err)
908 }
909
View as plain text