Source file
src/runtime/os_linux.go
1
2
3
4
5 package runtime
6
7 import (
8 "internal/abi"
9 "internal/goarch"
10 "internal/runtime/atomic"
11 "internal/runtime/syscall"
12 "unsafe"
13 )
14
15
16
17
// sigPerThreadSyscall is the signal number (_SIGRTMIN + 1) that
// syscall_runtime_doAllThreadsSyscall sends to every other M, via signalM,
// to ask it to run the pending per-thread system call.
const sigPerThreadSyscall = _SIGRTMIN + 1
19
// mOS holds the Linux-specific per-thread state. Its fields are reached
// through an *m elsewhere in this file (for example mp.profileTimer).
type mOS struct {
	// profileTimer is the timer ID that setThreadCPUProfiler obtained from
	// timer_create for this thread. It is reset to 0 when the timer is
	// deleted and is only meaningful while profileTimerValid is true.
	profileTimer int32
	// profileTimerValid is stored true once the per-thread profiling timer
	// has been created and armed, and false before it is deleted.
	// validSIGPROF reads it to decide which SIGPROF source to accept.
	profileTimerValid atomic.Bool

	// needPerThreadSyscall is set to 1 by
	// syscall_runtime_doAllThreadsSyscall before it signals this thread and
	// is stored back to 0 by runPerThreadSyscall once the call has run.
	needPerThreadSyscall atomic.Uint8

	// vgetrandomState is an opaque handle handed back to
	// vgetrandomPutState by mdestroy; zero means no state is held.
	vgetrandomState uintptr
}
39
40
// futex is declared without a body; its implementation lives outside this
// file. Callers here pass the futex word in addr, the operation in op, the
// operation argument in val and an optional *timespec timeout in ts, and
// treat a negative result as failure.
func futex(addr unsafe.Pointer, op int32, val uint32, ts, addr2 unsafe.Pointer, val3 uint32) int32
42
43
44
45
46
47
48
49
50
51
// Futex operation codes passed as the op argument of futex. Both operations
// used by this file carry the private flag bit (value 128).
const (
	_FUTEX_PRIVATE_FLAG = 1 << 7
	// Operation 0 combined with the private flag; used by futexsleep.
	_FUTEX_WAIT_PRIVATE = _FUTEX_PRIVATE_FLAG | 0
	// Operation 1 combined with the private flag; used by futexwakeup.
	_FUTEX_WAKE_PRIVATE = _FUTEX_PRIVATE_FLAG | 1
)
57
58
59
60
61
62
63
64
65
66 func futexsleep(addr *uint32, val uint32, ns int64) {
67
68
69
70
71
72 if ns < 0 {
73 futex(unsafe.Pointer(addr), _FUTEX_WAIT_PRIVATE, val, nil, nil, 0)
74 return
75 }
76
77 var ts timespec
78 ts.setNsec(ns)
79 futex(unsafe.Pointer(addr), _FUTEX_WAIT_PRIVATE, val, unsafe.Pointer(&ts), nil, 0)
80 }
81
82
83
84
85 func futexwakeup(addr *uint32, cnt uint32) {
86 ret := futex(unsafe.Pointer(addr), _FUTEX_WAKE_PRIVATE, cnt, nil, nil, 0)
87 if ret >= 0 {
88 return
89 }
90
91
92
93
94 systemstack(func() {
95 print("futexwakeup addr=", addr, " returned ", ret, "\n")
96 })
97
98 *(*int32)(unsafe.Pointer(uintptr(0x1006))) = 0x1006
99 }
100
101 func getproccount() int32 {
102
103
104
105
106
107
108
109 const maxCPUs = 64 * 1024
110 var buf [maxCPUs / 8]byte
111 r := sched_getaffinity(0, unsafe.Sizeof(buf), &buf[0])
112 if r < 0 {
113 return 1
114 }
115 n := int32(0)
116 for _, v := range buf[:r] {
117 for v != 0 {
118 n += int32(v & 1)
119 v >>= 1
120 }
121 }
122 if n == 0 {
123 n = 1
124 }
125 return n
126 }
127
128
// Flag bits for the flags argument of clone. Only the ones combined into
// cloneFlags are used in this file; the rest are listed for completeness
// (their meanings are defined by the kernel's clone interface).
const (
	_CLONE_VM             = 0x100
	_CLONE_FS             = 0x200
	_CLONE_FILES          = 0x400
	_CLONE_SIGHAND        = 0x800
	_CLONE_PTRACE         = 0x2000
	_CLONE_VFORK          = 0x4000
	_CLONE_PARENT         = 0x8000
	_CLONE_THREAD         = 0x10000
	_CLONE_NEWNS          = 0x20000
	_CLONE_SYSVSEM        = 0x40000
	_CLONE_SETTLS         = 0x80000
	_CLONE_PARENT_SETTID  = 0x100000
	_CLONE_CHILD_CLEARTID = 0x200000
	_CLONE_UNTRACED       = 0x800000
	_CLONE_CHILD_SETTID   = 0x1000000
	_CLONE_STOPPED        = 0x2000000
	_CLONE_NEWUTS         = 0x4000000
	_CLONE_NEWIPC         = 0x8000000

	// cloneFlags is the flag set that newosproc and newosproc0 pass to
	// clone when creating a new OS thread.
	cloneFlags = _CLONE_VM |
		_CLONE_FS |
		_CLONE_FILES |
		_CLONE_SIGHAND |
		_CLONE_SYSVSEM |
		_CLONE_THREAD
)
163
164
// clone is declared without a body; its implementation lives outside this
// file. Callers here pass the flag set, the top of the new stack in stk,
// the m and g for the new thread (or nil) and the entry function in fn.
// newosproc treats a non-negative result as success and a negative result
// as a negated errno.
func clone(flags int32, stk, mp, gp, fn unsafe.Pointer) int32
166
167
168
169
170 func newosproc(mp *m) {
171 stk := unsafe.Pointer(mp.g0.stack.hi)
172
175 if false {
176 print("newosproc stk=", stk, " m=", mp, " g=", mp.g0, " clone=", abi.FuncPCABI0(clone), " id=", mp.id, " ostk=", &mp, "\n")
177 }
178
179
180
181 var oset sigset
182 sigprocmask(_SIG_SETMASK, &sigset_all, &oset)
183 ret := retryOnEAGAIN(func() int32 {
184 r := clone(cloneFlags, stk, unsafe.Pointer(mp), unsafe.Pointer(mp.g0), unsafe.Pointer(abi.FuncPCABI0(mstart)))
185
186
187 if r >= 0 {
188 return 0
189 }
190 return -r
191 })
192 sigprocmask(_SIG_SETMASK, &oset, nil)
193
194 if ret != 0 {
195 print("runtime: failed to create new OS thread (have ", mcount(), " already; errno=", ret, ")\n")
196 if ret == _EAGAIN {
197 println("runtime: may need to increase max user processes (ulimit -u)")
198 }
199 throw("newosproc")
200 }
201 }
202
203
204
205
206 func newosproc0(stacksize uintptr, fn unsafe.Pointer) {
207 stack := sysAlloc(stacksize, &memstats.stacks_sys)
208 if stack == nil {
209 writeErrStr(failallocatestack)
210 exit(1)
211 }
212 ret := clone(cloneFlags, unsafe.Pointer(uintptr(stack)+stacksize), nil, nil, fn)
213 if ret < 0 {
214 writeErrStr(failthreadcreate)
215 exit(1)
216 }
217 }
218
// Auxiliary vector tags. sysauxv handles _AT_NULL, _AT_PAGESZ, _AT_SECURE
// and _AT_RANDOM itself; every tag is also forwarded to archauxv and
// vdsoauxv, which presumably consume the remaining ones.
const (
	_AT_NULL     = 0  // terminates the vector (sysauxv stops here)
	_AT_PAGESZ   = 6  // value is stored into physPageSize
	_AT_PLATFORM = 15 // not used in this file
	_AT_HWCAP    = 16 // not used in this file
	_AT_SECURE   = 23 // value 1 sets secureMode
	_AT_RANDOM   = 25 // value is the address of 16 bytes used as startupRand
	_AT_HWCAP2   = 26 // not used in this file
)
228
// procAuxv is the NUL-terminated path sysargs opens when the auxiliary
// vector found after argv yields no pairs.
var procAuxv = []byte("/proc/self/auxv\x00")

// addrspace_vec is the one-byte result buffer sysargs passes to mincore
// while probing for the page size.
var addrspace_vec [1]byte

// mincore is declared without a body; its implementation lives outside this
// file. sysargs treats a zero result as success.
func mincore(addr unsafe.Pointer, n uintptr, dst *byte) int32

// auxvreadbuf receives the contents of /proc/self/auxv in sysargs; the
// package-level auxv slice may end up aliasing it.
var auxvreadbuf [128]uintptr
236
// sysargs locates the auxiliary vector and records it in auxv, letting
// sysauxv extract the values the runtime needs. It tries, in order:
//
//  1. the words that follow the NULL-terminated pointer lists after argv;
//  2. the contents of /proc/self/auxv;
//  3. if that file cannot be opened, a mincore probe that only determines
//     physPageSize.
func sysargs(argc int32, argv **byte) {
	n := argc + 1

	// Starting just past argv's NULL terminator, skip pointer entries
	// (presumably the environment block) until the next NULL.
	for argv_index(argv, n) != nil {
		n++
	}

	// Step over that NULL terminator as well.
	n++

	// Whatever follows is treated as the auxiliary vector.
	auxvp := (*[1 << 28]uintptr)(add(unsafe.Pointer(argv), uintptr(n)*goarch.PtrSize))

	if pairs := sysauxv(auxvp[:]); pairs != 0 {
		auxv = auxvp[: pairs*2 : pairs*2]
		return
	}

	// The in-memory vector was empty; fall back to reading the file.
	// The file is opened with flags 0 and mode 0.
	fd := open(&procAuxv[0], 0, 0)
	if fd < 0 {
		// The file could not be opened. Probe for the page size instead:
		// map an anonymous region and call mincore at increasing
		// power-of-two offsets, taking the first offset mincore accepts
		// (presumably it rejects addresses that are not page-aligned).
		const size = 256 << 10
		p, err := mmap(nil, size, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
		if err != 0 {
			return
		}
		var n uintptr
		for n = 4 << 10; n < size; n <<= 1 {
			err := mincore(unsafe.Pointer(uintptr(p)+n), 1, &addrspace_vec[0])
			if err == 0 {
				physPageSize = n
				break
			}
		}
		// No offset below size was accepted; use size itself.
		if physPageSize == 0 {
			physPageSize = size
		}
		munmap(p, size)
		return
	}

	n = read(fd, noescape(unsafe.Pointer(&auxvreadbuf[0])), int32(unsafe.Sizeof(auxvreadbuf)))
	closefd(fd)
	if n < 0 {
		return
	}

	// Force a terminating tag into the last pair so that sysauxv always
	// stops inside the buffer, even if the read filled all of it.
	auxvreadbuf[len(auxvreadbuf)-2] = _AT_NULL
	pairs := sysauxv(auxvreadbuf[:])
	auxv = auxvreadbuf[: pairs*2 : pairs*2]
}
294
295
// secureMode is set by sysauxv to whether the _AT_SECURE entry of the
// auxiliary vector has the value 1.
var secureMode bool
297
298 func sysauxv(auxv []uintptr) (pairs int) {
299
300
301 var i int
302 for ; auxv[i] != _AT_NULL; i += 2 {
303 tag, val := auxv[i], auxv[i+1]
304 switch tag {
305 case _AT_RANDOM:
306
307
308
309
310
311
312 startupRand = (*[16]byte)(unsafe.Pointer(val))[:]
313
314 case _AT_PAGESZ:
315 physPageSize = val
316
317 case _AT_SECURE:
318 secureMode = val == 1
319 }
320
321 archauxv(tag, val)
322 vdsoauxv(tag, val)
323 }
324 return i / 2
325 }
326
// sysTHPSizePath is the NUL-terminated path getHugePageSize reads the huge
// page size from.
var sysTHPSizePath = []byte("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size\x00")
328
329 func getHugePageSize() uintptr {
330 var numbuf [20]byte
331 fd := open(&sysTHPSizePath[0], 0 , 0)
332 if fd < 0 {
333 return 0
334 }
335 ptr := noescape(unsafe.Pointer(&numbuf[0]))
336 n := read(fd, ptr, int32(len(numbuf)))
337 closefd(fd)
338 if n <= 0 {
339 return 0
340 }
341 n--
342 v, ok := atoi(slicebytetostringtmp((*byte)(ptr), int(n)))
343 if !ok || v < 0 {
344 v = 0
345 }
346 if v&(v-1) != 0 {
347
348 return 0
349 }
350 return uintptr(v)
351 }
352
// osinit performs the Linux-specific runtime initialisation visible here:
// it records the CPU count and huge page size, then runs the
// architecture-specific and vgetrandom initialisers.
func osinit() {
	ncpu = getproccount()
	physHugePageSize = getHugePageSize()
	osArchInit()
	vgetrandomInit()
}
359
// urandom_dev is the NUL-terminated path readRandom opens.
var urandom_dev = []byte("/dev/urandom\x00")
361
362 func readRandom(r []byte) int {
363
364
365 fd := open(&urandom_dev[0], 0 , 0)
366 n := read(fd, unsafe.Pointer(&r[0]), int32(len(r)))
367 closefd(fd)
368 return int(n)
369 }
370
// goenvs delegates to goenvs_unix; Linux adds nothing of its own here.
func goenvs() {
	goenvs_unix()
}
374
375
376
377
378
379
380
// libpreinit calls initsig with its argument set to true.
// NOTE(review): the name suggests this runs early when the runtime is
// loaded as a library; confirm against the caller.
func libpreinit() {
	initsig(true)
}
384
385
386
387 func mpreinit(mp *m) {
388 mp.gsignal = malg(32 * 1024)
389 mp.gsignal.m = mp
390 }
391
// gettid is declared without a body; its implementation lives outside this
// file. minit stores its result as the calling thread's procid.
func gettid() uint32
393
394
395
396 func minit() {
397 minitSignals()
398
399
400
401
402 getg().m.procid = uint64(gettid())
403 }
404
405
406
407
408 func unminit() {
409 unminitSignals()
410 getg().m.procid = 0
411 }
412
413
414
415 func mdestroy(mp *m) {
416 if mp.vgetrandomState != 0 {
417 vgetrandomPutState(mp.vgetrandomState)
418 mp.vgetrandomState = 0
419 }
420 }
421
422
423
424
425
// The functions below are declared without bodies; their implementations
// live outside this file.

// sigreturn__sigaction is installed as sa_restorer by setsig on 386 and
// amd64. sigtramp and cgoSigtramp are the handler entry points setsig
// substitutes for sighandler (cgoSigtramp when iscgo is set).
func sigreturn__sigaction()
func sigtramp()
func cgoSigtramp()

// sigaltstack takes a new and an old signal stack description.
func sigaltstack(new, old *stackt)

// setitimer takes a timer mode plus new and old interval timer values.
func setitimer(mode int32, new, old *itimerval)

// timer_create stores the new timer's ID through timerid.
// setThreadCPUProfiler treats a non-zero result as failure.
func timer_create(clockid int32, sevp *sigevent, timerid *int32) int32

// timer_settime arms timerid with new. setThreadCPUProfiler treats a
// non-zero result as a negated errno.
func timer_settime(timerid int32, flags int32, new, old *itimerspec) int32

// timer_delete removes timerid. setThreadCPUProfiler treats a non-zero
// result as a negated errno.
func timer_delete(timerid int32) int32

// rtsigprocmask is the primitive behind sigprocmask; size is the size of
// the signal set in bytes.
func rtsigprocmask(how int32, new, old *sigset, size int32)
447
448
449
450 func sigprocmask(how int32, new, old *sigset) {
451 rtsigprocmask(how, new, old, int32(unsafe.Sizeof(*new)))
452 }
453
// raise and raiseproc are declared without bodies; their implementations
// live outside this file. Each takes a signal number.
func raise(sig uint32)
func raiseproc(sig uint32)

// sched_getaffinity is declared without a body. getproccount passes pid 0
// and a byte buffer of length len, and treats a non-negative result as the
// number of mask bytes written and a negative result as failure.
func sched_getaffinity(pid, len uintptr, buf *byte) int32

// osyield is declared without a body; the busy-wait loops in
// syscall_runtime_doAllThreadsSyscall call it between polls.
func osyield()
460
461
// osyield_no_g simply calls osyield.
// NOTE(review): the name suggests it exists for callers running without a
// g; confirm against its callers.
func osyield_no_g() {
	osyield()
}
465
// pipe2 is declared without a body; its implementation lives outside this
// file. It returns two descriptors, r and w (presumably the read and write
// ends of a pipe), together with an errno.
func pipe2(flags int32) (r, w int32, errno int32)
467
468
469 func fcntl(fd, cmd, arg int32) (ret int32, errno int32) {
470 r, _, err := syscall.Syscall6(syscall.SYS_FCNTL, uintptr(fd), uintptr(cmd), uintptr(arg), 0, 0, 0)
471 return int32(r), int32(err)
472 }
473
// Size limits in bytes. Neither constant is referenced in this section of
// the file.
// NOTE(review): the names suggest the sizes of the siginfo and sigevent
// structures; confirm against the structure definitions.
const (
	_si_max_size    = 128
	_sigev_max_size = 64
)
478
479
480
481 func setsig(i uint32, fn uintptr) {
482 var sa sigactiont
483 sa.sa_flags = _SA_SIGINFO | _SA_ONSTACK | _SA_RESTORER | _SA_RESTART
484 sigfillset(&sa.sa_mask)
485
486
487
488 if GOARCH == "386" || GOARCH == "amd64" {
489 sa.sa_restorer = abi.FuncPCABI0(sigreturn__sigaction)
490 }
491 if fn == abi.FuncPCABIInternal(sighandler) {
492 if iscgo {
493 fn = abi.FuncPCABI0(cgoSigtramp)
494 } else {
495 fn = abi.FuncPCABI0(sigtramp)
496 }
497 }
498 sa.sa_handler = fn
499 sigaction(i, &sa, nil)
500 }
501
502
503
504 func setsigstack(i uint32) {
505 var sa sigactiont
506 sigaction(i, nil, &sa)
507 if sa.sa_flags&_SA_ONSTACK != 0 {
508 return
509 }
510 sa.sa_flags |= _SA_ONSTACK
511 sigaction(i, &sa, nil)
512 }
513
514
515
516 func getsig(i uint32) uintptr {
517 var sa sigactiont
518 sigaction(i, nil, &sa)
519 return sa.sa_handler
520 }
521
522
523
524
// setSignalstackSP stores sp into s.ss_sp. The store goes through an
// unsafe cast to *uintptr, so it works regardless of the field's declared
// type.
func setSignalstackSP(s *stackt, sp uintptr) {
	*(*uintptr)(unsafe.Pointer(&s.ss_sp)) = sp
}
528
529
// fixsigcode is a no-op on Linux: the signal code is left as delivered.
func (c *sigctxt) fixsigcode(sig uint32) {
}
532
533
534
535
536 func sysSigaction(sig uint32, new, old *sigactiont) {
537 if rt_sigaction(uintptr(sig), new, old, unsafe.Sizeof(sigactiont{}.sa_mask)) != 0 {
538
539
540
541
542
543
544
545
546
547
548
549 if sig != 32 && sig != 33 && sig != 64 {
550
551 systemstack(func() {
552 throw("sigaction failed")
553 })
554 }
555 }
556 }
557
558
559
560
// rt_sigaction is declared without a body; its implementation lives outside
// this file. sysSigaction treats a non-zero result as failure.
func rt_sigaction(sig uintptr, new, old *sigactiont, size uintptr) int32

// getpid and tgkill are declared without bodies. signalM passes getpid's
// result as tgid and the target thread's procid as tid.
func getpid() int
func tgkill(tgid, tid, sig int)
565
566
// signalM sends signal sig to the thread behind mp, addressing it by this
// process's ID and mp.procid. mp.procid must already have been set (minit
// does so).
func signalM(mp *m, sig int) {
	tgkill(getpid(), int(mp.procid), sig)
}
570
571
572
573
574
575
576
577
578 func validSIGPROF(mp *m, c *sigctxt) bool {
579 code := int32(c.sigcode())
580 setitimer := code == _SI_KERNEL
581 timer_create := code == _SI_TIMER
582
583 if !(setitimer || timer_create) {
584
585
586
587 return true
588 }
589
590 if mp == nil {
591
592
593
594
595
596
597
598
599
600
601
602
603 return setitimer
604 }
605
606
607
608 if mp.profileTimerValid.Load() {
609
610
611
612
613
614 return timer_create
615 }
616
617
618 return setitimer
619 }
620
// setProcessCPUProfiler sets the process-wide profiling rate to hz by
// delegating to setProcessCPUProfilerTimer.
func setProcessCPUProfiler(hz int32) {
	setProcessCPUProfilerTimer(hz)
}
624
625 func setThreadCPUProfiler(hz int32) {
626 mp := getg().m
627 mp.profilehz = hz
628
629
630 if mp.profileTimerValid.Load() {
631 timerid := mp.profileTimer
632 mp.profileTimerValid.Store(false)
633 mp.profileTimer = 0
634
635 ret := timer_delete(timerid)
636 if ret != 0 {
637 print("runtime: failed to disable profiling timer; timer_delete(", timerid, ") errno=", -ret, "\n")
638 throw("timer_delete")
639 }
640 }
641
642 if hz == 0 {
643
644 return
645 }
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666 spec := new(itimerspec)
667 spec.it_value.setNsec(1 + int64(cheaprandn(uint32(1e9/hz))))
668 spec.it_interval.setNsec(1e9 / int64(hz))
669
670 var timerid int32
671 var sevp sigevent
672 sevp.notify = _SIGEV_THREAD_ID
673 sevp.signo = _SIGPROF
674 sevp.sigev_notify_thread_id = int32(mp.procid)
675 ret := timer_create(_CLOCK_THREAD_CPUTIME_ID, &sevp, &timerid)
676 if ret != 0 {
677
678
679 return
680 }
681
682 ret = timer_settime(timerid, 0, spec, nil)
683 if ret != 0 {
684 print("runtime: failed to configure profiling timer; timer_settime(", timerid,
685 ", 0, {interval: {",
686 spec.it_interval.tv_sec, "s + ", spec.it_interval.tv_nsec, "ns} value: {",
687 spec.it_value.tv_sec, "s + ", spec.it_value.tv_nsec, "ns}}, nil) errno=", -ret, "\n")
688 throw("timer_settime")
689 }
690
691 mp.profileTimer = timerid
692 mp.profileTimerValid.Store(true)
693 }
694
695
696
// perThreadSyscallArgs describes a system call that every thread must run,
// together with the results each thread is expected to obtain.
type perThreadSyscallArgs struct {
	// trap and a1..a6 are the system call number and arguments passed to
	// syscall.Syscall6.
	trap uintptr
	a1   uintptr
	a2   uintptr
	a3   uintptr
	a4   uintptr
	a5   uintptr
	a6   uintptr
	// r1 and r2 are the results obtained by the initiating thread;
	// runPerThreadSyscall compares its own results against them.
	r1 uintptr
	r2 uintptr
}
708
709
710
711
712
713
// perThreadSyscall holds the call currently being broadcast. It is written
// by syscall_runtime_doAllThreadsSyscall before the other threads are
// signalled, read by runPerThreadSyscall on each of them, and reset to the
// zero value once all of them have finished.
var perThreadSyscall perThreadSyscallArgs
715
716
717
718
719
720
721
722
723
724 func syscall_runtime_doAllThreadsSyscall(trap, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, err uintptr) {
725 if iscgo {
726
727 panic("doAllThreadsSyscall not supported with cgo enabled")
728 }
729
730
731
732
733
734
735
736
737 stw := stopTheWorld(stwAllThreadsSyscall)
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759 allocmLock.lock()
760
761
762
763
764
765
766 acquirem()
767
768
769
770
771
772
773 r1, r2, errno := syscall.Syscall6(trap, a1, a2, a3, a4, a5, a6)
774 if GOARCH == "ppc64" || GOARCH == "ppc64le" {
775
776 r2 = 0
777 }
778 if errno != 0 {
779 releasem(getg().m)
780 allocmLock.unlock()
781 startTheWorld(stw)
782 return r1, r2, errno
783 }
784
785 perThreadSyscall = perThreadSyscallArgs{
786 trap: trap,
787 a1: a1,
788 a2: a2,
789 a3: a3,
790 a4: a4,
791 a5: a5,
792 a6: a6,
793 r1: r1,
794 r2: r2,
795 }
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832 for mp := allm; mp != nil; mp = mp.alllink {
833 for atomic.Load64(&mp.procid) == 0 {
834
835 osyield()
836 }
837 }
838
839
840
841 gp := getg()
842 tid := gp.m.procid
843 for mp := allm; mp != nil; mp = mp.alllink {
844 if atomic.Load64(&mp.procid) == tid {
845
846 continue
847 }
848 mp.needPerThreadSyscall.Store(1)
849 signalM(mp, sigPerThreadSyscall)
850 }
851
852
853 for mp := allm; mp != nil; mp = mp.alllink {
854 if mp.procid == tid {
855 continue
856 }
857 for mp.needPerThreadSyscall.Load() != 0 {
858 osyield()
859 }
860 }
861
862 perThreadSyscall = perThreadSyscallArgs{}
863
864 releasem(getg().m)
865 allocmLock.unlock()
866 startTheWorld(stw)
867
868 return r1, r2, errno
869 }
870
871
872
873
874
875
876
877 func runPerThreadSyscall() {
878 gp := getg()
879 if gp.m.needPerThreadSyscall.Load() == 0 {
880 return
881 }
882
883 args := perThreadSyscall
884 r1, r2, errno := syscall.Syscall6(args.trap, args.a1, args.a2, args.a3, args.a4, args.a5, args.a6)
885 if GOARCH == "ppc64" || GOARCH == "ppc64le" {
886
887 r2 = 0
888 }
889 if errno != 0 || r1 != args.r1 || r2 != args.r2 {
890 print("trap:", args.trap, ", a123456=[", args.a1, ",", args.a2, ",", args.a3, ",", args.a4, ",", args.a5, ",", args.a6, "]\n")
891 print("results: got {r1=", r1, ",r2=", r2, ",errno=", errno, "}, want {r1=", args.r1, ",r2=", args.r2, ",errno=0}\n")
892 fatal("AllThreadsSyscall6 results differ between threads; runtime corrupted")
893 }
894
895 gp.m.needPerThreadSyscall.Store(0)
896 }
897
// Signal code values compared against sigctxt.sigcode.
const (
	_SI_USER     = 0  // accepted by sigFromUser
	_SI_TKILL    = -6 // accepted by sigFromUser
	_SYS_SECCOMP = 1  // accepted by sigFromSeccomp
)
903
904
905
906
907
908 func (c *sigctxt) sigFromUser() bool {
909 code := int32(c.sigcode())
910 return code == _SI_USER || code == _SI_TKILL
911 }
912
913
914
915
916 func (c *sigctxt) sigFromSeccomp() bool {
917 code := int32(c.sigcode())
918 return code == _SYS_SECCOMP
919 }
920
921
922 func mprotect(addr unsafe.Pointer, n uintptr, prot int32) (ret int32, errno int32) {
923 r, _, err := syscall.Syscall6(syscall.SYS_MPROTECT, uintptr(addr), n, uintptr(prot), 0, 0, 0)
924 return int32(r), int32(err)
925 }
926
View as plain text