// Source: src/runtime/asm_arm64.s (Go runtime, ARM64 assembly).
1 // Copyright 2015 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "go_asm.h"
6 #include "go_tls.h"
7 #include "tls_arm64.h"
8 #include "funcdata.h"
9 #include "textflag.h"
10
// Error message written to stderr at startup when the binary was built
// with GOARM64 LSE atomics but the CPU lacks LSE support (see the
// no_lse path in rt0_go below). Length must stay in sync with the $64
// used there.
#ifdef GOARM64_LSE
DATA no_lse_msg<>+0x00(SB)/64, $"This program can only run on ARM64 processors with LSE support.\n"
GLOBL no_lse_msg<>(SB), RODATA, $64
#endif
15
// rt0_go is the runtime entry point: it wires up g0/m0, establishes the
// initial stack bounds and TLS, optionally calls _cgo_init, runs
// args/osinit/schedinit, queues runtime.main as the first goroutine,
// and starts this M. It never returns.
TEXT runtime·rt0_go(SB),NOSPLIT|TOPFRAME,$0
        // SP = stack; R0 = argc; R1 = argv

        SUB     $32, RSP
        MOVW    R0, 8(RSP)      // argc
        MOVD    R1, 16(RSP)     // argv

#ifdef TLS_darwin
        // Initialize TLS.
        MOVD    ZR, g   // clear g, make sure it's not junk.
        SUB     $32, RSP
        MRS_TPIDR_R0
        AND     $~7, R0
        MOVD    R0, 16(RSP)     // arg2: TLS base
        MOVD    $runtime·tls_g(SB), R2
        MOVD    R2, 8(RSP)      // arg1: &tlsg
        BL      ·tlsinit(SB)
        ADD     $32, RSP
#endif

        // create istack out of the given (operating system) stack.
        // _cgo_init may update stackguard.
        MOVD    $runtime·g0(SB), g
        MOVD    RSP, R7
        MOVD    $(-64*1024)(R7), R0     // assume 64 KB of usable OS stack
        MOVD    R0, g_stackguard0(g)
        MOVD    R0, g_stackguard1(g)
        MOVD    R0, (g_stack+stack_lo)(g)
        MOVD    R7, (g_stack+stack_hi)(g)

        // if there is a _cgo_init, call it using the gcc ABI.
        MOVD    _cgo_init(SB), R12
        CBZ     R12, nocgo

#ifdef GOOS_android
        MRS_TPIDR_R0                    // load TLS base pointer
        MOVD    R0, R3                  // arg 3: TLS base pointer
        MOVD    $runtime·tls_g(SB), R2  // arg 2: &tls_g
#else
        MOVD    $0, R2                  // arg 2: not used when using platform's TLS
#endif
        MOVD    $setg_gcc<>(SB), R1     // arg 1: setg
        MOVD    g, R0                   // arg 0: G
        SUB     $16, RSP                // reserve 16 bytes for sp-8 where fp may be saved.
        BL      (R12)
        ADD     $16, RSP

nocgo:
        BL      runtime·save_g(SB)
        // update stackguard after _cgo_init
        MOVD    (g_stack+stack_lo)(g), R0
        ADD     $const_stackGuard, R0
        MOVD    R0, g_stackguard0(g)
        MOVD    R0, g_stackguard1(g)

        // set the per-goroutine and per-mach "registers"
        MOVD    $runtime·m0(SB), R0

        // save m->g0 = g0
        MOVD    g, m_g0(R0)
        // save m0 to g0->m
        MOVD    R0, g_m(g)

        BL      runtime·check(SB)

#ifdef GOOS_windows
        BL      runtime·wintls(SB)
#endif

        // Check that CPU we use for execution supports instructions targeted during compile-time.
#ifdef GOARM64_LSE
#ifndef GOOS_openbsd
        // Read the ID_AA64ISAR0_EL1 register
        MRS     ID_AA64ISAR0_EL1, R0

        // Extract the LSE field (bits [23:20])
        LSR     $20, R0, R0
        AND     $0xf, R0, R0

        // LSE support is indicated by a non-zero value
        CBZ     R0, no_lse
#endif
#endif

        MOVW    8(RSP), R0      // copy argc
        MOVW    R0, -8(RSP)
        MOVD    16(RSP), R0     // copy argv
        MOVD    R0, 0(RSP)
        BL      runtime·args(SB)
        BL      runtime·osinit(SB)
        BL      runtime·schedinit(SB)

        // create a new goroutine to start program
        MOVD    $runtime·mainPC(SB), R0 // entry
        SUB     $16, RSP
        MOVD    R0, 8(RSP)      // arg
        MOVD    $0, 0(RSP)      // dummy LR
        BL      runtime·newproc(SB)
        ADD     $16, RSP

        // start this M
        BL      runtime·mstart(SB)
        RET

#ifdef GOARM64_LSE
#ifndef GOOS_openbsd
no_lse:
        MOVD    $1, R0 // stderr
        MOVD    R0, 8(RSP)
        MOVD    $no_lse_msg<>(SB), R1 // message address
        MOVD    R1, 16(RSP)
        MOVD    $64, R2 // message length
        MOVD    R2, 24(RSP)
        CALL    runtime·write(SB)
        // FIX: runtime·exit reads its status argument from 8(RSP); set it
        // explicitly rather than relying on the stale value left there by
        // the write setup above.
        MOVD    $1, R0 // exit code
        MOVD    R0, 8(RSP)
        CALL    runtime·exit(SB)
        CALL    runtime·abort(SB)
        RET
#endif
#endif

        // Prevent dead-code elimination of debugCallV2 and debugPinnerV1, which are
        // intended to be called by debuggers.
        MOVD    $runtime·debugPinnerV1<ABIInternal>(SB), R0
        MOVD    $runtime·debugCallV2<ABIInternal>(SB), R0

        MOVD    $0, R0
        MOVD    R0, (R0)        // boom
        UNDEF
144
// mainPC is a function value for runtime.main, passed to newproc by
// rt0_go above to start the main goroutine.
DATA    runtime·mainPC+0(SB)/8,$runtime·main<ABIInternal>(SB)
GLOBL   runtime·mainPC(SB),RODATA,$8
147
// Windows ARM64 needs an immediate 0xf000 argument.
// See go.dev/issues/53837.
// NOTE: the Go assembler's preprocessor allows #ifdef inside a #define;
// the trailing backslashes continue the macro through the conditional.
#define BREAK \
#ifdef GOOS_windows \
        BRK $0xf000 \
#else \
        BRK \
#endif \

157
// breakpoint executes a debugger trap (BRK) and returns.
TEXT runtime·breakpoint(SB),NOSPLIT|NOFRAME,$0-0
        BREAK
        RET
161
// asminit performs architecture-specific initialization; nothing is
// required on arm64.
TEXT runtime·asminit(SB),NOSPLIT|NOFRAME,$0-0
        RET
164
// mstart is the entry point for new Ms. mstart0 never returns;
// TOPFRAME terminates tracebacks here.
TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME,$0
        BL      runtime·mstart0(SB)
        RET // not reached
168
169 /*
170 * go-routine
171 */
172
// void gogo(Gobuf*)
// restore state from Gobuf; longjmp
// Loads the Gobuf into R5 and the target g into R6, faults early if the
// g is nil, then tail-calls gogo<> to do the actual switch.
TEXT runtime·gogo(SB), NOSPLIT|NOFRAME, $0-8
        MOVD    buf+0(FP), R5
        MOVD    gobuf_g(R5), R6
        MOVD    0(R6), R4       // make sure g != nil
        B       gogo<>(SB)
180
// gogo<> restores goroutine state from the Gobuf in R5 (new g in R6)
// and jumps to the saved PC. The Gobuf fields are zeroed as they are
// consumed so the GC does not see stale pointers.
TEXT gogo<>(SB), NOSPLIT|NOFRAME, $0
        MOVD    R6, g
        BL      runtime·save_g(SB)

        MOVD    gobuf_sp(R5), R0
        MOVD    R0, RSP
        MOVD    gobuf_bp(R5), R29
        MOVD    gobuf_lr(R5), LR
        MOVD    gobuf_ret(R5), R0
        MOVD    gobuf_ctxt(R5), R26
        MOVD    $0, gobuf_sp(R5)
        MOVD    $0, gobuf_bp(R5)
        MOVD    $0, gobuf_ret(R5)
        MOVD    $0, gobuf_lr(R5)
        MOVD    $0, gobuf_ctxt(R5)
        CMP     ZR, ZR // set condition codes for == test, needed by stack split
        MOVD    gobuf_pc(R5), R6
        B       (R6)
199
// void mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return. It should gogo(&g->sched)
// to keep running g.
TEXT runtime·mcall<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-8
        MOVD    R0, R26 // context

        // Save caller state in g->sched
        MOVD    RSP, R0
        MOVD    R0, (g_sched+gobuf_sp)(g)
        MOVD    R29, (g_sched+gobuf_bp)(g)
        MOVD    LR, (g_sched+gobuf_pc)(g)
        MOVD    $0, (g_sched+gobuf_lr)(g)

        // Switch to m->g0 & its stack, call fn.
        MOVD    g, R3
        MOVD    g_m(g), R8
        MOVD    m_g0(R8), g
        BL      runtime·save_g(SB)
        CMP     g, R3
        BNE     2(PC)
        B       runtime·badmcall(SB)    // mcall was called on the g0 stack

        MOVD    (g_sched+gobuf_sp)(g), R0
        MOVD    R0, RSP // sp = m->g0->sched.sp
        MOVD    (g_sched+gobuf_bp)(g), R29
        MOVD    R3, R0                  // arg = g
        MOVD    $0, -16(RSP)            // dummy LR
        SUB     $16, RSP
        MOVD    0(R26), R4              // code pointer
        BL      (R4)
        B       runtime·badmcall2(SB)   // fn returned; it must not
232
// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack. We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
// It is never executed (UNDEF); only its PC is stored in g->sched.pc.
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
        UNDEF
        BL      (LR)    // make sure this function is not leaf
        RET
242
// func systemstack(fn func())
// Runs fn on the system (g0) stack, switching there and back if the
// caller is running on a user goroutine stack.
TEXT runtime·systemstack(SB), NOSPLIT, $0-8
        MOVD    fn+0(FP), R3    // R3 = fn
        MOVD    R3, R26         // context
        MOVD    g_m(g), R4      // R4 = m

        // Already on gsignal stack? Run fn directly.
        MOVD    m_gsignal(R4), R5       // R5 = gsignal
        CMP     g, R5
        BEQ     noswitch

        // Already on g0 stack? Run fn directly.
        MOVD    m_g0(R4), R5    // R5 = g0
        CMP     g, R5
        BEQ     noswitch

        MOVD    m_curg(R4), R6
        CMP     g, R6
        BEQ     switch

        // Bad: g is not gsignal, not g0, not curg. What is it?
        // Hide call from linker nosplit analysis.
        MOVD    $runtime·badsystemstack(SB), R3
        BL      (R3)
        B       runtime·abort(SB)

switch:
        // save our state in g->sched. Pretend to
        // be systemstack_switch if the G stack is scanned.
        BL      gosave_systemstack_switch<>(SB)

        // switch to g0
        MOVD    R5, g
        BL      runtime·save_g(SB)
        MOVD    (g_sched+gobuf_sp)(g), R3
        MOVD    R3, RSP
        MOVD    (g_sched+gobuf_bp)(g), R29

        // call target function
        MOVD    0(R26), R3      // code pointer
        BL      (R3)

        // switch back to g
        MOVD    g_m(g), R3
        MOVD    m_curg(R3), g
        BL      runtime·save_g(SB)
        MOVD    (g_sched+gobuf_sp)(g), R0
        MOVD    R0, RSP
        MOVD    (g_sched+gobuf_bp)(g), R29
        MOVD    $0, (g_sched+gobuf_sp)(g)
        MOVD    $0, (g_sched+gobuf_bp)(g)
        RET

noswitch:
        // already on m stack, just call directly
        // Using a tail call here cleans up tracebacks since we won't stop
        // at an intermediate systemstack.
        MOVD    0(R26), R3      // code pointer
        MOVD.P  16(RSP), R30    // restore LR
        SUB     $8, RSP, R29    // restore FP
        B       (R3)
302
// func switchToCrashStack0(fn func())
// Switches the current M onto the special crash stack (gcrash) and
// calls fn, which must not return. Used while crashing, when the
// normal stacks may be unusable.
TEXT runtime·switchToCrashStack0<ABIInternal>(SB), NOSPLIT, $0-8
        MOVD    R0, R26    // context register
        MOVD    g_m(g), R1 // curm

        // set g to gcrash
        MOVD    $runtime·gcrash(SB), g // g = &gcrash
        BL      runtime·save_g(SB)         // clobbers R0
        MOVD    R1, g_m(g)             // g.m = curm
        MOVD    g, m_g0(R1)            // curm.g0 = g

        // switch to crashstack
        MOVD    (g_stack+stack_hi)(g), R1
        SUB     $(4*8), R1      // leave a little headroom below stack_hi
        MOVD    R1, RSP

        // call target function
        MOVD    0(R26), R0
        CALL    (R0)

        // should never return
        CALL    runtime·abort(SB)
        UNDEF
326
327 /*
328 * support for morestack
329 */
330
// Called during function prolog when more stack is needed.
// Caller has already loaded:
// R3 prolog's LR (R30)
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
        // Cannot grow scheduler stack (m->g0).
        MOVD    g_m(g), R8
        MOVD    m_g0(R8), R4

        // Called from f.
        // Set g->sched to context in f
        MOVD    RSP, R0
        MOVD    R0, (g_sched+gobuf_sp)(g)
        MOVD    R29, (g_sched+gobuf_bp)(g)
        MOVD    LR, (g_sched+gobuf_pc)(g)
        MOVD    R3, (g_sched+gobuf_lr)(g)
        MOVD    R26, (g_sched+gobuf_ctxt)(g)

        CMP     g, R4
        BNE     3(PC)
        BL      runtime·badmorestackg0(SB)
        B       runtime·abort(SB)

        // Cannot grow signal stack (m->gsignal).
        MOVD    m_gsignal(R8), R4
        CMP     g, R4
        BNE     3(PC)
        BL      runtime·badmorestackgsignal(SB)
        B       runtime·abort(SB)

        // Called from f.
        // Set m->morebuf to f's callers.
        MOVD    R3, (m_morebuf+gobuf_pc)(R8)    // f's caller's PC
        MOVD    RSP, R0
        MOVD    R0, (m_morebuf+gobuf_sp)(R8)    // f's caller's RSP
        MOVD    g, (m_morebuf+gobuf_g)(R8)

        // Call newstack on m->g0's stack.
        MOVD    m_g0(R8), g
        BL      runtime·save_g(SB)
        MOVD    (g_sched+gobuf_sp)(g), R0
        MOVD    R0, RSP
        MOVD    (g_sched+gobuf_bp)(g), R29
        MOVD.W  $0, -16(RSP)    // create a call frame on g0 (saved LR; keep 16-aligned)
        BL      runtime·newstack(SB)

        // Not reached, but make sure the return PC from the call to newstack
        // is still in this function, and not the beginning of the next.
        UNDEF
384
// morestack_noctxt is morestack for functions with no closure context:
// it clears R26 and tail-calls morestack.
TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0
        // Force SPWRITE. This function doesn't actually write SP,
        // but it is called with a special calling convention where
        // the caller doesn't save LR on stack but passes it as a
        // register (R3), and the unwinder currently doesn't understand.
        // Make it SPWRITE to stop unwinding. (See issue 54332)
        MOVD    RSP, RSP

        MOVW    $0, R26
        B       runtime·morestack(SB)
395
// spillArgs stores return values from registers to a *internal/abi.RegArgs in R20.
// Integer registers R0-R15 go to slots 0-15, FP registers F0-F15 to
// slots 16-31 (8 bytes each). Layout must match unspillArgs below.
TEXT ·spillArgs(SB),NOSPLIT,$0-0
        STP     (R0, R1), (0*8)(R20)
        STP     (R2, R3), (2*8)(R20)
        STP     (R4, R5), (4*8)(R20)
        STP     (R6, R7), (6*8)(R20)
        STP     (R8, R9), (8*8)(R20)
        STP     (R10, R11), (10*8)(R20)
        STP     (R12, R13), (12*8)(R20)
        STP     (R14, R15), (14*8)(R20)
        FSTPD   (F0, F1), (16*8)(R20)
        FSTPD   (F2, F3), (18*8)(R20)
        FSTPD   (F4, F5), (20*8)(R20)
        FSTPD   (F6, F7), (22*8)(R20)
        FSTPD   (F8, F9), (24*8)(R20)
        FSTPD   (F10, F11), (26*8)(R20)
        FSTPD   (F12, F13), (28*8)(R20)
        FSTPD   (F14, F15), (30*8)(R20)
        RET
415
// unspillArgs loads args into registers from a *internal/abi.RegArgs in R20.
// Inverse of spillArgs above; slot layout must stay in sync.
TEXT ·unspillArgs(SB),NOSPLIT,$0-0
        LDP     (0*8)(R20), (R0, R1)
        LDP     (2*8)(R20), (R2, R3)
        LDP     (4*8)(R20), (R4, R5)
        LDP     (6*8)(R20), (R6, R7)
        LDP     (8*8)(R20), (R8, R9)
        LDP     (10*8)(R20), (R10, R11)
        LDP     (12*8)(R20), (R12, R13)
        LDP     (14*8)(R20), (R14, R15)
        FLDPD   (16*8)(R20), (F0, F1)
        FLDPD   (18*8)(R20), (F2, F3)
        FLDPD   (20*8)(R20), (F4, F5)
        FLDPD   (22*8)(R20), (F6, F7)
        FLDPD   (24*8)(R20), (F8, F9)
        FLDPD   (26*8)(R20), (F10, F11)
        FLDPD   (28*8)(R20), (F12, F13)
        FLDPD   (30*8)(R20), (F14, F15)
        RET
435
// reflectcall: call a function with the given argument list
// func call(stackArgsType *_type, f *FuncVal, stackArgs *byte, stackArgsSize, stackRetOffset, frameSize uint32, regArgs *abi.RegArgs).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

// DISPATCH jumps to NAME if the requested frame size (R16) fits in
// MAXSIZE; otherwise it falls through to the next DISPATCH. R27 is scratch.
#define DISPATCH(NAME,MAXSIZE) \
        MOVD $MAXSIZE, R27; \
        CMP R27, R16; \
        BGT 3(PC); \
        MOVD $NAME(SB), R27; \
        B (R27)
// Note: can't just "B NAME(SB)" - bad inlining results.

TEXT ·reflectcall(SB), NOSPLIT|NOFRAME, $0-48
        MOVWU   frameSize+32(FP), R16
        DISPATCH(runtime·call16, 16)
        DISPATCH(runtime·call32, 32)
        DISPATCH(runtime·call64, 64)
        DISPATCH(runtime·call128, 128)
        DISPATCH(runtime·call256, 256)
        DISPATCH(runtime·call512, 512)
        DISPATCH(runtime·call1024, 1024)
        DISPATCH(runtime·call2048, 2048)
        DISPATCH(runtime·call4096, 4096)
        DISPATCH(runtime·call8192, 8192)
        DISPATCH(runtime·call16384, 16384)
        DISPATCH(runtime·call32768, 32768)
        DISPATCH(runtime·call65536, 65536)
        DISPATCH(runtime·call131072, 131072)
        DISPATCH(runtime·call262144, 262144)
        DISPATCH(runtime·call524288, 524288)
        DISPATCH(runtime·call1048576, 1048576)
        DISPATCH(runtime·call2097152, 2097152)
        DISPATCH(runtime·call4194304, 4194304)
        DISPATCH(runtime·call8388608, 8388608)
        DISPATCH(runtime·call16777216, 16777216)
        DISPATCH(runtime·call33554432, 33554432)
        DISPATCH(runtime·call67108864, 67108864)
        DISPATCH(runtime·call134217728, 134217728)
        DISPATCH(runtime·call268435456, 268435456)
        DISPATCH(runtime·call536870912, 536870912)
        DISPATCH(runtime·call1073741824, 1073741824)
        MOVD    $runtime·badreflectcall(SB), R0
        B       (R0)
481
// CALLFN defines one fixed-frame-size call* function for reflectcall:
// copy stack args into the frame (16 bytes at a time, then byte-wise
// remainder), load register args, call f, then copy stack results back
// via callRet. Frame size MAXSIZE is baked into the symbol name.
#define CALLFN(NAME,MAXSIZE) \
TEXT NAME(SB), WRAPPER, $MAXSIZE-48; \
        NO_LOCAL_POINTERS; \
        /* copy arguments to stack */ \
        MOVD stackArgs+16(FP), R3; \
        MOVWU stackArgsSize+24(FP), R4; \
        ADD $8, RSP, R5; \
        BIC $0xf, R4, R6; \
        CBZ R6, 6(PC); \
        /* if R6=(argsize&~15) != 0 */ \
        ADD R6, R5, R6; \
        /* copy 16 bytes a time */ \
        LDP.P 16(R3), (R7, R8); \
        STP.P (R7, R8), 16(R5); \
        CMP R5, R6; \
        BNE -3(PC); \
        AND $0xf, R4, R6; \
        CBZ R6, 6(PC); \
        /* if R6=(argsize&15) != 0 */ \
        ADD R6, R5, R6; \
        /* copy 1 byte a time for the rest */ \
        MOVBU.P 1(R3), R7; \
        MOVBU.P R7, 1(R5); \
        CMP R5, R6; \
        BNE -3(PC); \
        /* set up argument registers */ \
        MOVD regArgs+40(FP), R20; \
        CALL ·unspillArgs(SB); \
        /* call function */ \
        MOVD f+8(FP), R26; \
        MOVD (R26), R20; \
        PCDATA $PCDATA_StackMapIndex, $0; \
        BL (R20); \
        /* copy return values back */ \
        MOVD regArgs+40(FP), R20; \
        CALL ·spillArgs(SB); \
        MOVD stackArgsType+0(FP), R7; \
        MOVD stackArgs+16(FP), R3; \
        MOVWU stackArgsSize+24(FP), R4; \
        MOVWU stackRetOffset+28(FP), R6; \
        ADD $8, RSP, R5; \
        ADD R6, R5; \
        ADD R6, R3; \
        SUB R6, R4; \
        BL callRet<>(SB); \
        RET
528
// callRet copies return values back at the end of call*. This is a
// separate function so it can allocate stack space for the arguments
// to reflectcallmove. It does not follow the Go ABI; it expects its
// arguments in registers:
//   R7 = stackArgsType, R3 = stackArgs, R4 = size, R5 = frame ret area,
//   R20 = regArgs.
TEXT callRet<>(SB), NOSPLIT, $48-0
        NO_LOCAL_POINTERS
        STP     (R7, R3), 8(RSP)
        STP     (R5, R4), 24(RSP)
        MOVD    R20, 40(RSP)
        BL      runtime·reflectcallmove(SB)
        RET
540
// Instantiate call16..call1073741824, one per power-of-two frame size
// dispatched to by reflectcall above.
CALLFN(·call16, 16)
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)
568
// func memhash32(p unsafe.Pointer, h uintptr) uintptr
// AES-based hash of a 4-byte value; falls back to the portable hash
// when the CPU has no AES support (useAeshash false).
TEXT runtime·memhash32<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
        MOVB    runtime·useAeshash(SB), R10
        CBZ     R10, noaes
        MOVD    $runtime·aeskeysched+0(SB), R3

        VEOR    V0.B16, V0.B16, V0.B16
        VLD1    (R3), [V2.B16]          // load hash key material
        VLD1    (R0), V0.S[1]           // data word
        VMOV    R1, V0.S[0]             // seed

        AESE    V2.B16, V0.B16
        AESMC   V0.B16, V0.B16
        AESE    V2.B16, V0.B16
        AESMC   V0.B16, V0.B16
        AESE    V2.B16, V0.B16

        VMOV    V0.D[0], R0
        RET
noaes:
        B       runtime·memhash32Fallback<ABIInternal>(SB)
590
// func memhash64(p unsafe.Pointer, h uintptr) uintptr
// AES-based hash of an 8-byte value; falls back to the portable hash
// when the CPU has no AES support (useAeshash false).
TEXT runtime·memhash64<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
        MOVB    runtime·useAeshash(SB), R10
        CBZ     R10, noaes
        MOVD    $runtime·aeskeysched+0(SB), R3

        VEOR    V0.B16, V0.B16, V0.B16
        VLD1    (R3), [V2.B16]          // load hash key material
        VLD1    (R0), V0.D[1]           // data doubleword
        VMOV    R1, V0.D[0]             // seed

        AESE    V2.B16, V0.B16
        AESMC   V0.B16, V0.B16
        AESE    V2.B16, V0.B16
        AESMC   V0.B16, V0.B16
        AESE    V2.B16, V0.B16

        VMOV    V0.D[0], R0
        RET
noaes:
        B       runtime·memhash64Fallback<ABIInternal>(SB)
612
// func memhash(p unsafe.Pointer, h, size uintptr) uintptr
// Variable-length AES hash; tail-calls aeshashbody with
// R0 = data, R1 = seed, R2 = length. Portable fallback without AES.
TEXT runtime·memhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-32
        MOVB    runtime·useAeshash(SB), R10
        CBZ     R10, noaes
        B       aeshashbody<>(SB)
noaes:
        B       runtime·memhashFallback<ABIInternal>(SB)
620
// func strhash(p unsafe.Pointer, h uintptr) uintptr
// p points at a string header; unpack data/len then share aeshashbody.
TEXT runtime·strhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
        MOVB    runtime·useAeshash(SB), R10
        CBZ     R10, noaes
        LDP     (R0), (R0, R2)  // string data / length
        B       aeshashbody<>(SB)
noaes:
        B       runtime·strhashFallback<ABIInternal>(SB)
629
// R0: data
// R1: seed data
// R2: length
// At return, R0 = return value
// Hashes [R0, R0+R2) with AES rounds keyed from runtime·aeskeysched,
// mixing the length and seed into the initial state. Size classes
// (0-15, 16, 17-32, 33-64, 65-128, 129+) each take their own path so
// loads never run past the end of the data.
TEXT aeshashbody<>(SB),NOSPLIT|NOFRAME,$0
        VEOR    V30.B16, V30.B16, V30.B16
        VMOV    R1, V30.D[0]
        VMOV    R2, V30.D[1] // load length into seed

        MOVD    $runtime·aeskeysched+0(SB), R4
        VLD1.P  16(R4), [V0.B16]
        AESE    V30.B16, V0.B16
        AESMC   V0.B16, V0.B16
        CMP     $16, R2
        BLO     aes0to15
        BEQ     aes16
        CMP     $32, R2
        BLS     aes17to32
        CMP     $64, R2
        BLS     aes33to64
        CMP     $128, R2
        BLS     aes65to128
        B       aes129plus

aes0to15:
        // Short input: build V2 from the 8/4/2/1-byte pieces indicated
        // by the bits of the length, so we never read past the data.
        CBZ     R2, aes0
        VEOR    V2.B16, V2.B16, V2.B16
        TBZ     $3, R2, less_than_8
        VLD1.P  8(R0), V2.D[0]

less_than_8:
        TBZ     $2, R2, less_than_4
        VLD1.P  4(R0), V2.S[2]

less_than_4:
        TBZ     $1, R2, less_than_2
        VLD1.P  2(R0), V2.H[6]

less_than_2:
        TBZ     $0, R2, done
        VLD1    (R0), V2.B[14]
done:
        AESE    V0.B16, V2.B16
        AESMC   V2.B16, V2.B16
        AESE    V0.B16, V2.B16
        AESMC   V2.B16, V2.B16
        AESE    V0.B16, V2.B16
        AESMC   V2.B16, V2.B16

        VMOV    V2.D[0], R0
        RET

aes0:
        // Zero-length input: hash is derived from seed/length state only.
        VMOV    V0.D[0], R0
        RET

aes16:
        VLD1    (R0), [V2.B16]
        B       done

aes17to32:
        // make second seed
        VLD1    (R4), [V1.B16]
        AESE    V30.B16, V1.B16
        AESMC   V1.B16, V1.B16
        SUB     $16, R2, R10
        VLD1.P  (R0)(R10), [V2.B16]     // first 16 bytes (overlapping load)
        VLD1    (R0), [V3.B16]          // last 16 bytes

        AESE    V0.B16, V2.B16
        AESMC   V2.B16, V2.B16
        AESE    V1.B16, V3.B16
        AESMC   V3.B16, V3.B16

        AESE    V0.B16, V2.B16
        AESMC   V2.B16, V2.B16
        AESE    V1.B16, V3.B16
        AESMC   V3.B16, V3.B16

        AESE    V0.B16, V2.B16
        AESE    V1.B16, V3.B16

        VEOR    V3.B16, V2.B16, V2.B16

        VMOV    V2.D[0], R0
        RET

aes33to64:
        // Four lanes: derive three extra seeds, hash two overlapping
        // 32-byte halves, fold together.
        VLD1    (R4), [V1.B16, V2.B16, V3.B16]
        AESE    V30.B16, V1.B16
        AESMC   V1.B16, V1.B16
        AESE    V30.B16, V2.B16
        AESMC   V2.B16, V2.B16
        AESE    V30.B16, V3.B16
        AESMC   V3.B16, V3.B16
        SUB     $32, R2, R10

        VLD1.P  (R0)(R10), [V4.B16, V5.B16]
        VLD1    (R0), [V6.B16, V7.B16]

        AESE    V0.B16, V4.B16
        AESMC   V4.B16, V4.B16
        AESE    V1.B16, V5.B16
        AESMC   V5.B16, V5.B16
        AESE    V2.B16, V6.B16
        AESMC   V6.B16, V6.B16
        AESE    V3.B16, V7.B16
        AESMC   V7.B16, V7.B16

        AESE    V0.B16, V4.B16
        AESMC   V4.B16, V4.B16
        AESE    V1.B16, V5.B16
        AESMC   V5.B16, V5.B16
        AESE    V2.B16, V6.B16
        AESMC   V6.B16, V6.B16
        AESE    V3.B16, V7.B16
        AESMC   V7.B16, V7.B16

        AESE    V0.B16, V4.B16
        AESE    V1.B16, V5.B16
        AESE    V2.B16, V6.B16
        AESE    V3.B16, V7.B16

        VEOR    V6.B16, V4.B16, V4.B16
        VEOR    V7.B16, V5.B16, V5.B16
        VEOR    V5.B16, V4.B16, V4.B16

        VMOV    V4.D[0], R0
        RET

aes65to128:
        // Eight lanes over two overlapping 64-byte halves.
        VLD1.P  64(R4), [V1.B16, V2.B16, V3.B16, V4.B16]
        VLD1    (R4), [V5.B16, V6.B16, V7.B16]
        AESE    V30.B16, V1.B16
        AESMC   V1.B16, V1.B16
        AESE    V30.B16, V2.B16
        AESMC   V2.B16, V2.B16
        AESE    V30.B16, V3.B16
        AESMC   V3.B16, V3.B16
        AESE    V30.B16, V4.B16
        AESMC   V4.B16, V4.B16
        AESE    V30.B16, V5.B16
        AESMC   V5.B16, V5.B16
        AESE    V30.B16, V6.B16
        AESMC   V6.B16, V6.B16
        AESE    V30.B16, V7.B16
        AESMC   V7.B16, V7.B16

        SUB     $64, R2, R10
        VLD1.P  (R0)(R10), [V8.B16, V9.B16, V10.B16, V11.B16]
        VLD1    (R0), [V12.B16, V13.B16, V14.B16, V15.B16]
        AESE    V0.B16, V8.B16
        AESMC   V8.B16, V8.B16
        AESE    V1.B16, V9.B16
        AESMC   V9.B16, V9.B16
        AESE    V2.B16, V10.B16
        AESMC   V10.B16, V10.B16
        AESE    V3.B16, V11.B16
        AESMC   V11.B16, V11.B16
        AESE    V4.B16, V12.B16
        AESMC   V12.B16, V12.B16
        AESE    V5.B16, V13.B16
        AESMC   V13.B16, V13.B16
        AESE    V6.B16, V14.B16
        AESMC   V14.B16, V14.B16
        AESE    V7.B16, V15.B16
        AESMC   V15.B16, V15.B16

        AESE    V0.B16, V8.B16
        AESMC   V8.B16, V8.B16
        AESE    V1.B16, V9.B16
        AESMC   V9.B16, V9.B16
        AESE    V2.B16, V10.B16
        AESMC   V10.B16, V10.B16
        AESE    V3.B16, V11.B16
        AESMC   V11.B16, V11.B16
        AESE    V4.B16, V12.B16
        AESMC   V12.B16, V12.B16
        AESE    V5.B16, V13.B16
        AESMC   V13.B16, V13.B16
        AESE    V6.B16, V14.B16
        AESMC   V14.B16, V14.B16
        AESE    V7.B16, V15.B16
        AESMC   V15.B16, V15.B16

        AESE    V0.B16, V8.B16
        AESE    V1.B16, V9.B16
        AESE    V2.B16, V10.B16
        AESE    V3.B16, V11.B16
        AESE    V4.B16, V12.B16
        AESE    V5.B16, V13.B16
        AESE    V6.B16, V14.B16
        AESE    V7.B16, V15.B16

        VEOR    V12.B16, V8.B16, V8.B16
        VEOR    V13.B16, V9.B16, V9.B16
        VEOR    V14.B16, V10.B16, V10.B16
        VEOR    V15.B16, V11.B16, V11.B16
        VEOR    V10.B16, V8.B16, V8.B16
        VEOR    V11.B16, V9.B16, V9.B16
        VEOR    V9.B16, V8.B16, V8.B16

        VMOV    V8.D[0], R0
        RET

aes129plus:
        // Long input: start from the trailing 128 bytes, then absorb the
        // rest 128 bytes per aesloop iteration.
        PRFM    (R0), PLDL1KEEP
        VLD1.P  64(R4), [V1.B16, V2.B16, V3.B16, V4.B16]
        VLD1    (R4), [V5.B16, V6.B16, V7.B16]
        AESE    V30.B16, V1.B16
        AESMC   V1.B16, V1.B16
        AESE    V30.B16, V2.B16
        AESMC   V2.B16, V2.B16
        AESE    V30.B16, V3.B16
        AESMC   V3.B16, V3.B16
        AESE    V30.B16, V4.B16
        AESMC   V4.B16, V4.B16
        AESE    V30.B16, V5.B16
        AESMC   V5.B16, V5.B16
        AESE    V30.B16, V6.B16
        AESMC   V6.B16, V6.B16
        AESE    V30.B16, V7.B16
        AESMC   V7.B16, V7.B16
        ADD     R0, R2, R10
        SUB     $128, R10, R10
        VLD1.P  64(R10), [V8.B16, V9.B16, V10.B16, V11.B16]
        VLD1    (R10), [V12.B16, V13.B16, V14.B16, V15.B16]
        SUB     $1, R2, R2
        LSR     $7, R2, R2      // R2 = number of 128-byte chunks still to process

aesloop:
        AESE    V8.B16, V0.B16
        AESMC   V0.B16, V0.B16
        AESE    V9.B16, V1.B16
        AESMC   V1.B16, V1.B16
        AESE    V10.B16, V2.B16
        AESMC   V2.B16, V2.B16
        AESE    V11.B16, V3.B16
        AESMC   V3.B16, V3.B16
        AESE    V12.B16, V4.B16
        AESMC   V4.B16, V4.B16
        AESE    V13.B16, V5.B16
        AESMC   V5.B16, V5.B16
        AESE    V14.B16, V6.B16
        AESMC   V6.B16, V6.B16
        AESE    V15.B16, V7.B16
        AESMC   V7.B16, V7.B16

        VLD1.P  64(R0), [V8.B16, V9.B16, V10.B16, V11.B16]
        AESE    V8.B16, V0.B16
        AESMC   V0.B16, V0.B16
        AESE    V9.B16, V1.B16
        AESMC   V1.B16, V1.B16
        AESE    V10.B16, V2.B16
        AESMC   V2.B16, V2.B16
        AESE    V11.B16, V3.B16
        AESMC   V3.B16, V3.B16

        VLD1.P  64(R0), [V12.B16, V13.B16, V14.B16, V15.B16]
        AESE    V12.B16, V4.B16
        AESMC   V4.B16, V4.B16
        AESE    V13.B16, V5.B16
        AESMC   V5.B16, V5.B16
        AESE    V14.B16, V6.B16
        AESMC   V6.B16, V6.B16
        AESE    V15.B16, V7.B16
        AESMC   V7.B16, V7.B16
        SUB     $1, R2, R2
        CBNZ    R2, aesloop

        // Final three rounds over the trailing block, then fold the
        // eight lanes down to one 64-bit result.
        AESE    V8.B16, V0.B16
        AESMC   V0.B16, V0.B16
        AESE    V9.B16, V1.B16
        AESMC   V1.B16, V1.B16
        AESE    V10.B16, V2.B16
        AESMC   V2.B16, V2.B16
        AESE    V11.B16, V3.B16
        AESMC   V3.B16, V3.B16
        AESE    V12.B16, V4.B16
        AESMC   V4.B16, V4.B16
        AESE    V13.B16, V5.B16
        AESMC   V5.B16, V5.B16
        AESE    V14.B16, V6.B16
        AESMC   V6.B16, V6.B16
        AESE    V15.B16, V7.B16
        AESMC   V7.B16, V7.B16

        AESE    V8.B16, V0.B16
        AESMC   V0.B16, V0.B16
        AESE    V9.B16, V1.B16
        AESMC   V1.B16, V1.B16
        AESE    V10.B16, V2.B16
        AESMC   V2.B16, V2.B16
        AESE    V11.B16, V3.B16
        AESMC   V3.B16, V3.B16
        AESE    V12.B16, V4.B16
        AESMC   V4.B16, V4.B16
        AESE    V13.B16, V5.B16
        AESMC   V5.B16, V5.B16
        AESE    V14.B16, V6.B16
        AESMC   V6.B16, V6.B16
        AESE    V15.B16, V7.B16
        AESMC   V7.B16, V7.B16

        AESE    V8.B16, V0.B16
        AESE    V9.B16, V1.B16
        AESE    V10.B16, V2.B16
        AESE    V11.B16, V3.B16
        AESE    V12.B16, V4.B16
        AESE    V13.B16, V5.B16
        AESE    V14.B16, V6.B16
        AESE    V15.B16, V7.B16

        VEOR    V0.B16, V1.B16, V0.B16
        VEOR    V2.B16, V3.B16, V2.B16
        VEOR    V4.B16, V5.B16, V4.B16
        VEOR    V6.B16, V7.B16, V6.B16
        VEOR    V0.B16, V2.B16, V0.B16
        VEOR    V4.B16, V6.B16, V4.B16
        VEOR    V4.B16, V0.B16, V0.B16

        VMOV    V0.D[0], R0
        RET
953
// procyield(cycles uint32) spins for the given count, executing one
// YIELD hint per iteration to be polite to SMT siblings.
TEXT runtime·procyield(SB),NOSPLIT,$0-0
        MOVWU   cycles+0(FP), R0
again:
        YIELD
        SUBW    $1, R0
        CBNZ    R0, again
        RET
961
// Save state of caller into g->sched,
// but using fake PC from systemstack_switch.
// Must only be called from functions with no locals ($0)
// or else unwinding from systemstack_switch is incorrect.
// Smashes R0.
TEXT gosave_systemstack_switch<>(SB),NOSPLIT|NOFRAME,$0
        MOVD    $runtime·systemstack_switch(SB), R0
        ADD     $8, R0  // get past prologue
        MOVD    R0, (g_sched+gobuf_pc)(g)
        MOVD    RSP, R0
        MOVD    R0, (g_sched+gobuf_sp)(g)
        MOVD    R29, (g_sched+gobuf_bp)(g)
        MOVD    $0, (g_sched+gobuf_lr)(g)
        MOVD    $0, (g_sched+gobuf_ret)(g)
        // Assert ctxt is zero. See func save.
        MOVD    (g_sched+gobuf_ctxt)(g), R0
        CBZ     R0, 2(PC)
        CALL    runtime·abort(SB)
        RET
981
// func asmcgocall_no_g(fn, arg unsafe.Pointer)
// Call fn(arg) aligned appropriately for the gcc ABI.
// Called on a system stack, and there may be no g yet (during needm).
TEXT ·asmcgocall_no_g(SB),NOSPLIT,$0-16
        MOVD    fn+0(FP), R1
        MOVD    arg+8(FP), R0
        SUB     $16, RSP        // skip over saved frame pointer below RSP
        BL      (R1)
        ADD     $16, RSP        // skip over saved frame pointer below RSP
        RET
992
// func asmcgocall(fn, arg unsafe.Pointer) int32
// Call fn(arg) on the scheduler stack,
// aligned appropriately for the gcc ABI.
// See cgocall.go for more details.
TEXT ·asmcgocall(SB),NOSPLIT,$0-20
        MOVD    fn+0(FP), R1
        MOVD    arg+8(FP), R0

        MOVD    RSP, R2         // save original stack pointer
        CBZ     g, nosave
        MOVD    g, R4

        // Figure out if we need to switch to m->g0 stack.
        // We get called to create new OS threads too, and those
        // come in on the m->g0 stack already. Or we might already
        // be on the m->gsignal stack.
        MOVD    g_m(g), R8
        MOVD    m_gsignal(R8), R3
        CMP     R3, g
        BEQ     nosave
        MOVD    m_g0(R8), R3
        CMP     R3, g
        BEQ     nosave

        // Switch to system stack.
        MOVD    R0, R9  // gosave_systemstack_switch<> and save_g might clobber R0
        BL      gosave_systemstack_switch<>(SB)
        MOVD    R3, g
        BL      runtime·save_g(SB)
        MOVD    (g_sched+gobuf_sp)(g), R0
        MOVD    R0, RSP
        MOVD    (g_sched+gobuf_bp)(g), R29
        MOVD    R9, R0

        // Now on a scheduling stack (a pthread-created stack).
        // Save room for two of our pointers.
        MOVD    RSP, R13
        SUB     $16, R13
        MOVD    R13, RSP
        MOVD    R4, 0(RSP)      // save old g on stack
        MOVD    (g_stack+stack_hi)(R4), R4
        SUB     R2, R4
        MOVD    R4, 8(RSP)      // save depth in old g stack (can't just save SP, as stack might be copied during a callback)
        BL      (R1)
        MOVD    R0, R9

        // Restore g, stack pointer. R0 is errno, so don't touch it
        MOVD    0(RSP), g
        BL      runtime·save_g(SB)
        MOVD    (g_stack+stack_hi)(g), R5
        MOVD    8(RSP), R6
        SUB     R6, R5          // recompute SP from depth: handles a stack copy during the call
        MOVD    R9, R0
        MOVD    R5, RSP

        MOVW    R0, ret+16(FP)
        RET

nosave:
        // Running on a system stack, perhaps even without a g.
        // Having no g can happen during thread creation or thread teardown
        // (see needm/dropm on Solaris, for example).
        // This code is like the above sequence but without saving/restoring g
        // and without worrying about the stack moving out from under us
        // (because we're on a system stack, not a goroutine stack).
        // The above code could be used directly if already on a system stack,
        // but then the only path through this code would be a rare case on Solaris.
        // Using this code for all "already on system stack" calls exercises it more,
        // which should help keep it correct.
        MOVD    RSP, R13
        SUB     $16, R13
        MOVD    R13, RSP
        MOVD    $0, R4
        MOVD    R4, 0(RSP)      // Where above code stores g, in case someone looks during debugging.
        MOVD    R2, 8(RSP)      // Save original stack pointer.
        BL      (R1)
        // Restore stack pointer.
        MOVD    8(RSP), R2
        MOVD    R2, RSP
        MOVD    R0, ret+16(FP)
        RET
1075
1076 // cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)
1077 // See cgocall.go for more details.
1078 TEXT ·cgocallback(SB),NOSPLIT,$24-24
1079 NO_LOCAL_POINTERS
1080
1081 // Skip cgocallbackg, just dropm when fn is nil, and frame is the saved g.
1082 // It is used to dropm while thread is exiting.
1083 MOVD fn+0(FP), R1
1084 CBNZ R1, loadg
1085 // Restore the g from frame.
1086 MOVD frame+8(FP), g
1087 B dropm
1088
1089 loadg:
1090 // Load g from thread-local storage.
1091 BL runtime·load_g(SB)
1092
1093 // If g is nil, Go did not create the current thread,
1094 // or if this thread never called into Go on pthread platforms.
1095 // Call needm to obtain one for temporary use.
1096 // In this case, we're running on the thread stack, so there's
1097 // lots of space, but the linker doesn't know. Hide the call from
1098 // the linker analysis by using an indirect call.
1099 CBZ g, needm
1100
1101 MOVD g_m(g), R8
1102 MOVD R8, savedm-8(SP)
1103 B havem
1104
1105 needm:
1106 MOVD g, savedm-8(SP) // g is zero, so is m.
1107 MOVD $runtime·needAndBindM(SB), R0
1108 BL (R0)
1109
1110 // Set m->g0->sched.sp = SP, so that if a panic happens
1111 // during the function we are about to execute, it will
1112 // have a valid SP to run on the g0 stack.
1113 // The next few lines (after the havem label)
1114 // will save this SP onto the stack and then write
1115 // the same SP back to m->sched.sp. That seems redundant,
1116 // but if an unrecovered panic happens, unwindm will
1117 // restore the g->sched.sp from the stack location
1118 // and then systemstack will try to use it. If we don't set it here,
1119 // that restored SP will be uninitialized (typically 0) and
1120 // will not be usable.
1121 MOVD g_m(g), R8
1122 MOVD m_g0(R8), R3
1123 MOVD RSP, R0
1124 MOVD R0, (g_sched+gobuf_sp)(R3)
1125 MOVD R29, (g_sched+gobuf_bp)(R3)
1126
1127 havem:
1128 // Now there's a valid m, and we're running on its m->g0.
1129 // Save current m->g0->sched.sp on stack and then set it to SP.
1130 // Save current sp in m->g0->sched.sp in preparation for
1131 // switch back to m->curg stack.
1132 // NOTE: unwindm knows that the saved g->sched.sp is at 16(RSP) aka savedsp-16(SP).
1133 // Beware that the frame size is actually 32+16.
1134 MOVD m_g0(R8), R3
1135 MOVD (g_sched+gobuf_sp)(R3), R4
1136 MOVD R4, savedsp-16(SP)
1137 MOVD RSP, R0
1138 MOVD R0, (g_sched+gobuf_sp)(R3)
1139
1140 // Switch to m->curg stack and call runtime.cgocallbackg.
1141 // Because we are taking over the execution of m->curg
1142 // but *not* resuming what had been running, we need to
1143 // save that information (m->curg->sched) so we can restore it.
1144 // We can restore m->curg->sched.sp easily, because calling
1145 // runtime.cgocallbackg leaves SP unchanged upon return.
1146 // To save m->curg->sched.pc, we push it onto the curg stack and
1147 // open a frame the same size as cgocallback's g0 frame.
1148 // Once we switch to the curg stack, the pushed PC will appear
1149 // to be the return PC of cgocallback, so that the traceback
1150 // will seamlessly trace back into the earlier calls.
1151 MOVD m_curg(R8), g
1152 BL runtime·save_g(SB)
1153 MOVD (g_sched+gobuf_sp)(g), R4 // prepare stack as R4
1154 MOVD (g_sched+gobuf_pc)(g), R5
1155 MOVD R5, -48(R4)
1156 MOVD (g_sched+gobuf_bp)(g), R5
1157 MOVD R5, -56(R4)
1158 // Gather our arguments into registers.
1159 MOVD fn+0(FP), R1
1160 MOVD frame+8(FP), R2
1161 MOVD ctxt+16(FP), R3
1162 MOVD $-48(R4), R0 // maintain 16-byte SP alignment
1163 MOVD R0, RSP // switch stack
1164 MOVD R1, 8(RSP)
1165 MOVD R2, 16(RSP)
1166 MOVD R3, 24(RSP)
1167 MOVD $runtime·cgocallbackg(SB), R0
1168 CALL (R0) // indirect call to bypass nosplit check. We're on a different stack now.
1169
1170 // Restore g->sched (== m->curg->sched) from saved values.
1171 MOVD 0(RSP), R5
1172 MOVD R5, (g_sched+gobuf_pc)(g)
1173 MOVD RSP, R4
1174 ADD $48, R4, R4
1175 MOVD R4, (g_sched+gobuf_sp)(g)
1176
1177 // Switch back to m->g0's stack and restore m->g0->sched.sp.
1178 // (Unlike m->curg, the g0 goroutine never uses sched.pc,
1179 // so we do not have to restore it.)
1180 MOVD g_m(g), R8
1181 MOVD m_g0(R8), g
1182 BL runtime·save_g(SB)
1183 MOVD (g_sched+gobuf_sp)(g), R0
1184 MOVD R0, RSP
1185 MOVD savedsp-16(SP), R4
1186 MOVD R4, (g_sched+gobuf_sp)(g)
1187
1188 // If the m on entry was nil, we called needm above to borrow an m,
1189 // 1. for the duration of the call on non-pthread platforms,
1190 // 2. or the duration of the C thread alive on pthread platforms.
1191 // If the m on entry wasn't nil,
1192 // 1. the thread might be a Go thread,
1193 // 2. or it wasn't the first call from a C thread on pthread platforms,
1194 // since then we skip dropm to reuse the m in the first call.
1195 MOVD savedm-8(SP), R6
1196 CBNZ R6, droppedm
1197
1198 // Skip dropm to reuse it in the next call, when a pthread key has been created.
1199 MOVD _cgo_pthread_key_created(SB), R6
1200 // It means cgo is disabled when _cgo_pthread_key_created is a nil pointer, need dropm.
1201 CBZ R6, dropm
1202 MOVD (R6), R6
1203 CBNZ R6, droppedm
1204
1205 dropm:
1206 MOVD $runtime·dropm(SB), R0
1207 BL (R0)
1208 droppedm:
1209
1210 // Done!
1211 RET
1212
1213 // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
1214 // Must obey the gcc calling convention.
1215 TEXT _cgo_topofstack(SB),NOSPLIT,$24
1216 // g (R28) and REGTMP (R27) might be clobbered by load_g. They
1217 // are callee-save in the gcc calling convention, so save them.
1218 MOVD R27, savedR27-8(SP)
1219 MOVD g, saveG-16(SP)
1220
1221 BL runtime·load_g(SB)
1222 MOVD g_m(g), R0 // R0 = g.m
1223 MOVD m_curg(R0), R0 // R0 = m.curg
1224 MOVD (g_stack+stack_hi)(R0), R0 // return value: curg.stack.hi
1225
1226 MOVD saveG-16(SP), g
1227 MOVD savedR27-8(SP), R27 // reload from the slot stored above (name was inconsistently "savedR28")
1228 RET
1229
1230 // void setg(G*); set g. for use by needm.
1231 TEXT runtime·setg(SB), NOSPLIT, $0-8
1232 MOVD gg+0(FP), g // install the G* argument into the g register (R28)
1233 // This only happens if iscgo, so jump straight to save_g
1234 BL runtime·save_g(SB) // keep the thread-local copy of g in sync
1235 RET
1236
1237 // void setg_gcc(G*); set g called from gcc
1238 TEXT setg_gcc<>(SB),NOSPLIT,$8
1239 MOVD R0, g // C-ABI arg 0: the G* to install
1240 MOVD R27, savedR27-8(SP) // R27 (REGTMP) is callee-save in the gcc calling convention; save_g may clobber it
1241 BL runtime·save_g(SB)
1242 MOVD savedR27-8(SP), R27
1243 RET
1244
// emptyfunc does nothing and returns immediately.
1245 TEXT runtime·emptyfunc(SB),0,$0-0
1246 RET
1247
// abort crashes the process immediately.
1248 TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0
1249 MOVD ZR, R0 // R0 = 0
1250 MOVD (R0), R0 // deliberate load from nil address -> fault
1251 UNDEF // if the nil dereference somehow does not fault, trap on an undefined instruction
1252
// return0 places 0 in the first result register.
1253 TEXT runtime·return0(SB), NOSPLIT, $0
1254 MOVW $0, R0
1255 RET
1256
1257 // The top-most function running on a goroutine
1258 // returns to goexit+PCQuantum.
1259 TEXT runtime·goexit(SB),NOSPLIT|NOFRAME,$0-0
1260 MOVD R0, R0 // NOP — keeps the return address goexit+PCQuantum inside this function
1261 BL runtime·goexit1(SB) // does not return
1262
1263 // This is called from .init_array and follows the platform, not Go, ABI.
1264 TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
1265 SUB $0x10, RSP
1266 MOVD R27, 8(RSP) // The access to global variables below implicitly uses R27, which is callee-save
1267 MOVD runtime·lastmoduledatap(SB), R1 // R1 = current tail of the moduledata list
1268 MOVD R0, moduledata_next(R1) // append: tail.next = new moduledata (platform-ABI arg 0 in R0)
1269 MOVD R0, runtime·lastmoduledatap(SB) // the new entry becomes the tail
1270 MOVD 8(RSP), R27
1271 ADD $0x10, RSP
1272 RET
1273
// checkASM always reports true on arm64.
1274 TEXT ·checkASM(SB),NOSPLIT,$0-1
1275 MOVW $1, R3
1276 MOVB R3, ret+0(FP) // ret = true
1277 RET
1278
1279 // gcWriteBarrier informs the GC about heap pointer writes.
1280 //
1281 // gcWriteBarrier does NOT follow the Go ABI. It accepts the
1282 // number of bytes of buffer needed in R25, and returns a pointer
1283 // to the buffer space in R25.
1284 // It clobbers condition codes.
1285 // It does not clobber any general-purpose registers except R27,
1286 // but may clobber others (e.g., floating point registers)
1287 // The act of CALLing gcWriteBarrier will clobber R30 (LR).
1288 TEXT gcWriteBarrier<>(SB),NOSPLIT,$200
1289 // Save the registers clobbered by the fast path.
1290 STP (R0, R1), 184(RSP)
1291 retry:
1292 MOVD g_m(g), R0 // R0 = g.m
1293 MOVD m_p(R0), R0 // R0 = m.p
1294 MOVD (p_wbBuf+wbBuf_next)(R0), R1 // R1 = p.wbBuf.next
1295 MOVD (p_wbBuf+wbBuf_end)(R0), R27 // R27 = p.wbBuf.end
1296 // Increment wbBuf.next position.
1297 ADD R25, R1
1298 // Is the buffer full?
1299 CMP R27, R1
1300 BHI flush // new next > end: no room; flush the buffer, then retry
1301 // Commit to the larger buffer.
1302 MOVD R1, (p_wbBuf+wbBuf_next)(R0)
1303 // Make return value (the original next position)
1304 SUB R25, R1, R25 // R25 = pointer to the reserved R25 bytes
1305 // Restore registers.
1306 LDP 184(RSP), (R0, R1)
1307 RET
1308
1309 flush:
1310 // Save all general purpose registers since these could be
1311 // clobbered by wbBufFlush and were not saved by the caller.
1312 // R0 and R1 already saved
1313 STP (R2, R3), 1*8(RSP)
1314 STP (R4, R5), 3*8(RSP)
1315 STP (R6, R7), 5*8(RSP)
1316 STP (R8, R9), 7*8(RSP)
1317 STP (R10, R11), 9*8(RSP)
1318 STP (R12, R13), 11*8(RSP)
1319 STP (R14, R15), 13*8(RSP)
1320 // R16, R17 may be clobbered by linker trampoline
1321 // R18 is unused.
1322 STP (R19, R20), 15*8(RSP)
1323 STP (R21, R22), 17*8(RSP)
1324 STP (R23, R24), 19*8(RSP)
1325 STP (R25, R26), 21*8(RSP)
1326 // R27 is temp register.
1327 // R28 is g.
1328 // R29 is frame pointer (unused).
1329 // R30 is LR, which was saved by the prologue.
1330 // R31 is SP.
1331
1332 CALL runtime·wbBufFlush(SB) // empties the buffer, resetting wbBuf.next
1333 LDP 1*8(RSP), (R2, R3)
1334 LDP 3*8(RSP), (R4, R5)
1335 LDP 5*8(RSP), (R6, R7)
1336 LDP 7*8(RSP), (R8, R9)
1337 LDP 9*8(RSP), (R10, R11)
1338 LDP 11*8(RSP), (R12, R13)
1339 LDP 13*8(RSP), (R14, R15)
1340 LDP 15*8(RSP), (R19, R20)
1341 LDP 17*8(RSP), (R21, R22)
1342 LDP 19*8(RSP), (R23, R24)
1343 LDP 21*8(RSP), (R25, R26)
1344 JMP retry // reservation should now succeed
1345
// gcWriteBarrier{1..8} reserve N*8 bytes of write barrier buffer space by
// loading the byte count into R25 and tail-calling the shared gcWriteBarrier<>
// implementation above.
1346 TEXT runtime·gcWriteBarrier1<ABIInternal>(SB),NOSPLIT,$0
1347 MOVD $8, R25
1348 JMP gcWriteBarrier<>(SB)
1349 TEXT runtime·gcWriteBarrier2<ABIInternal>(SB),NOSPLIT,$0
1350 MOVD $16, R25
1351 JMP gcWriteBarrier<>(SB)
1352 TEXT runtime·gcWriteBarrier3<ABIInternal>(SB),NOSPLIT,$0
1353 MOVD $24, R25
1354 JMP gcWriteBarrier<>(SB)
1355 TEXT runtime·gcWriteBarrier4<ABIInternal>(SB),NOSPLIT,$0
1356 MOVD $32, R25
1357 JMP gcWriteBarrier<>(SB)
1358 TEXT runtime·gcWriteBarrier5<ABIInternal>(SB),NOSPLIT,$0
1359 MOVD $40, R25
1360 JMP gcWriteBarrier<>(SB)
1361 TEXT runtime·gcWriteBarrier6<ABIInternal>(SB),NOSPLIT,$0
1362 MOVD $48, R25
1363 JMP gcWriteBarrier<>(SB)
1364 TEXT runtime·gcWriteBarrier7<ABIInternal>(SB),NOSPLIT,$0
1365 MOVD $56, R25
1366 JMP gcWriteBarrier<>(SB)
1367 TEXT runtime·gcWriteBarrier8<ABIInternal>(SB),NOSPLIT,$0
1368 MOVD $64, R25
1369 JMP gcWriteBarrier<>(SB)
1370
// Error string reported when a debugger-injected call frame exceeds the
// largest size in the dispatch table of debugCallV2 below.
1371 DATA debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"
1372 GLOBL debugCallFrameTooLarge<>(SB), RODATA, $20 // Size duplicated below
1373
1374 // debugCallV2 is the entry point for debugger-injected function
1375 // calls on running goroutines. It informs the runtime that a
1376 // debug call has been injected and creates a call frame for the
1377 // debugger to fill in.
1378 //
1379 // To inject a function call, a debugger should:
1380 // 1. Check that the goroutine is in state _Grunning and that
1381 // there are at least 288 bytes free on the stack.
1382 // 2. Set SP as SP-16.
1383 // 3. Store the current LR in (SP) (using the SP after step 2).
1384 // 4. Store the current PC in the LR register.
1385 // 5. Write the desired argument frame size at SP-16
1386 // 6. Save all machine registers (including flags and fpsimd registers)
1387 // so they can be restored later by the debugger.
1388 // 7. Set the PC to debugCallV2 and resume execution.
1389 //
1390 // If the goroutine is in state _Grunnable, then it's not generally
1391 // safe to inject a call because it may return out via other runtime
1392 // operations. Instead, the debugger should unwind the stack to find
1393 // the return to non-runtime code, add a temporary breakpoint there,
1394 // and inject the call once that breakpoint is hit.
1395 //
1396 // If the goroutine is in any other state, it's not safe to inject a call.
1397 //
1398 // This function communicates back to the debugger by setting R20 and
1399 // invoking BRK to raise a breakpoint signal. Note that the signal PC of
1400 // the signal triggered by the BRK instruction is the PC where the signal
1401 // is trapped, not the next PC, so to resume execution, the debugger needs
1402 // to set the signal PC to PC+4. See the comments in the implementation for
1403 // the protocol the debugger is expected to follow. InjectDebugCall in the
1404 // runtime tests demonstrates this protocol.
1405 //
1406 // The debugger must ensure that any pointers passed to the function
1407 // obey escape analysis requirements. Specifically, it must not pass
1408 // a stack pointer to an escaping argument. debugCallV2 cannot check
1409 // this invariant.
1410 //
1411 // This is ABIInternal because Go code injects its PC directly into new
1412 // goroutine stacks.
1413 TEXT runtime·debugCallV2<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-0
1414 STP (R29, R30), -280(RSP) // save FP/LR below the 272-byte area opened next (they land at -8(RSP) afterwards)
1415 SUB $272, RSP, RSP
1416 SUB $8, RSP, R29
1417 // Save all registers that may contain pointers so they can be
1418 // conservatively scanned.
1419 //
1420 // We can't do anything that might clobber any of these
1421 // registers before this.
1422 STP (R27, g), (30*8)(RSP)
1423 STP (R25, R26), (28*8)(RSP)
1424 STP (R23, R24), (26*8)(RSP)
1425 STP (R21, R22), (24*8)(RSP)
1426 STP (R19, R20), (22*8)(RSP)
1427 STP (R16, R17), (20*8)(RSP)
1428 STP (R14, R15), (18*8)(RSP)
1429 STP (R12, R13), (16*8)(RSP)
1430 STP (R10, R11), (14*8)(RSP)
1431 STP (R8, R9), (12*8)(RSP)
1432 STP (R6, R7), (10*8)(RSP)
1433 STP (R4, R5), (8*8)(RSP)
1434 STP (R2, R3), (6*8)(RSP)
1435 STP (R0, R1), (4*8)(RSP)
1436
1437 // Perform a safe-point check.
1438 MOVD R30, 8(RSP) // Caller's PC
1439 CALL runtime·debugCallCheck(SB)
1440 MOVD 16(RSP), R0 // result from debugCallCheck: nil means injection is safe
1441 CBZ R0, good
1442
1443 // The safety check failed. Put the reason string at the top
1444 // of the stack.
1445 MOVD R0, 8(RSP)
1446 MOVD 24(RSP), R0
1447 MOVD R0, 16(RSP)
1448
1449 // Set R20 to 8 and invoke BRK. The debugger should get the
1450 // reason a call can't be injected from SP+8 and resume execution.
1451 MOVD $8, R20
1452 BREAK
1453 JMP restore
1454
1455 good:
1456 // Registers are saved and it's safe to make a call.
1457 // Open up a call frame, moving the stack if necessary.
1458 //
1459 // Once the frame is allocated, this will set R20 to 0 and
1460 // invoke BRK. The debugger should write the argument
1461 // frame for the call at SP+8, set up argument registers,
1462 // set the LR as the signal PC + 4, set the PC to the function
1463 // to call, set R26 to point to the closure (if a closure call),
1464 // and resume execution.
1465 //
1466 // If the function returns, this will set R20 to 1 and invoke
1467 // BRK. The debugger can then inspect any return value saved
1468 // on the stack at SP+8 and in registers. To resume execution,
1469 // the debugger should restore the LR from (SP).
1470 //
1471 // If the function panics, this will set R20 to 2 and invoke BRK.
1472 // The interface{} value of the panic will be at SP+8. The debugger
1473 // can inspect the panic value and resume execution again.
1474 #define DEBUG_CALL_DISPATCH(NAME,MAXSIZE) \
1475 CMP $MAXSIZE, R0; \
1476 BGT 5(PC); \
1477 MOVD $NAME(SB), R0; \
1478 MOVD R0, 8(RSP); \
1479 CALL runtime·debugCallWrap(SB); \
1480 JMP restore
1481
1482 MOVD 256(RSP), R0 // the argument frame size
1483 DEBUG_CALL_DISPATCH(debugCall32<>, 32)
1484 DEBUG_CALL_DISPATCH(debugCall64<>, 64)
1485 DEBUG_CALL_DISPATCH(debugCall128<>, 128)
1486 DEBUG_CALL_DISPATCH(debugCall256<>, 256)
1487 DEBUG_CALL_DISPATCH(debugCall512<>, 512)
1488 DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
1489 DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
1490 DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
1491 DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
1492 DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
1493 DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
1494 DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
1495 // The frame size is too large. Report the error.
1496 MOVD $debugCallFrameTooLarge<>(SB), R0
1497 MOVD R0, 8(RSP)
1498 MOVD $20, R0
1499 MOVD R0, 16(RSP) // length of debugCallFrameTooLarge string
1500 MOVD $8, R20
1501 BREAK
1502 JMP restore
1503
1504 restore:
1505 // Calls and failures resume here.
1506 //
1507 // Set R20 to 16 and invoke BRK. The debugger should restore
1508 // all registers except for PC and RSP and resume execution.
1509 MOVD $16, R20
1510 BREAK
1511 // We must not modify flags after this point.
1512
1513 // Restore pointer-containing registers, which may have been
1514 // modified from the debugger's copy by stack copying.
1515 LDP (30*8)(RSP), (R27, g)
1516 LDP (28*8)(RSP), (R25, R26)
1517 LDP (26*8)(RSP), (R23, R24)
1518 LDP (24*8)(RSP), (R21, R22)
1519 LDP (22*8)(RSP), (R19, R20)
1520 LDP (20*8)(RSP), (R16, R17)
1521 LDP (18*8)(RSP), (R14, R15)
1522 LDP (16*8)(RSP), (R12, R13)
1523 LDP (14*8)(RSP), (R10, R11)
1524 LDP (12*8)(RSP), (R8, R9)
1525 LDP (10*8)(RSP), (R6, R7)
1526 LDP (8*8)(RSP), (R4, R5)
1527 LDP (6*8)(RSP), (R2, R3)
1528 LDP (4*8)(RSP), (R0, R1)
1529
1530 LDP -8(RSP), (R29, R27) // reload FP and (into R27) the LR saved at entry — the PC to resume at
1531 ADD $288, RSP, RSP // Add 16 more bytes, see saveSigContext
1532 MOVD -16(RSP), R30 // restore old lr
1533 JMP (R27) // resume at the injection point
1534
1535 // runtime.debugCallCheck assumes that functions defined with the
1536 // DEBUG_CALL_FN macro are safe points to inject calls.
// Each debugCallNN<> function opens a MAXSIZE-byte frame and drives the
// debugger protocol described in debugCallV2: R20=0 + BRK asks the debugger
// to write the call frame and start the call; R20=1 + BRK reports that the
// injected call returned. (No comments inside the macro body: a // comment
// before a trailing backslash would break the cpp line continuation.)
1537 #define DEBUG_CALL_FN(NAME,MAXSIZE) \
1538 TEXT NAME(SB),WRAPPER,$MAXSIZE-0; \
1539 NO_LOCAL_POINTERS; \
1540 MOVD $0, R20; \
1541 BREAK; \
1542 MOVD $1, R20; \
1543 BREAK; \
1544 RET
1545 DEBUG_CALL_FN(debugCall32<>, 32)
1546 DEBUG_CALL_FN(debugCall64<>, 64)
1547 DEBUG_CALL_FN(debugCall128<>, 128)
1548 DEBUG_CALL_FN(debugCall256<>, 256)
1549 DEBUG_CALL_FN(debugCall512<>, 512)
1550 DEBUG_CALL_FN(debugCall1024<>, 1024)
1551 DEBUG_CALL_FN(debugCall2048<>, 2048)
1552 DEBUG_CALL_FN(debugCall4096<>, 4096)
1553 DEBUG_CALL_FN(debugCall8192<>, 8192)
1554 DEBUG_CALL_FN(debugCall16384<>, 16384)
1555 DEBUG_CALL_FN(debugCall32768<>, 32768)
1556 DEBUG_CALL_FN(debugCall65536<>, 65536)
1557
1558 // func debugCallPanicked(val interface{})
1559 TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
1560 // Copy the panic value to the top of stack at SP+8.
1561 MOVD val_type+0(FP), R0
1562 MOVD R0, 8(RSP)
1563 MOVD val_data+8(FP), R0
1564 MOVD R0, 16(RSP)
1565 MOVD $2, R20 // R20=2: tell the debugger the injected call panicked; value is at SP+8
1566 BREAK
1567 RET
1568
1569 // Note: these functions use a special calling convention to save generated code space.
1570 // Arguments are passed in registers, but the space for those arguments are allocated
1571 // in the caller's stack frame. These stubs write the args into that stack space and
1572 // then tail call to the corresponding runtime handler.
1573 // The tail call makes these stubs disappear in backtraces.
1574 //
1575 // Defined as ABIInternal since the compiler generates ABIInternal
1576 // calls to it directly and it does not use the stack-based Go ABI.
// Where MOVDs appear below, they shuffle the compiler-assigned argument
// registers into R0/R1, the registers the goPanic handlers expect.
1577 TEXT runtime·panicIndex<ABIInternal>(SB),NOSPLIT,$0-16
1578 JMP runtime·goPanicIndex<ABIInternal>(SB)
1579 TEXT runtime·panicIndexU<ABIInternal>(SB),NOSPLIT,$0-16
1580 JMP runtime·goPanicIndexU<ABIInternal>(SB)
1581 TEXT runtime·panicSliceAlen<ABIInternal>(SB),NOSPLIT,$0-16
1582 MOVD R1, R0
1583 MOVD R2, R1
1584 JMP runtime·goPanicSliceAlen<ABIInternal>(SB)
1585 TEXT runtime·panicSliceAlenU<ABIInternal>(SB),NOSPLIT,$0-16
1586 MOVD R1, R0
1587 MOVD R2, R1
1588 JMP runtime·goPanicSliceAlenU<ABIInternal>(SB)
1589 TEXT runtime·panicSliceAcap<ABIInternal>(SB),NOSPLIT,$0-16
1590 MOVD R1, R0
1591 MOVD R2, R1
1592 JMP runtime·goPanicSliceAcap<ABIInternal>(SB)
1593 TEXT runtime·panicSliceAcapU<ABIInternal>(SB),NOSPLIT,$0-16
1594 MOVD R1, R0
1595 MOVD R2, R1
1596 JMP runtime·goPanicSliceAcapU<ABIInternal>(SB)
1597 TEXT runtime·panicSliceB<ABIInternal>(SB),NOSPLIT,$0-16
1598 JMP runtime·goPanicSliceB<ABIInternal>(SB)
1599 TEXT runtime·panicSliceBU<ABIInternal>(SB),NOSPLIT,$0-16
1600 JMP runtime·goPanicSliceBU<ABIInternal>(SB)
1601 TEXT runtime·panicSlice3Alen<ABIInternal>(SB),NOSPLIT,$0-16
1602 MOVD R2, R0
1603 MOVD R3, R1
1604 JMP runtime·goPanicSlice3Alen<ABIInternal>(SB)
1605 TEXT runtime·panicSlice3AlenU<ABIInternal>(SB),NOSPLIT,$0-16
1606 MOVD R2, R0
1607 MOVD R3, R1
1608 JMP runtime·goPanicSlice3AlenU<ABIInternal>(SB)
1609 TEXT runtime·panicSlice3Acap<ABIInternal>(SB),NOSPLIT,$0-16
1610 MOVD R2, R0
1611 MOVD R3, R1
1612 JMP runtime·goPanicSlice3Acap<ABIInternal>(SB)
1613 TEXT runtime·panicSlice3AcapU<ABIInternal>(SB),NOSPLIT,$0-16
1614 MOVD R2, R0
1615 MOVD R3, R1
1616 JMP runtime·goPanicSlice3AcapU<ABIInternal>(SB)
1617 TEXT runtime·panicSlice3B<ABIInternal>(SB),NOSPLIT,$0-16
1618 MOVD R1, R0
1619 MOVD R2, R1
1620 JMP runtime·goPanicSlice3B<ABIInternal>(SB)
1621 TEXT runtime·panicSlice3BU<ABIInternal>(SB),NOSPLIT,$0-16
1622 MOVD R1, R0
1623 MOVD R2, R1
1624 JMP runtime·goPanicSlice3BU<ABIInternal>(SB)
1625 TEXT runtime·panicSlice3C<ABIInternal>(SB),NOSPLIT,$0-16
1626 JMP runtime·goPanicSlice3C<ABIInternal>(SB)
1627 TEXT runtime·panicSlice3CU<ABIInternal>(SB),NOSPLIT,$0-16
1628 JMP runtime·goPanicSlice3CU<ABIInternal>(SB)
1629 TEXT runtime·panicSliceConvert<ABIInternal>(SB),NOSPLIT,$0-16
1630 MOVD R2, R0
1631 MOVD R3, R1
1632 JMP runtime·goPanicSliceConvert<ABIInternal>(SB)
1633
// getfp returns the current value of the frame pointer register.
1634 TEXT ·getfp<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
1635 MOVD R29, R0 // R29 is the frame pointer register on arm64
1636 RET
1637
View as plain text