Text file
src/runtime/memclr_arm64.s
1 // Copyright 2014 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "textflag.h"
6
7 // See memclrNoHeapPointers Go doc for important implementation constraints.
8
9 // func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr)
10 // Also called from assembly in sys_windows_arm64.s without g (but using Go stack convention).
//
// Zeroes the n bytes starting at ptr.
//
// Register use in this routine:
//   R0      ptr on entry (all stores go through it); advanced as bytes are
//           zeroed on the n > 16 paths
//   R1      n on entry; afterwards the number of bytes still to zero
//           (pre-decremented inside the two main loops, see below)
//   R3, R9  scratch
//   R4      number of bytes needed to reach the next alignment boundary
//   R5      DC ZVA block size in bytes; R6 = R5 - 1 is the matching mask
//   R7      ptr + n, used only when n < 16
//
// Strategy by size:
//   n < 16   two overlapping 8- or 4-byte stores, or a byte plus a halfword
//   n == 16  a single STP
//   n > 16   align R0 to 16, then either a 64-byte STP loop (no_zva) or
//            DC ZVA over whole blocks (zero_by_line), then a tail of < 64 bytes
11 TEXT runtime·memclrNoHeapPointers<ABIInternal>(SB),NOSPLIT,$0-16
// One comparison against 16 feeds both of the branches below.
12 CMP $16, R1
13 // If n is equal to 16 bytes, use zero_exact_16 to zero
14 BEQ zero_exact_16
15
16 // If n is greater than 16 bytes, use zero_by_16 to zero
17 BHI zero_by_16
18
19 // n is less than 16 bytes
// R7 = one past the last byte, so the second store of each pair can be
// addressed backwards from the end of the buffer.
20 ADD R1, R0, R7
// Bit 3 of n set means 8 <= n <= 15: zero the first 8 and the last 8 bytes.
// The two stores overlap whenever n < 16, which is harmless for zeroing.
21 TBZ $3, R1, less_than_8
22 MOVD ZR, (R0)
23 MOVD ZR, -8(R7)
24 RET
25
26 less_than_8:
// Bit 2 of n set means 4 <= n <= 7: same trick with two 4-byte stores.
27 TBZ $2, R1, less_than_4
28 MOVW ZR, (R0)
29 MOVW ZR, -4(R7)
30 RET
31
32 less_than_4:
// n is 0..3. Nothing to do for 0; otherwise zero the first byte, and when
// bit 1 is set (n is 2 or 3) also zero the last two bytes.
33 CBZ R1, ending
34 MOVB ZR, (R0)
35 TBZ $1, R1, ending
36 MOVH ZR, -2(R7)
37
38 ending:
39 RET
40
41 zero_exact_16:
42 // n is exactly 16 bytes
43 STP (ZR, ZR), (R0)
44 RET
45
46 zero_by_16:
47 // n greater than 16 bytes, check if the start address is aligned
// R4 = (-ptr) & 15 = number of bytes up to the next 16-byte boundary.
48 NEG R0, R4
49 ANDS $15, R4, R4
50 // Try zeroing using zva if the start address is aligned with 16
51 BEQ try_zva
52
53 // Non-aligned store
// Zero the first 16 bytes in place (n > 16, so this stays inside the
// buffer); it covers the R4 bytes that are skipped next.
54 STP (ZR, ZR), (R0)
55 // Make the destination aligned
56 SUB R4, R1, R1
57 ADD R4, R0, R0
58 B try_zva
59
60 tail_maybe_long:
// Entered from loop_zva with R1 = bytes left over after the last whole
// block. With large blocks that can still be 64 bytes or more, in which
// case the plain 64-byte loop handles it first.
61 CMP $64, R1
62 BHS no_zva
63
64 tail63:
// Fewer than 64 bytes remain. Only the low six bits of R1 are examined
// from here on, so R1 may still hold the pre-decremented value left by loop_64.
// R3 = R1 & 48 is the size of the whole 16-byte chunks (0, 16, 32 or 48):
//   48 falls through and performs three stores,
//   32 enters at last48 and performs two,
//   16 enters at last32 and performs one.
// STP.P stores at (R0) and then advances R0 by 16.
65 ANDS $48, R1, R3
66 BEQ last16
67 CMPW $32, R3
68 BEQ last48
69 BLT last32
70 STP.P (ZR, ZR), 16(R0)
71 last48:
72 STP.P (ZR, ZR), 16(R0)
73 last32:
74 STP.P (ZR, ZR), 16(R0)
75 // The last store length is at most 16, so it is safe to use
76 // stp to write last 16 bytes
// R1 & 15 is the number of trailing bytes. When it is non-zero, R0 is moved
// to the end of the buffer and the 16 bytes ending there are zeroed. That
// re-zeroes some bytes already written, and it stays inside the buffer
// because every path to tail63 begins at zero_by_16, where n > 16.
77 last16:
78 ANDS $15, R1, R1
79 CBZ R1, last_end
80 ADD R1, R0, R0
81 STP (ZR, ZR), -16(R0)
82 last_end:
83 RET
84
85 PCALIGN $16
86 no_zva:
// Plain store loop, 64 bytes per iteration. Every branch to this label is
// taken with at least 64 bytes left in R1.
// R0 is backed up by 16 so the loop can use offsets 16, 32, 48 and finish
// with a pre-indexed store (STP.W) at offset 64 that also advances R0 by 64.
// R1 is pre-decremented by 64 so SUBS/BGE leaves the loop as soon as fewer
// than 64 bytes remain.
87 SUB $16, R0, R0
88 SUB $64, R1, R1
89
90 loop_64:
91 STP (ZR, ZR), 16(R0)
92 STP (ZR, ZR), 32(R0)
93 STP (ZR, ZR), 48(R0)
94 STP.W (ZR, ZR), 64(R0)
95 SUBS $64, R1, R1
96 BGE loop_64
// R1 is now in [-64, -1]; its low six bits are the bytes still to zero.
// ANDS with ZR as destination only sets the flags, and the ADD that
// removes the 16-byte bias from R0 leaves them untouched, so BNE tests
// whether any tail bytes remain.
97 ANDS $63, R1, ZR
98 ADD $16, R0, R0
99 BNE tail63
100 RET
101
102 PCALIGN $16
103 try_zva:
104 // Try using the ZVA feature to zero entire cache lines
105 // It is not meaningful to use ZVA if the block size is less than 64,
106 // so make sure that n is greater than or equal to 64
// R0 is 16-byte aligned here and R1 holds the bytes left after alignment.
107 CMP $63, R1
108 BLE tail63
109
110 CMP $128, R1
111 // Ensure n is at least 128 bytes, so that there is enough to zero after
112 // alignment.
113 BLT no_zva
114 // Check if ZVA is allowed from user code, and if so get the block size
// block_size caches the result of the probe below:
//   bit 31 set  DC ZVA is prohibited, use the store loop
//   non-zero    block size in bytes, already probed
//   zero        not probed yet, read DCZID_EL0 now
115 MOVW block_size<>(SB), R5
116 TBNZ $31, R5, no_zva
117 CBNZ R5, zero_by_line
118 // DCZID_EL0 bit assignments
119 // [63:5] Reserved
120 // [4] DZP, if bit set DC ZVA instruction is prohibited, else permitted
121 // [3:0] log2 of the block size in words, eg. if it returns 0x4 then block size is 16 words
122 MRS DCZID_EL0, R3
123 TBZ $4, R3, init
124 // ZVA not available
// Cache all-ones so that the bit 31 test above short-circuits later calls.
125 MOVW $~0, R5
126 MOVW R5, block_size<>(SB)
127 B no_zva
128
129 PCALIGN $16
130 init:
// R5 = 4 << (DCZID_EL0 & 15): the block size converted from log2(words)
// to bytes, at 4 bytes per word. The value is cached before it is checked.
131 MOVW $4, R9
132 ANDW $15, R3, R5
133 LSLW R5, R9, R5
134 MOVW R5, block_size<>(SB)
135
// R5 is a power of two, so non-zero low six bits mean R5 < 64.
// NOTE(review): a block size below 64 is still cached above, so later calls
// reach zero_by_line through the CBNZ in try_zva without repeating this
// check - confirm that is intended.
136 ANDS $63, R5, R9
137 // Block size is less than 64.
138 BNE no_zva
139
140 PCALIGN $16
141 zero_by_line:
// R5 = block size in bytes, R1 = bytes to zero (at least 128 here),
// R0 = 16-byte aligned destination.
142 CMP R5, R1
143 // n is smaller than one block, DC ZVA cannot be used
144 BLO no_zva
// R6 = block-size mask; R4 = (-R0) & R6 = bytes up to the next block boundary.
145 SUB $1, R5, R6
146 NEG R0, R4
147 ANDS R6, R4, R4
148 // Already aligned
149 BEQ aligned
150
151 // R3 = bytes that would remain once R0 is aligned to the block boundary
152 SUB R4, R1, R3
153
154 // Use ZVA only if the length remaining after alignment is at least 64
155 // and at least one block (R5); otherwise fall back to the store loop.
// CMP sets GE when R3 >= 64. If GE holds, CCMP compares R3 with R5;
// otherwise it forces NZCV = 0b1010 (N set, V clear), which reads as LT.
// BLT is therefore taken when R3 < 64 or R3 < R5.
156 CMP $64, R3
157 CCMP GE, R3, R5, $10 // condition code GE, NZCV=0b1010
158 BLT no_zva
159
160 // We now have at least 64 bytes to zero, update n
161 MOVD R3, R1
162
163 loop_zva_prolog:
// Zero up to the block boundary in 64-byte groups of plain stores. The
// flags set by SUBS are consumed by BGE; the STP and ADD in between do not
// modify them. The final group overshoots the boundary by up to 64 bytes,
// which is inside the buffer because R3 >= 64 was checked above.
164 STP (ZR, ZR), (R0)
165 STP (ZR, ZR), 16(R0)
166 STP (ZR, ZR), 32(R0)
167 SUBS $64, R4, R4
168 STP (ZR, ZR), 48(R0)
169 ADD $64, R0, R0
170 BGE loop_zva_prolog
171
// R4 is now minus the overshoot; adding it puts R0 back on the block boundary.
172 ADD R4, R0, R0
173
174 aligned:
// Pre-decrement R1 by one block so SUBS/BHS below leaves the loop as soon
// as fewer than R5 bytes remain.
175 SUB R5, R1, R1
176
177 PCALIGN $16
178 loop_zva:
// Each iteration zeroes one whole block at R0 with DC ZVA (emitted as a raw
// instruction word) and advances R0 by the block size.
179 WORD $0xd50b7420 // DC ZVA, R0
180 ADD R5, R0, R0
181 SUBS R5, R1, R1
182 BHS loop_zva
// The subtraction has borrowed; masking with R6 recovers the number of
// bytes left over after the last whole block (0 .. R5-1).
183 ANDS R6, R1, R1
184 BNE tail_maybe_long
185 RET
186
// block_size caches the DC ZVA probe done by memclrNoHeapPointers:
//   0            DCZID_EL0 has not been read yet
//   bit 31 set   DC ZVA is prohibited (all-ones is stored)
//   otherwise    DC ZVA block size in bytes
// The symbol is declared as 8 bytes, but the routine above reads and writes
// it only with 32-bit MOVW.
187 GLOBL block_size<>(SB), NOPTR, $8
188
View as plain text