Text file
src/runtime/memclr_arm64.s
1 // Copyright 2014 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "textflag.h"
6
7 // See memclrNoHeapPointers Go doc for important implementation constraints.
8
9 // func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr)
10 // Also called from assembly in sys_windows_arm64.s without g (but using Go stack convention).
//
// Zeroes the n bytes starting at ptr. The code below treats R0 as ptr and
// R1 as n, and uses R3-R7 and R9 as scratch.
//
// Dispatch on n:
//   n < 16   up to two possibly overlapping stores of 8, 4, or 1+2 bytes
//   n == 16  one 16-byte STP
//   n > 16   advance ptr to a 16-byte boundary, then with the remaining n:
//              n <= 63   tail63: 16-byte stores plus one overlapping final store
//              n <  128  no_zva: loop of four STPs (64 bytes per iteration)
//              n >= 128  DC ZVA one block at a time when DCZID_EL0 permits it
//                        and enough bytes remain after block alignment,
//                        otherwise no_zva
11 TEXT runtime·memclrNoHeapPointers<ABIInternal>(SB),NOSPLIT,$0-16
12 CMP $16, R1
13 // n == 16: a single 16-byte store covers the whole buffer
14 BEQ zero_exact_16
15
16 // n > 16 (unsigned compare): take the aligned bulk path
17 BHI zero_by_16
18
19 // n < 16: store from both ends of the buffer; the two stores may overlap
20 ADD R1, R0, R7 // R7 = ptr + n, one past the last byte
21 TBZ $3, R1, less_than_8 // bit 3 of n clear => n < 8
22 MOVD ZR, (R0) // 8 <= n < 16: first 8 bytes
23 MOVD ZR, -8(R7) // last 8 bytes
24 RET
25
26 less_than_8:
27 TBZ $2, R1, less_than_4 // bit 2 of n clear => n < 4
28 MOVW ZR, (R0) // 4 <= n < 8: first 4 bytes
29 MOVW ZR, -4(R7) // last 4 bytes
30 RET
31
32 less_than_4:
33 CBZ R1, ending // n == 0: nothing to clear
34 MOVB ZR, (R0) // n is 1, 2 or 3: first byte
35 TBZ $1, R1, ending // bit 1 of n clear => n == 1, done
36 MOVH ZR, -2(R7) // n is 2 or 3: last two bytes
37
38 ending:
39 RET
40
41 zero_exact_16:
42 // n is exactly 16 bytes
43 STP (ZR, ZR), (R0)
44 RET
45
46 zero_by_16:
47 // n is greater than 16 bytes; check whether the start address is 16-byte aligned
48 NEG R0, R4
49 ANDS $15, R4, R4 // R4 = (-ptr) & 15 = bytes up to the next 16-byte boundary
50 // Already 16-byte aligned: go straight to the size dispatch in try_zva
51 BEQ try_zva
52
53 // Unaligned start: one 16-byte store covers the R4 bytes that are skipped below
54 STP (ZR, ZR), (R0)
55 // Advance ptr to the 16-byte boundary and shrink n to match
56 SUB R4, R1, R1
57 ADD R4, R0, R0
58 B try_zva
59
60 tail_maybe_long: // entered from loop_zva with R1 = bytes left over after the last whole block
61 CMP $64, R1
62 BHS no_zva // 64 or more bytes left: use the 64-byte store loop
63
64 tail63: // clears (R1 & 63) bytes at R0; bits of R1 above bit 5 are ignored
65 ANDS $48, R1, R3 // R3 = 0, 16, 32 or 48: bytes to clear with whole 16-byte stores
66 BEQ last16
67 CMPW $32, R3
68 BEQ last48 // R3 == 32: two stores
69 BLT last32 // R3 == 16: one store
70 STP.P (ZR, ZR), 16(R0) // R3 == 48: three stores; .P post-increments R0 by 16
71 last48:
72 STP.P (ZR, ZR), 16(R0)
73 last32:
74 STP.P (ZR, ZR), 16(R0)
75 // Fewer than 16 bytes remain. Every path to here started with n > 16, so one
76 // 16-byte store that ends at the end of the buffer stays inside the buffer.
77 last16:
78 ANDS $15, R1, R1 // R1 = bytes still to clear (0..15)
79 CBZ R1, last_end
80 ADD R1, R0, R0 // R0 = one past the last byte of the buffer
81 STP (ZR, ZR), -16(R0) // overlaps bytes that were already cleared
82 last_end:
83 RET
84
85 no_zva: // 64-byte store loop; every branch to here has R1 >= 64
86 SUB $16, R0, R0 // bias R0 by -16 so the loop can end with a pre-indexed store
87 SUB $64, R1, R1 // bias R1 by -64 so SUBS/BGE exits once fewer than 64 bytes remain
88
89 loop_64:
90 STP (ZR, ZR), 16(R0)
91 STP (ZR, ZR), 32(R0)
92 STP (ZR, ZR), 48(R0)
93 STP.W (ZR, ZR), 64(R0) // .W pre-increments R0 by 64 before storing
94 SUBS $64, R1, R1
95 BGE loop_64
96 ANDS $63, R1, ZR // flags only: is (R1 & 63), the leftover byte count, non-zero?
97 ADD $16, R0, R0 // undo the -16 bias; ADD leaves the flags from ANDS intact
98 BNE tail63
99 RET
100
101 try_zva:
102 // Try using the ZVA feature to zero entire blocks at once.
103 // It is not meaningful to use ZVA if the block size is less than 64,
104 // so first send anything shorter than 64 bytes to tail63.
105 CMP $63, R1
106 BLE tail63 // n <= 63 (signed compare)
107
108 CMP $128, R1
109 // Require n >= 128 so that enough bytes remain to zero after aligning to
110 // a block boundary; 64 <= n < 128 uses the plain store loop.
111 BLT no_zva
112 // Check if ZVA is allowed from user code, and if so get the block size
113 MOVW block_size<>(SB), R5 // cached result of the probe below
114 TBNZ $31, R5, no_zva // bit 31 set: cached "ZVA prohibited" marker
115 CBNZ R5, zero_by_line // non-zero: cached block size in bytes
116 // block_size is still zero: probe DCZID_EL0. Bit assignments:
117 // [63:5] Reserved
118 // [4] DZP, if bit set DC ZVA instruction is prohibited, else permitted
119 // [3:0] log2 of the block size in words, eg. if it returns 0x4 then block size is 16 words
120 MRS DCZID_EL0, R3
121 TBZ $4, R3, init // DZP clear: ZVA is permitted
122 // ZVA not available: cache an all-ones marker (bit 31 set) and fall back
123 MOVW $~0, R5
124 MOVW R5, block_size<>(SB)
125 B no_zva
126
127 init:
128 MOVW $4, R9
129 ANDW $15, R3, R5 // R5 = DCZID_EL0[3:0], log2 of the block size in words
130 LSLW R5, R9, R5 // R5 = 4 << R5, the block size in bytes (4 bytes per word)
131 MOVW R5, block_size<>(SB) // cache it for later calls
132
133 ANDS $63, R5, R9
134 // Low six bits set: the block size is below 64, so skip ZVA on this call.
// NOTE(review): the size was already cached above, so later calls take the
// CBNZ branch to zero_by_line without repeating this check - confirm that
// is intended.
135 BNE no_zva
136
137 zero_by_line: // R5 = ZVA block size in bytes
138 CMP R5, R1
139 // n is smaller than one block: use the store loop instead
140 BLO no_zva
141 SUB $1, R5, R6 // R6 = block size - 1, used as an alignment mask
142 NEG R0, R4
143 ANDS R6, R4, R4 // R4 = (-R0) & R6 = bytes up to the next block boundary
144 // Already aligned
145 BEQ aligned
146
147 // R3 = bytes that will remain once R0 reaches the block boundary
148 SUB R4, R1, R3
149
150 // Use ZVA only if at least 64 bytes and at least one whole block remain
151 // after alignment (R3 >= 64 and R3 >= R5); otherwise use no_zva.
152 CMP $64, R3
153 CCMP GE, R3, R5, $10 // if R3 >= 64 compare R3 with R5, else force NZCV=0b1010 (reads as LT)
154 BLT no_zva
155
156 // We now have at least 64 bytes to zero after alignment, update n
157 MOVD R3, R1
158
159 loop_zva_prolog: // clear 64 bytes per pass until R0 is past the block boundary; overshoots it by at most 64 bytes, which the R3 >= 64 check allows
160 STP (ZR, ZR), (R0)
161 STP (ZR, ZR), 16(R0)
162 STP (ZR, ZR), 32(R0)
163 SUBS $64, R4, R4 // sets the flags tested by BGE; the STP and ADD below leave them alone
164 STP (ZR, ZR), 48(R0)
165 ADD $64, R0, R0
166 BGE loop_zva_prolog
167
168 ADD R4, R0, R0 // R4 is now negative: step R0 back to the block boundary
169
170 aligned:
171 SUB R5, R1, R1 // bias R1 by one block so SUBS/BHS stops when less than a block remains
172
173 loop_zva:
174 WORD $0xd50b7420 // DC ZVA, R0 (hand-encoded): zero one block at R0
175 ADD R5, R0, R0
176 SUBS R5, R1, R1
177 BHS loop_zva
178 ANDS R6, R1, R1 // R1 = bytes left over, in the range 0..R5-1
179 BNE tail_maybe_long
180 RET
181
// block_size caches the DC ZVA probe performed in memclrNoHeapPointers:
// 0 is treated as "not probed yet", all ones (bit 31 set) as "ZVA
// prohibited", and any other value as the ZVA block size in bytes.
// The code above reads and writes it as a 32-bit word (MOVW).
182 GLOBL block_size<>(SB), NOPTR, $8
183
View as plain text