1 // Copyright 2022 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "go_asm.h"
6 #include "textflag.h"
7
8 TEXT ·Compare<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-56 // entry stub: moves b's base/len into the registers compare<> expects, then tail-jumps to it
9 // X10 = a_base
10 // X11 = a_len
11 // X12 = a_cap (unused)
12 // X13 = b_base (want in X12)
13 // X14 = b_len (want in X13)
14 // X15 = b_cap (unused)
15 MOV X13, X12 // b_base -> X12, overwriting the unused a_cap
16 MOV X14, X13 // b_len -> X13; must come after the previous MOV, which still needed the old X13
17 JMP compare<>(SB) // jump, not call: compare<> leaves its result in X10 and its RET returns to Compare's caller
18
19 TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40 // entry stub: arguments already sit in the registers compare<> expects
20 // X10 = a_base
21 // X11 = a_len
22 // X12 = b_base
23 // X13 = b_len
24 JMP compare<>(SB) // no register shuffling needed; compare<> leaves the result in X10 and returns to our caller
25
26 // compare<> orders the byte sequences a and b. On entry:
27 // X10 points to start of a
28 // X11 length of a
29 // X12 points to start of b
30 // X13 length of b
31 // The result (-1/0/1) is returned in X10; no path in this routine reads or stores through X14.
32 // Registers written as scratch: X5-X9, X15-X25, X28-X30 (X10 and X12 are also advanced as cursors).
33 TEXT compare<>(SB),NOSPLIT|NOFRAME,$0
34 BEQ X10, X12, cmp_len // same start address: the common prefix is identical, so only the lengths decide
35
36 MOV X11, X5
37 BGE X13, X5, use_a_len // X5 = min(len(a), len(b))
38 MOV X13, X5
39 use_a_len:
40 BEQZ X5, cmp_len // nothing to compare byte-wise: order by length alone
41
42 MOV $32, X6
43 BLT X5, X6, check8_unaligned // fewer than 32 bytes: skip the word-at-a-time paths and use byte loads
44
45 // Check alignment - if alignment differs we have to do one byte at a time.
46 AND $7, X10, X7 // X7 = offset of a within an 8-byte word
47 AND $7, X12, X8 // X8 = offset of b within an 8-byte word
48 BNE X7, X8, check8_unaligned
49 BEQZ X7, compare32 // both pointers already 8-byte aligned (and X5 >= 32 here)
50
51 // Check one byte at a time until we reach 8 byte alignment.
52 SUB X7, X0, X7 // X7 = 0 - X7
53 ADD $8, X7, X7 // X7 = 8 - offset = number of bytes up to the next 8-byte boundary
54 SUB X7, X5, X5 // deduct those bytes from the remaining count up front
55 align:
56 SUB $1, X7
57 MOVBU 0(X10), X8
58 MOVBU 0(X12), X9
59 BNE X8, X9, cmp // first differing byte found: cmp orders X8 against X9
60 ADD $1, X10
61 ADD $1, X12
62 BNEZ X7, align
63
64 check32:
65 // X6 contains $32
66 BLT X5, X6, compare16 // at most 7 bytes were taken from a count >= 32, so >= 25 remain: a 16-byte step needs no further check
67 compare32:
68 MOV 0(X10), X15 // each iteration compares 32 bytes as four pairs of 8-byte words
69 MOV 0(X12), X16
70 MOV 8(X10), X17
71 MOV 8(X12), X18
72 BNE X15, X16, cmp8a // mismatch in the word pair held in X15/X16
73 BNE X17, X18, cmp8b // mismatch in the word pair held in X17/X18
74 MOV 16(X10), X15
75 MOV 16(X12), X16
76 MOV 24(X10), X17
77 MOV 24(X12), X18
78 BNE X15, X16, cmp8a
79 BNE X17, X18, cmp8b
80 ADD $32, X10
81 ADD $32, X12
82 SUB $32, X5
83 BGE X5, X6, compare32 // X6 is still 32 on every path into this loop
84 BEQZ X5, cmp_len // common prefix exhausted with no difference: lengths decide
85
86 check16:
87 MOV $16, X6
88 BLT X5, X6, check8_unaligned
89 compare16:
90 MOV 0(X10), X15 // single 16-byte step as two pairs of 8-byte words
91 MOV 0(X12), X16
92 MOV 8(X10), X17
93 MOV 8(X12), X18
94 BNE X15, X16, cmp8a
95 BNE X17, X18, cmp8b
96 ADD $16, X10
97 ADD $16, X12
98 SUB $16, X5
99 BEQZ X5, cmp_len
100
101 check8_unaligned:
102 MOV $8, X6
103 BLT X5, X6, check4_unaligned
104 compare8_unaligned:
105 MOVBU 0(X10), X8 // load 8 bytes of a individually, so no alignment is required
106 MOVBU 1(X10), X15
107 MOVBU 2(X10), X17
108 MOVBU 3(X10), X19
109 MOVBU 4(X10), X21
110 MOVBU 5(X10), X23
111 MOVBU 6(X10), X25
112 MOVBU 7(X10), X29
113 MOVBU 0(X12), X9 // matching 8 bytes of b, each in the register paired with a's byte
114 MOVBU 1(X12), X16
115 MOVBU 2(X12), X18
116 MOVBU 3(X12), X20
117 MOVBU 4(X12), X22
118 MOVBU 5(X12), X24
119 MOVBU 6(X12), X28
120 MOVBU 7(X12), X30
121 BNE X8, X9, cmp1a // tested in address order, so the first branch taken is the first differing byte
122 BNE X15, X16, cmp1b
123 BNE X17, X18, cmp1c
124 BNE X19, X20, cmp1d
125 BNE X21, X22, cmp1e
126 BNE X23, X24, cmp1f
127 BNE X25, X28, cmp1g
128 BNE X29, X30, cmp1h
129 ADD $8, X10
130 ADD $8, X12
131 SUB $8, X5
132 BGE X5, X6, compare8_unaligned // X6 = 8 here
133 BEQZ X5, cmp_len
134
135 check4_unaligned:
136 MOV $4, X6
137 BLT X5, X6, compare1
138 compare4_unaligned:
139 MOVBU 0(X10), X8 // same scheme as the 8-byte step, with four byte pairs
140 MOVBU 1(X10), X15
141 MOVBU 2(X10), X17
142 MOVBU 3(X10), X19
143 MOVBU 0(X12), X9
144 MOVBU 1(X12), X16
145 MOVBU 2(X12), X18
146 MOVBU 3(X12), X20
147 BNE X8, X9, cmp1a
148 BNE X15, X16, cmp1b
149 BNE X17, X18, cmp1c
150 BNE X19, X20, cmp1d
151 ADD $4, X10
152 ADD $4, X12
153 SUB $4, X5
154 BGE X5, X6, compare4_unaligned // otherwise fall through; compare1 tests for zero remaining first
155
156 compare1:
157 BEQZ X5, cmp_len // no bytes left in the common prefix: lengths decide
158 MOVBU 0(X10), X8
159 MOVBU 0(X12), X9
160 BNE X8, X9, cmp
161 ADD $1, X10
162 ADD $1, X12
163 SUB $1, X5
164 JMP compare1
165
166 // Compare 8 bytes of memory in X15/X16 that are known to differ.
167 cmp8a:
168 MOV X15, X17 // move the pair into X17/X18 and fall through to cmp8b
169 MOV X16, X18
170
171 // Compare 8 bytes of memory in X17/X18 that are known to differ.
172 cmp8b:
173 MOV $0xff, X19 // byte mask, starting at the least-significant byte (lowest address for a little-endian load)
174 cmp8_loop:
175 AND X17, X19, X8 // isolate the byte of a under the mask
176 AND X18, X19, X9 // isolate the byte of b at the same position
177 BNE X8, X9, cmp // both values sit in the same byte lane, so cmp's unsigned compare orders the bytes
178 SLLI $8, X19 // advance the mask to the next byte
179 JMP cmp8_loop // has no exit of its own; relies on the words differing, as stated above
180
181 cmp1a:
182 SLTU X9, X8, X5 // cmp1a..cmp1h repeat the cmp sequence below for the register pair that mismatched
183 SLTU X8, X9, X6
184 JMP cmp_ret
185 cmp1b:
186 SLTU X16, X15, X5
187 SLTU X15, X16, X6
188 JMP cmp_ret
189 cmp1c:
190 SLTU X18, X17, X5
191 SLTU X17, X18, X6
192 JMP cmp_ret
193 cmp1d:
194 SLTU X20, X19, X5
195 SLTU X19, X20, X6
196 JMP cmp_ret
197 cmp1e:
198 SLTU X22, X21, X5
199 SLTU X21, X22, X6
200 JMP cmp_ret
201 cmp1f:
202 SLTU X24, X23, X5
203 SLTU X23, X24, X6
204 JMP cmp_ret
205 cmp1g:
206 SLTU X28, X25, X5
207 SLTU X25, X28, X6
208 JMP cmp_ret
209 cmp1h:
210 SLTU X30, X29, X5
211 SLTU X29, X30, X6
212 JMP cmp_ret
213
214 cmp_len:
215 MOV X11, X8 // no byte difference found: order by the original lengths instead
216 MOV X13, X9
217 cmp:
218 SLTU X9, X8, X5 // X5 = 1 if X8 < X9 (unsigned), i.e. the a-side value is smaller
219 SLTU X8, X9, X6 // X6 = 1 if X9 < X8 (unsigned), i.e. the a-side value is larger
220 cmp_ret:
221 SUB X5, X6, X10 // X10 = X6 - X5: -1, 0 or +1
222 RET
223
View as plain text