// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "textflag.h"

// Index is the entry point for the two-slice form of the search.
// NOTE(review): the argument offsets (0/8 for a, 24/32 for b, result at 48)
// match a Go declaration of the shape func Index(a, b []byte) int; confirm
// against the Go stub, which is not part of this file.
//
// It only loads the arguments into the registers indexbody expects and
// then branches there; indexbody writes the result through R9 and returns
// to Index's caller.
TEXT ·Index(SB),NOSPLIT,$0-56
	MOVD	a_base+0(FP), R0	// R0 = haystack
	MOVD	a_len+8(FP), R1		// R1 = length of haystack
	MOVD	b_base+24(FP), R2	// R2 = needle
	MOVD	b_len+32(FP), R3	// R3 = length of needle
	MOVD	$ret+48(FP), R9		// R9 = address of the result slot (not its value)
	B	indexbody<>(SB)

// IndexString is the entry point for the two-string form of the search.
// NOTE(review): the argument offsets (0/8 for a, 16/24 for b, result at 32)
// match a Go declaration of the shape func IndexString(a, b string) int;
// confirm against the Go stub, which is not part of this file.
//
// It only loads the arguments into the registers indexbody expects and
// then branches there; indexbody writes the result through R9 and returns
// to IndexString's caller.
TEXT ·IndexString(SB),NOSPLIT,$0-40
	MOVD	a_base+0(FP), R0	// R0 = haystack
	MOVD	a_len+8(FP), R1		// R1 = length of haystack
	MOVD	b_base+16(FP), R2	// R2 = needle
	MOVD	b_len+24(FP), R3	// R3 = length of needle
	MOVD	$ret+32(FP), R9		// R9 = address of the result slot (not its value)
	B	indexbody<>(SB)

// indexbody is the search routine shared by Index and IndexString.
//
// input:
//   R0: haystack
//   R1: length of haystack
//   R2: needle
//   R3: length of needle (2 <= len <= 32)
//   R9: address to put result
//
// output:
//   the word at (R9) receives the offset of the first match inside the
//   haystack, or -1 if no position matches.
//
// Registers set up before the length dispatch:
//   R4:  address of the last position at which a match can start
//   R8:  haystack+1, subtracted from R0 in 'found' to produce the index
//   R11: len(needle)-9, only on the paths for needles longer than 8 bytes
//
// Every search loop loads with post-increment (MOVx.P 1(R0)), so after the
// first load of an iteration R0 already points one byte past the candidate
// position. All follow-up offsets from R0 in that iteration are therefore
// one less than the offset inside the needle, and 'found' subtracts R8
// (haystack+1) rather than the haystack base.
//
// On the paths for needle lengths 3, 5, 6 and 7, R3 is reused as a scratch
// register once the length has been dispatched on.
TEXT indexbody<>(SB),NOSPLIT,$0-56
	// main idea is to load 'sep' into separate register(s)
	// to avoid repeatedly re-load it again and again
	// for subsequent substring comparisons
	SUB	R3, R1, R4
	// R4 contains the start of last substring for comparison
	ADD	R0, R4, R4
	// R8 = haystack+1, see the note on post-increment above
	ADD	$1, R0, R8

	// Dispatch on the needle length: lengths above 8 go to the wide
	// paths; otherwise bits 3..0 of R3 select the exact length in 2..8.
	CMP	$8, R3
	BHI	greater_8
	TBZ	$3, R3, len_2_7
len_8:
	// R5 contains 8-byte of sep
	MOVD	(R2), R5
loop_8:
	// R6 contains substring for comparison
	// give up once R0 has moved past the last possible start (unsigned)
	CMP	R4, R0
	BHI	not_found
	MOVD.P	1(R0), R6
	CMP	R5, R6
	BNE	loop_8
	B	found
len_2_7:
	TBZ	$2, R3, len_2_3
	TBZ	$1, R3, len_4_5
	TBZ	$0, R3, len_6
len_7:
	// R5 and R6 contain 7-byte of sep
	MOVWU	(R2), R5
	// 1-byte overlap with R5
	MOVWU	3(R2), R6
loop_7:
	CMP	R4, R0
	BHI	not_found
	MOVWU.P	1(R0), R3
	CMP	R5, R3
	BNE	loop_7
	// bytes 3..6 of the candidate (R0 is already candidate+1)
	MOVWU	2(R0), R3
	CMP	R6, R3
	BNE	loop_7
	B	found
len_6:
	// R5 and R6 contain 6-byte of sep
	MOVWU	(R2), R5
	MOVHU	4(R2), R6
loop_6:
	CMP	R4, R0
	BHI	not_found
	MOVWU.P	1(R0), R3
	CMP	R5, R3
	BNE	loop_6
	// bytes 4..5 of the candidate (R0 is already candidate+1)
	MOVHU	3(R0), R3
	CMP	R6, R3
	BNE	loop_6
	B	found
len_4_5:
	TBZ	$0, R3, len_4
len_5:
	// R5 and R7 contain 5-byte of sep
	MOVWU	(R2), R5
	MOVBU	4(R2), R7
loop_5:
	CMP	R4, R0
	BHI	not_found
	MOVWU.P	1(R0), R3
	CMP	R5, R3
	BNE	loop_5
	// byte 4 of the candidate (R0 is already candidate+1)
	MOVBU	3(R0), R3
	CMP	R7, R3
	BNE	loop_5
	B	found
len_4:
	// R5 contains 4-byte of sep
	MOVWU	(R2), R5
loop_4:
	CMP	R4, R0
	BHI	not_found
	MOVWU.P	1(R0), R6
	CMP	R5, R6
	BNE	loop_4
	B	found
len_2_3:
	TBZ	$0, R3, len_2
len_3:
	// R6 and R7 contain 3-byte of sep
	MOVHU	(R2), R6
	MOVBU	2(R2), R7
loop_3:
	CMP	R4, R0
	BHI	not_found
	MOVHU.P	1(R0), R3
	CMP	R6, R3
	BNE	loop_3
	// byte 2 of the candidate (R0 is already candidate+1)
	MOVBU	1(R0), R3
	CMP	R7, R3
	BNE	loop_3
	B	found
len_2:
	// R5 contains 2-byte of sep
	MOVHU	(R2), R5
loop_2:
	CMP	R4, R0
	BHI	not_found
	MOVHU.P	1(R0), R6
	CMP	R5, R6
	BNE	loop_2
	// a match in loop_2 falls through into found
found:
	// R0 is match+1 and R8 is haystack+1, so R0-R8 is the match index
	SUB	R8, R0, R0
	MOVD	R0, (R9)
	RET
not_found:
	MOVD	$-1, R0
	MOVD	R0, (R9)
	RET
greater_8:
	SUB	$9, R3, R11	// len(sep) - 9, offset of R0 for last 8 bytes
	CMP	$16, R3
	BHI	greater_16
len_9_16:
	MOVD.P	8(R2), R5	// R5 contains the first 8-byte of sep
	SUB	$16, R3, R7	// len(sep) - 16, offset of R2 for last 8 bytes
	MOVD	(R2)(R7), R6	// R6 contains the last 8-byte of sep
loop_9_16:
	// search the first 8 bytes first
	CMP	R4, R0
	BHI	not_found
	MOVD.P	1(R0), R7
	CMP	R5, R7
	BNE	loop_9_16
	MOVD	(R0)(R11), R7
	CMP	R6, R7	// compare the last 8 bytes
	BNE	loop_9_16
	B	found
greater_16:
	CMP	$24, R3
	BHI	len_25_32
len_17_24:
	LDP.P	16(R2), (R5, R6)	// R5 and R6 contain the first 16-byte of sep
	SUB	$24, R3, R10	// len(sep) - 24
	MOVD	(R2)(R10), R7	// R7 contains the last 8-byte of sep
loop_17_24:
	// search the first 16 bytes first
	CMP	R4, R0
	BHI	not_found
	MOVD.P	1(R0), R10
	CMP	R5, R10
	BNE	loop_17_24
	// bytes 8..15 of the candidate (R0 is already candidate+1)
	MOVD	7(R0), R10
	CMP	R6, R10
	BNE	loop_17_24
	MOVD	(R0)(R11), R10
	CMP	R7, R10	// compare the last 8 bytes
	BNE	loop_17_24
	B	found
len_25_32:
	LDP.P	16(R2), (R5, R6)
	MOVD.P	8(R2), R7	// R5, R6 and R7 contain the first 24-byte of sep
	SUB	$32, R3, R12	// len(sep) - 32
	MOVD	(R2)(R12), R10	// R10 contains the last 8-byte of sep
loop_25_32:
	// search the first 24 bytes first
	CMP	R4, R0
	BHI	not_found
	MOVD.P	1(R0), R12
	CMP	R5, R12
	BNE	loop_25_32
	// bytes 8..15 of the candidate (R0 is already candidate+1)
	MOVD	7(R0), R12
	CMP	R6, R12
	BNE	loop_25_32
	// bytes 16..23 of the candidate
	MOVD	15(R0), R12
	CMP	R7, R12
	BNE	loop_25_32
	MOVD	(R0)(R11), R12
	CMP	R10, R12	// compare the last 8 bytes
	BNE	loop_25_32
	B	found