1 // Copyright 2018 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "go_asm.h"
6 #include "textflag.h"
7
// Index marshals its stack arguments into the register convention used by
// indexbody<> and branches there; indexbody<> stores the result through R5,
// which points at ret+48(FP).
// NOTE(review): the 24-byte spacing between a_base and b_base suggests both
// arguments are []byte headers; confirm against the Go declaration.
// Caller must confirm availability of vx facility before calling.
TEXT ·Index(SB),NOSPLIT|NOFRAME,$0-56
	MOVD	$ret+48(FP), R5       // R5=&ret (result slot)
	LMG	b_base+24(FP), R3, R4 // R3=&sep[0], R4=len(sep)
	LMG	a_base+0(FP), R1, R2  // R1=&s[0],   R2=len(s)
	BR	indexbody<>(SB)       // shared search body; does not return here
14
// IndexString marshals its stack arguments into the register convention used
// by indexbody<> and branches there; indexbody<> stores the result through
// R5, which points at ret+32(FP).
// NOTE(review): the 16-byte spacing between a_base and b_base suggests both
// arguments are string headers; confirm against the Go declaration.
// Caller must confirm availability of vx facility before calling.
TEXT ·IndexString(SB),NOSPLIT|NOFRAME,$0-40
	MOVD	$ret+32(FP), R5       // R5=&ret (result slot)
	LMG	b_base+16(FP), R3, R4 // R3=&sep[0], R4=len(sep)
	LMG	a_base+0(FP), R1, R2  // R1=&s[0],   R2=len(s)
	BR	indexbody<>(SB)       // shared search body; does not return here
21
// indexbody searches s for the first occurrence of sep and stores the
// resulting byte offset (or -1) through R5.
//
// s: string we are searching
// sep: string to search for
//
// Inputs:
//   R1=&s[0],   R2=len(s)
//   R3=&sep[0], R4=len(sep)
//   R5=&ret (int)
//
// Result:
//   *R5 = offset of the first match of sep in s, or -1 when there is no
//   match, when len(sep) > len(s), or when len(sep) == 0.
//
// Register use inside the body:
//   R2 = address of the last position in s at which sep could start
//   R4 = len(sep)-1 (VLL length operand)
//   R7 = address of the current candidate position
//   R0, R8, R9 = scratch
//   V0 = leading bytes of sep (at most 16)
//
// The code dispatches on len(sep): dedicated 16-positions-per-iteration
// loops for lengths 2, 3 and 4, a generic one-position-per-compare loop for
// lengths up to 16 (also used for the tails of the fast loops), and
// one-position-per-iteration loops for 17..32, 33..48 and 49..64 bytes.
// len(sep) > 64 is not implemented.
//
// Caller must confirm availability of vx facility before calling.
TEXT indexbody<>(SB),NOSPLIT|NOFRAME,$0
	CMPBGT	R4, R2, notfound   // sep longer than s: cannot match
	ADD	R1, R2
	SUB	R4, R2             // R2=&s[len(s)-len(sep)] (last valid index)
	CMPBEQ	R4, $0, notfound   // empty sep is reported as not found
	SUB	$1, R4             // R4=len(sep)-1 for use as VLL index
	VLL	R4, (R3), V0       // contains first 16 bytes of sep
	MOVD	R1, R7             // R7=current search position, starts at &s[0]

	// len(sep) == 2: test 16 candidate positions per iteration.
index2plus:
	CMPBNE	R4, $1, index3plus
	MOVD	$15(R7), R9
	CMPBGE	R9, R2, index2to16 // fewer than 16 candidates left: use the generic loop
	VGBM	$0xaaaa, V31       // 0xff00ff00ff00ff00...
	VONE	V16
	VREPH	$0, V0, V1         // replicate the 2 bytes of sep across V1
index2loop:
	VL	0(R7), V2          // 16 bytes, even indices
	VL	1(R7), V4          // 16 bytes, odd indices
	VCEQH	V1, V2, V5         // compare even indices
	VCEQH	V1, V4, V6         // compare odd indices
	VSEL	V5, V6, V31, V7    // merge even and odd indices
	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
	BLT	foundV17
	MOVD	$16(R7), R7        // R7+=16
	ADD	$15, R7, R9
	CMPBLE	R9, R2, index2loop // continue if (R7+15) <= R2 (last index to search)
	CMPBLE	R7, R2, index2to16 // remaining candidates are handled by the generic loop
	BR	notfound

	// len(sep) == 3: test 16 candidate positions per iteration.
index3plus:
	CMPBNE	R4, $2, index4plus
	ADD	$15, R7, R9
	CMPBGE	R9, R2, index2to16 // fewer than 16 candidates left: use the generic loop
	MOVD	$1, R0             // VLL index for the 2 bytes following each 16-byte load
	VGBM	$0xaaaa, V31       // 0xff00ff00ff00ff00...
	VONE	V16
	VREPH	$0, V0, V1         // replicate the first 2 bytes of sep across V1
	VREPB	$2, V0, V8         // replicate the last byte of sep across V8
index3loop:
	VL	(R7), V2           // load 16-bytes into V2
	VLL	R0, 16(R7), V3     // load 2-bytes into V3
	VSLDB	$1, V2, V3, V4     // V4=(V2:V3)<<1
	VSLDB	$2, V2, V3, V9     // V9=(V2:V3)<<2
	VCEQH	V1, V2, V5         // compare 2-byte even indices
	VCEQH	V1, V4, V6         // compare 2-byte odd indices
	VCEQB	V8, V9, V10        // compare last bytes
	VSEL	V5, V6, V31, V7    // merge even and odd indices
	VN	V7, V10, V7        // AND indices with last byte
	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
	BLT	foundV17
	MOVD	$16(R7), R7        // R7+=16
	ADD	$15, R7, R9
	CMPBLE	R9, R2, index3loop // continue if (R7+15) <= R2 (last index to search)
	CMPBLE	R7, R2, index2to16 // remaining candidates are handled by the generic loop
	BR	notfound

	// len(sep) == 4: test 16 candidate positions per iteration.
index4plus:
	CMPBNE	R4, $3, index5plus
	ADD	$15, R7, R9
	CMPBGE	R9, R2, index2to16 // fewer than 16 candidates left: use the generic loop
	MOVD	$2, R0             // VLL index for the 3 bytes following each 16-byte load
	VGBM	$0x8888, V29       // 0xff000000ff000000...
	VGBM	$0x2222, V30       // 0x0000ff000000ff00...
	VGBM	$0xcccc, V31       // 0xffff0000ffff0000...
	VONE	V16
	VREPF	$0, V0, V1         // replicate the 4 bytes of sep across V1
index4loop:
	VL	(R7), V2           // load 16-bytes into V2
	VLL	R0, 16(R7), V3     // load 3-bytes into V3
	VSLDB	$1, V2, V3, V4     // V4=(V2:V3)<<1
	VSLDB	$2, V2, V3, V9     // V9=(V2:V3)<<2
	VSLDB	$3, V2, V3, V10    // V10=(V2:V3)<<3
	VCEQF	V1, V2, V5         // compare index 0, 4, ...
	VCEQF	V1, V4, V6         // compare index 1, 5, ...
	VCEQF	V1, V9, V11        // compare index 2, 6, ...
	VCEQF	V1, V10, V12       // compare index 3, 7, ...
	VSEL	V5, V6, V29, V13   // merge index 0, 1, 4, 5, ...
	VSEL	V11, V12, V30, V14 // merge index 2, 3, 6, 7, ...
	VSEL	V13, V14, V31, V7  // final merge
	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
	BLT	foundV17
	MOVD	$16(R7), R7        // R7+=16
	ADD	$15, R7, R9
	CMPBLE	R9, R2, index4loop // continue if (R7+15) <= R2 (last index to search)
	CMPBLE	R7, R2, index2to16 // remaining candidates are handled by the generic loop
	BR	notfound

	// len(sep) <= 16 (generic): compare one candidate position at a time
	// against V0. Also entered from the loops above to finish off the last
	// few candidate positions.
index5plus:
	CMPBGT	R4, $15, index17plus
index2to16:
	CMPBGT	R7, R2, notfound
	MOVD	$1(R7), R8
	CMPBGT	R8, R2, index2to16tail // only one candidate left
index2to16loop:
	// unrolled 2x
	VLL	R4, (R7), V1       // len(sep) bytes at candidate R7
	VLL	R4, 1(R7), V2      // len(sep) bytes at candidate R7+1
	VCEQGS	V0, V1, V3
	BEQ	found
	MOVD	$1(R7), R7
	VCEQGS	V0, V2, V4
	BEQ	found
	MOVD	$1(R7), R7
	CMPBLT	R7, R2, index2to16loop // at least two candidates remain
	CMPBGT	R7, R2, notfound
index2to16tail:
	VLL	R4, (R7), V1       // final candidate (R7 == R2)
	VCEQGS	V0, V1, V2
	BEQ	found
	BR	notfound

	// 17 <= len(sep) <= 32: V0 holds sep[0:16], V1 the remainder.
index17plus:
	CMPBGT	R4, $31, index33plus
	SUB	$16, R4, R0        // R0=VLL index for the bytes past the first 16
	VLL	R0, 16(R3), V1
	VONE	V7
index17to32loop:
	VL	(R7), V2
	VLL	R0, 16(R7), V3
	VCEQG	V0, V2, V4
	VCEQG	V1, V3, V5
	VN	V4, V5, V6         // combine the per-chunk compare results
	VCEQGS	V6, V7, V8         // all ones means every chunk matched
	BEQ	found
	MOVD	$1(R7), R7
	CMPBLE	R7, R2, index17to32loop
	BR	notfound

	// 33 <= len(sep) <= 48: V0, V1 hold sep[0:32], V2 the remainder.
index33plus:
	CMPBGT	R4, $47, index49plus
	SUB	$32, R4, R0        // R0=VLL index for the bytes past the first 32
	VL	16(R3), V1
	VLL	R0, 32(R3), V2
	VONE	V11
index33to48loop:
	VL	(R7), V3
	VL	16(R7), V4
	VLL	R0, 32(R7), V5
	VCEQG	V0, V3, V6
	VCEQG	V1, V4, V7
	VCEQG	V2, V5, V8
	VN	V6, V7, V9
	VN	V8, V9, V10        // combine the per-chunk compare results
	VCEQGS	V10, V11, V12      // all ones means every chunk matched
	BEQ	found
	MOVD	$1(R7), R7
	CMPBLE	R7, R2, index33to48loop
	BR	notfound

	// 49 <= len(sep) <= 64: V0..V2 hold sep[0:48], V3 the remainder.
index49plus:
	CMPBGT	R4, $63, index65plus
	SUB	$48, R4, R0        // R0=VLL index for the bytes past the first 48
	VL	16(R3), V1
	VL	32(R3), V2
	VLL	R0, 48(R3), V3
	VONE	V15
index49to64loop:
	VL	(R7), V4
	VL	16(R7), V5
	VL	32(R7), V6
	VLL	R0, 48(R7), V7
	VCEQG	V0, V4, V8
	VCEQG	V1, V5, V9
	VCEQG	V2, V6, V10
	VCEQG	V3, V7, V11
	VN	V8, V9, V12
	VN	V10, V11, V13
	VN	V12, V13, V14      // combine the per-chunk compare results
	VCEQGS	V14, V15, V16      // all ones means every chunk matched
	BEQ	found
	MOVD	$1(R7), R7
	CMPBLE	R7, R2, index49to64loop
	// candidates exhausted: fall through to notfound
notfound:
	MOVD	$-1, (R5)
	RET

index65plus:
	// not implemented
	// NOTE(review): this stores through (R0) without writing a result to
	// (R5); it looks like a deliberate fault for unsupported lengths.
	// Confirm that callers never pass len(sep) > 64.
	MOVD	$0, (R0)
	RET

foundV17: // index is in doubleword V17[0]
	VLGVG	$0, V17, R8
	ADD	R8, R7             // R7=address of the match
found:
	SUB	R1, R7             // convert match address to offset within s
	MOVD	R7, (R5)
	RET
217
View as plain text