1
2
3
4
5 package ssa
6
7 import (
8 "fmt"
9 "internal/goarch"
10 "slices"
11 )
12
13 var truthTableValues [3]uint8 = [3]uint8{0b1111_0000, 0b1100_1100, 0b1010_1010}
14
15 func (slop SIMDLogicalOP) String() string {
16 if slop == sloInterior {
17 return "leaf"
18 }
19 interior := ""
20 if slop&sloInterior != 0 {
21 interior = "+interior"
22 }
23 switch slop &^ sloInterior {
24 case sloAnd:
25 return "and" + interior
26 case sloXor:
27 return "xor" + interior
28 case sloOr:
29 return "or" + interior
30 case sloAndNot:
31 return "andNot" + interior
32 case sloNot:
33 return "not" + interior
34 }
35 return "wrong"
36 }
37
38 func rewriteTern(f *Func) {
39 if f.maxCPUFeatures == CPUNone {
40 return
41 }
42
43 arch := f.Config.Ctxt().Arch.Family
44
45 if arch != goarch.AMD64 {
46 return
47 }
48
49 boolExprTrees := make(map[*Value]SIMDLogicalOP)
50
51
52
53
54
55 for _, b := range f.Blocks {
56 for _, v := range b.Values {
57 slo := classifyBooleanSIMD(v)
58 switch slo {
59 case sloOr,
60 sloAndNot,
61 sloXor,
62 sloAnd:
63 boolExprTrees[v.Args[1]] |= sloInterior
64 fallthrough
65 case sloNot:
66 boolExprTrees[v.Args[0]] |= sloInterior
67 boolExprTrees[v] |= slo
68 }
69 }
70 }
71
72
73 var roots []*Value
74 for v, slo := range boolExprTrees {
75 if f.pass.debug > 1 {
76 f.Warnl(v.Pos, "%s has SLO %v", v.LongString(), slo)
77 }
78
79 if slo&sloInterior == 0 && v.Block.CPUfeatures.hasFeature(CPUavx512) {
80 roots = append(roots, v)
81 }
82 }
83 slices.SortFunc(roots, func(u, v *Value) int { return int(u.ID - v.ID) })
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105 var rewrite func(v *Value) [3]*Value
106
107
108
109
110
111
112
113
114
115 var computeTT func(v *Value, vars [3]*Value) uint8
116
117
118
119
120
121 combine := func(a, b [3]*Value) ([3]*Value, bool) {
122 var c [3]*Value
123 i := 0
124 for _, v := range a {
125 if v == nil {
126 break
127 }
128 c[i] = v
129 i++
130 }
131 bloop:
132 for _, v := range b {
133 if v == nil {
134 break
135 }
136 for _, u := range a {
137 if v == u {
138 continue bloop
139 }
140 }
141 if i == 3 {
142 return [3]*Value{}, false
143 }
144 c[i] = v
145 i++
146 }
147 return c, true
148 }
149
150 computeTT = func(v *Value, vars [3]*Value) uint8 {
151 i := 0
152 for ; i < len(vars); i++ {
153 if vars[i] == v {
154 return truthTableValues[i]
155 }
156 }
157 slo := boolExprTrees[v] &^ sloInterior
158 a := computeTT(v.Args[0], vars)
159 switch slo {
160 case sloNot:
161 return ^a
162 case sloAnd:
163 return a & computeTT(v.Args[1], vars)
164 case sloXor:
165 return a ^ computeTT(v.Args[1], vars)
166 case sloOr:
167 return a | computeTT(v.Args[1], vars)
168 case sloAndNot:
169 return a & ^computeTT(v.Args[1], vars)
170 }
171 panic("switch should have covered all cases, or unknown var in logical expression")
172 }
173
174 replace := func(a0 *Value, vars0 [3]*Value) {
175 imm := computeTT(a0, vars0)
176 op := ternOpForLogical(a0.Op)
177 if op == a0.Op {
178 panic(fmt.Errorf("should have mapped away from input op, a0 is %s", a0.LongString()))
179 }
180 if f.pass.debug > 0 {
181 f.Warnl(a0.Pos, "Rewriting %s into %v of 0b%b %v %v %v", a0.LongString(), op, imm,
182 vars0[0], vars0[1], vars0[2])
183 }
184 a0.reset(op)
185 a0.SetArgs3(vars0[0], vars0[1], vars0[2])
186 a0.AuxInt = int64(int8(imm))
187 }
188
189
190
191
192
193 addOne := func(vars [3]*Value, v *Value) [3]*Value {
194 if vars[2] != nil {
195 panic("rewriteTern.addOne, vars[2] should be nil")
196 }
197 if v == vars[0] || v == vars[1] {
198 return vars
199 }
200 if vars[1] == nil {
201 vars[1] = v
202 } else {
203 vars[2] = v
204 }
205 return vars
206 }
207
208 rewrite = func(v *Value) [3]*Value {
209 slo := boolExprTrees[v]
210 if slo == sloInterior {
211 return [3]*Value{v, nil, nil}
212 }
213 var vars [3]*Value
214 hasFeature := v.Block.CPUfeatures.hasFeature(CPUavx512)
215 if slo&sloNot == sloNot {
216 vars = rewrite(v.Args[0])
217 if !hasFeature {
218 if vars[2] != nil {
219 replace(v.Args[0], vars)
220 return [3]*Value{v, nil, nil}
221 }
222 return vars
223 }
224 } else {
225 var ok bool
226 a0, a1 := v.Args[0], v.Args[1]
227 vars0 := rewrite(a0)
228 vars1 := rewrite(a1)
229 vars, ok = combine(vars0, vars1)
230
231 if f.pass.debug > 1 {
232 f.Warnl(a0.Pos, "combine(%v, %v) -> %v, %v", vars0, vars1, vars, ok)
233 }
234
235 if !(ok && v.Block.CPUfeatures.hasFeature(CPUavx512)) {
236
237
238 if vars0[2] != nil && a0.Block.CPUfeatures.hasFeature(CPUavx512) {
239 replace(a0, vars0)
240 }
241 if vars1[2] != nil && a1.Block.CPUfeatures.hasFeature(CPUavx512) {
242 replace(a1, vars1)
243 }
244
245
246
247
248
249 if vars0[2] == nil {
250 if vars1[2] == nil {
251
252
253
254
255
256
257
258
259
260
261 return [3]*Value{a0, a1, nil}
262 } else {
263
264 vars = addOne(vars0, a1)
265 }
266 } else if vars1[2] == nil {
267
268 vars = addOne(vars1, a0)
269 } else if !ok {
270
271
272
273 return [3]*Value{a0, a1, nil}
274 }
275
276 }
277 }
278
279 if slo&sloInterior == 0 && vars[2] != nil && hasFeature {
280 replace(v, vars)
281 return [3]*Value{v, nil, nil}
282 }
283 return vars
284 }
285
286 for _, v := range roots {
287 if f.pass.debug > 1 {
288 f.Warnl(v.Pos, "SLO root %s", v.LongString())
289 }
290 rewrite(v)
291 }
292 }
293
View as plain text