// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import (
	"cmd/compile/internal/base"
	"cmd/compile/internal/types"
	"cmd/internal/src"
	"cmp"
	"slices"
)
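
// memcombine combines smaller loads and stores into larger ones.
// We ensure this generates good code for encoding/binary operations.
// It may help other cases also.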
func memcombine(f *Func) {
	// This optimization requires that the architecture
	// supports unaligned loads and stores.
	if !f.Config.unalignedOK {
		return
	}

	memcombineLoads(f)
	memcombineStores(f)
}
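
// memcombineLoads tries to combine smaller loads, joined by an OR
// tree, into a single wider load.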
func memcombineLoads(f *Func) {
	// Find "OR trees" to start with.
	mark := f.newSparseSet(f.NumValues())
	defer f.retSparseSet(mark)
	var order []*Value

	// Mark all values that are the argument of an OR.
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if v.Op == OpOr16 || v.Op == OpOr32 || v.Op == OpOr64 {
				mark.add(v.Args[0].ID)
				mark.add(v.Args[1].ID)
			}
		}
	}
	for _, b := range f.Blocks {
		order = order[:0]
		for _, v := range b.Values {
			if v.Op != OpOr16 && v.Op != OpOr32 && v.Op != OpOr64 {
				continue
			}
			if mark.contains(v.ID) {
				// v is the argument of another OR, so it is
				// not the root of an OR tree.
				continue
			}
			// Add the OR tree rooted at v to the order,
			// roots before leaves.
			i := len(order)
			order = append(order, v)
			for ; i < len(order); i++ {
				x := order[i]
				for j := 0; j < 2; j++ {
					a := x.Args[j]
					if a.Op == OpOr16 || a.Op == OpOr32 || a.Op == OpOr64 {
						order = append(order, a)
					}
				}
			}
		}
		for _, v := range order {
			max := f.Config.RegSize
			switch v.Op {
			case OpOr64:
			case OpOr32:
				max = 4
			case OpOr16:
				max = 2
			default:
				continue
			}
			for n := max; n > 1; n /= 2 {
				if combineLoads(v, n) {
					break
				}
			}
		}
	}
}
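
// A BaseAddress represents the address ptr + idx, where
// ptr is a pointer type and idx is an integer type.
// idx may be nil, in which case it is treated as 0.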
type BaseAddress struct {
	ptr *Value
	idx *Value
}
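
// splitPtr returns the base address of ptr and any
// constant offset from that base.
// BaseAddress{ptr,nil},0 is always a valid result, but splitPtr
// tries to peel away as many constants into off as possible.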
func splitPtr(ptr *Value) (BaseAddress, int64) {
	var idx *Value
	var off int64
	for {
		if ptr.Op == OpOffPtr {
			off += ptr.AuxInt
			ptr = ptr.Args[0]
		} else if ptr.Op == OpAddPtr {
			if idx != nil {
				// We have two or more indexing values.
				// Pick the first one we found.
				return BaseAddress{ptr: ptr, idx: idx}, off
			}
			idx = ptr.Args[1]
			if idx.Op == OpAdd32 || idx.Op == OpAdd64 {
				if idx.Args[0].Op == OpConst32 || idx.Args[0].Op == OpConst64 {
					off += idx.Args[0].AuxInt
					idx = idx.Args[1]
				} else if idx.Args[1].Op == OpConst32 || idx.Args[1].Op == OpConst64 {
					off += idx.Args[1].AuxInt
					idx = idx.Args[0]
				}
			}
			ptr = ptr.Args[0]
		} else {
			return BaseAddress{ptr: ptr, idx: idx}, off
		}
	}
}
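
// combineLoads tries to rewrite the OR tree rooted at root as a
// single load of n times the width of the individual loads at its
// leaves. It reports whether the rewrite succeeded.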
func combineLoads(root *Value, n int64) bool {
	orOp := root.Op
	var shiftOp Op
	switch orOp {
	case OpOr64:
		shiftOp = OpLsh64x64
	case OpOr32:
		shiftOp = OpLsh32x64
	case OpOr16:
		shiftOp = OpLsh16x64
	default:
		return false
	}

	// Find n values that are ORed together with the above op.
	a := make([]*Value, 0, 8)
	a = append(a, root)
	for i := 0; i < len(a) && int64(len(a)) < n; i++ {
		v := a[i]
		if v.Uses != 1 && v != root {
			// Something in this subtree is used somewhere else.
			return false
		}
		if v.Op == orOp {
			// Replace the OR with its two arguments, then
			// revisit slot i (which now holds the first argument).
			a[i] = v.Args[0]
			a = append(a, v.Args[1])
			i--
		}
	}
	if int64(len(a)) != n {
		return false
	}

	// Check the first entry to see what ops we are looking for.
	// All the entries should be of the form shift(ext(load)), maybe with no shift.
	v := a[0]
	if v.Op == shiftOp {
		v = v.Args[0]
	}
	var extOp Op
	if orOp == OpOr64 && (v.Op == OpZeroExt8to64 || v.Op == OpZeroExt16to64 || v.Op == OpZeroExt32to64) ||
		orOp == OpOr32 && (v.Op == OpZeroExt8to32 || v.Op == OpZeroExt16to32) ||
		orOp == OpOr16 && v.Op == OpZeroExt8to16 {
		extOp = v.Op
		v = v.Args[0]
	} else {
		return false
	}
	if v.Op != OpLoad {
		return false
	}
	base, _ := splitPtr(v.Args[0])
	mem := v.Args[1]
	size := v.Type.Size()

	if root.Block.Func.Config.arch == "S390X" {
		// s390x can't handle unaligned accesses to global variables.
		if base.ptr.Op == OpAddr {
			return false
		}
	}

	// Check all the entries, extracting the useful info from each.
	type LoadRecord struct {
		load   *Value
		offset int64 // offset of load address from base
		shift  int64
	}
	r := make([]LoadRecord, n, 8)
	for i := int64(0); i < n; i++ {
		v := a[i]
		if v.Uses != 1 {
			return false
		}
		shift := int64(0)
		if v.Op == shiftOp {
			if v.Args[1].Op != OpConst64 {
				return false
			}
			shift = v.Args[1].AuxInt
			v = v.Args[0]
			if v.Uses != 1 {
				return false
			}
		}
		if v.Op != extOp {
			return false
		}
		load := v.Args[0]
		if load.Op != OpLoad {
			return false
		}
		if load.Uses != 1 {
			return false
		}
		if load.Args[1] != mem {
			return false
		}
		p, off := splitPtr(load.Args[0])
		if p != base {
			return false
		}
		r[i] = LoadRecord{load: load, offset: off, shift: shift}
	}

	// Sort in memory address order.
	slices.SortFunc(r, func(a, b LoadRecord) int {
		return cmp.Compare(a.offset, b.offset)
	})

	// Check that we have contiguous offsets.
	for i := int64(0); i < n; i++ {
		if r[i].offset != r[0].offset+i*size {
			return false
		}
	}

	// Check for reads in little-endian or big-endian order.
	shift0 := r[0].shift
	isLittleEndian := true
	for i := int64(0); i < n; i++ {
		if r[i].shift != shift0+i*size*8 {
			isLittleEndian = false
			break
		}
	}
	isBigEndian := true
	for i := int64(0); i < n; i++ {
		if r[i].shift != shift0-i*size*8 {
			isBigEndian = false
			break
		}
	}
	if !isLittleEndian && !isBigEndian {
		return false
	}

	// Find a place to put the new load.
	// This is tricky, because it has to be at a point where
	// its memory argument is live. We can't just put it in root.Block.
	// We use the block of the latest load.
	loads := make([]*Value, n, 8)
	for i := int64(0); i < n; i++ {
		loads[i] = r[i].load
	}
	loadBlock := mergePoint(root.Block, loads...)
	if loadBlock == nil {
		return false
	}
	// Find a source position to use.
	pos := src.NoXPos
	for _, load := range loads {
		if load.Block == loadBlock {
			pos = load.Pos
			break
		}
	}
	if pos == src.NoXPos {
		return false
	}

	// Check to see if we need a byte swap after loading.
	needSwap := isLittleEndian && root.Block.Func.Config.BigEndian ||
		isBigEndian && !root.Block.Func.Config.BigEndian
	if needSwap && (size != 1 || !root.Block.Func.Config.haveByteSwap(n)) {
		return false
	}

	// This is the commit point.

	// First, issue load at lowest address.
	v = loadBlock.NewValue2(pos, OpLoad, sizeType(n*size), r[0].load.Args[0], mem)

	// Byte swap if needed.
	if needSwap {
		v = byteSwap(loadBlock, pos, v)
	}

	// Extend if needed.
	if n*size < root.Type.Size() {
		v = zeroExtend(loadBlock, pos, v, n*size, root.Type.Size())
	}

	// Shift if needed.
	if isLittleEndian && shift0 != 0 {
		v = leftShift(loadBlock, pos, v, shift0)
	}
	if isBigEndian && shift0-(n-1)*size*8 != 0 {
		v = leftShift(loadBlock, pos, v, shift0-(n-1)*size*8)
	}

	// Install the result: rewrite root as a copy of the new value.
	root.reset(OpCopy)
	root.AddArg(v)

	// Clobber the loads, just to prevent additional work being done on
	// subtrees (which are now unreachable).
	for i := int64(0); i < n; i++ {
		clobber(r[i].load)
	}
	return true
}
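
// memcombineStores tries to combine sequences of adjacent narrow
// stores within each block into single wider stores.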
func memcombineStores(f *Func) {
	mark := f.newSparseSet(f.NumValues())
	defer f.retSparseSet(mark)
	var order []*Value

	for _, b := range f.Blocks {
		// Mark all stores which are not last in a store sequence.
		mark.clear()
		for _, v := range b.Values {
			if v.Op == OpStore {
				mark.add(v.MemoryArg().ID)
			}
		}

		// Pick an order for visiting stores such that
		// later stores come earlier in the ordering.
		order = order[:0]
		for _, v := range b.Values {
			if v.Op != OpStore {
				continue
			}
			if mark.contains(v.ID) {
				continue // not the last store in its chain
			}
			for {
				order = append(order, v)
				v = v.Args[2]
				if v.Block != b || v.Op != OpStore {
					break
				}
			}
		}

		// Look for combining opportunities at each store in queue order.
		for _, v := range order {
			if v.Op != OpStore { // already rewritten
				continue
			}

			size := v.Aux.(*types.Type).Size()
			if size >= f.Config.RegSize || size == 0 {
				continue
			}

			for n := f.Config.RegSize / size; n > 1; n /= 2 {
				if combineStores(v, n) {
					break
				}
			}
		}
	}
}
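
// combineStores tries to combine the n stores ending in root.
// It reports whether it succeeded.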
func combineStores(root *Value, n int64) bool {
	// Helper functions.
	type StoreRecord struct {
		store  *Value
		offset int64
	}
	// getShiftBase returns the value that the first two stored values
	// are both truncations (and possibly right shifts) of, or nil if
	// there is no such common base.
	getShiftBase := func(a []StoreRecord) *Value {
		x := a[0].store.Args[1]
		y := a[1].store.Args[1]
		switch x.Op {
		case OpTrunc64to8, OpTrunc64to16, OpTrunc64to32, OpTrunc32to8, OpTrunc32to16, OpTrunc16to8:
			x = x.Args[0]
		default:
			return nil
		}
		switch y.Op {
		case OpTrunc64to8, OpTrunc64to16, OpTrunc64to32, OpTrunc32to8, OpTrunc32to16, OpTrunc16to8:
			y = y.Args[0]
		default:
			return nil
		}
		var x2 *Value
		switch x.Op {
		case OpRsh64Ux64, OpRsh32Ux64, OpRsh16Ux64:
			x2 = x.Args[0]
		default:
			// not a shift; leave x2 nil
		}
		var y2 *Value
		switch y.Op {
		case OpRsh64Ux64, OpRsh32Ux64, OpRsh16Ux64:
			y2 = y.Args[0]
		default:
			// not a shift; leave y2 nil
		}
		if y2 == x {
			// y is a shifted version of x
			return x
		}
		if x2 == y {
			// x is a shifted version of y
			return y
		}
		if x2 == y2 {
			// x and y are both shifted versions of some common base
			return x2
		}
		return nil
	}
	// isShiftBase reports whether the value stored by v is base,
	// or a right shift of base, possibly truncated.
	isShiftBase := func(v, base *Value) bool {
		val := v.Args[1]
		switch val.Op {
		case OpTrunc64to8, OpTrunc64to16, OpTrunc64to32, OpTrunc32to8, OpTrunc32to16, OpTrunc16to8:
			val = val.Args[0]
		default:
			return false
		}
		if val == base {
			return true
		}
		switch val.Op {
		case OpRsh64Ux64, OpRsh32Ux64, OpRsh16Ux64:
			val = val.Args[0]
		default:
			return false
		}
		return val == base
	}
	// shift returns the number of bits base is shifted right by
	// before being stored by v, or -1 if it can't tell.
	shift := func(v, base *Value) int64 {
		val := v.Args[1]
		switch val.Op {
		case OpTrunc64to8, OpTrunc64to16, OpTrunc64to32, OpTrunc32to8, OpTrunc32to16, OpTrunc16to8:
			val = val.Args[0]
		default:
			return -1
		}
		if val == base {
			return 0
		}
		switch val.Op {
		case OpRsh64Ux64, OpRsh32Ux64, OpRsh16Ux64:
			val = val.Args[1] // the shift amount
		default:
			return -1
		}
		if val.Op != OpConst64 {
			return -1
		}
		return val.AuxInt
	}

	// Size of the individual stores.
	size := root.Aux.(*types.Type).Size()
	if size*n > root.Block.Func.Config.RegSize {
		return false
	}

	// Gather n stores to look at. Check easy conditions we require.
	a := make([]StoreRecord, 0, 8)
	rbase, roff := splitPtr(root.Args[0])
	if root.Block.Func.Config.arch == "S390X" {
		// s390x can't handle unaligned accesses to global variables.
		if rbase.ptr.Op == OpAddr {
			return false
		}
	}
	a = append(a, StoreRecord{root, roff})
	for i, x := int64(1), root.Args[2]; i < n; i, x = i+1, x.Args[2] {
		if x.Op != OpStore {
			return false
		}
		if x.Block != root.Block {
			return false
		}
		if x.Uses != 1 {
			return false
		}
		if x.Aux.(*types.Type).Size() != size {
			// TODO: the constant source and consecutive load source cases
			// do not need all the stores to be the same size.
			return false
		}
		base, off := splitPtr(x.Args[0])
		if base != rbase {
			return false
		}
		a = append(a, StoreRecord{x, off})
	}

	// Before we sort, grab the memory arg the result should have.
	mem := a[n-1].store.Args[2]
	// Also grab the position of the first store (in program order).
	pos := a[n-1].store.Pos

	// Sort stores in increasing address order.
	slices.SortFunc(a, func(sr1, sr2 StoreRecord) int {
		return cmp.Compare(sr1.offset, sr2.offset)
	})

	// Check that everything is written to sequential locations.
	for i := int64(0); i < n; i++ {
		if a[i].offset != a[0].offset+i*size {
			return false
		}
	}

	// Memory location we're going to write at (the lowest one).
	ptr := a[0].store.Args[0]

	// Check for constant stores.
	isConst := true
	for i := int64(0); i < n; i++ {
		switch a[i].store.Args[1].Op {
		case OpConst32, OpConst16, OpConst8, OpConstBool:
		default:
			isConst = false
			break
		}
	}
	if isConst {
		// Modify root to do all the stores.
		var c int64
		mask := int64(1)<<(8*size) - 1
		for i := int64(0); i < n; i++ {
			s := 8 * size * int64(i)
			if root.Block.Func.Config.BigEndian {
				s = 8*size*(n-1) - s
			}
			c |= (a[i].store.Args[1].AuxInt & mask) << s
		}
		var cv *Value
		switch size * n {
		case 2:
			cv = root.Block.Func.ConstInt16(types.Types[types.TUINT16], int16(c))
		case 4:
			cv = root.Block.Func.ConstInt32(types.Types[types.TUINT32], int32(c))
		case 8:
			cv = root.Block.Func.ConstInt64(types.Types[types.TUINT64], c)
		}

		// Move all the stores to the root.
		for i := int64(0); i < n; i++ {
			v := a[i].store
			if v == root {
				v.Aux = cv.Type // widen store type
				v.Pos = pos
				v.SetArg(0, ptr)
				v.SetArg(1, cv)
				v.SetArg(2, mem)
			} else {
				clobber(v)
				v.Type = types.Types[types.TBOOL] // erase memory type
			}
		}
		return true
	}

	// Check for consecutive loads as the source of the stores.
	var loadMem *Value
	var loadBase BaseAddress
	var loadIdx int64
	for i := int64(0); i < n; i++ {
		load := a[i].store.Args[1]
		if load.Op != OpLoad {
			loadMem = nil
			break
		}
		if load.Uses != 1 {
			loadMem = nil
			break
		}
		if load.Type.IsPtr() {
			// Don't combine stores containing a pointer, as we need
			// a write barrier for those. This can't currently happen,
			// but might in the future if we ever have another
			// 8-byte-reg/4-byte-ptr architecture like amd64p32.
			loadMem = nil
			break
		}
		mem := load.Args[1]
		base, idx := splitPtr(load.Args[0])
		if loadMem == nil {
			// First one we found.
			loadMem = mem
			loadBase = base
			loadIdx = idx
			continue
		}
		if base != loadBase || mem != loadMem {
			loadMem = nil
			break
		}
		if idx != loadIdx+(a[i].offset-a[0].offset) {
			loadMem = nil
			break
		}
	}
	if loadMem != nil {
		// Modify the first load to do a larger load instead.
		load := a[0].store.Args[1]
		switch size * n {
		case 2:
			load.Type = types.Types[types.TUINT16]
		case 4:
			load.Type = types.Types[types.TUINT32]
		case 8:
			load.Type = types.Types[types.TUINT64]
		}

		// Modify root to do the store.
		for i := int64(0); i < n; i++ {
			v := a[i].store
			if v == root {
				v.Aux = load.Type // widen store type
				v.Pos = pos
				v.SetArg(0, ptr)
				v.SetArg(1, load)
				v.SetArg(2, mem)
			} else {
				clobber(v)
				v.Type = types.Types[types.TBOOL] // erase memory type
			}
		}
		return true
	}

	// Check that all the shift/trunc are of the same base value.
	shiftBase := getShiftBase(a)
	if shiftBase == nil {
		return false
	}
	for i := int64(0); i < n; i++ {
		if !isShiftBase(a[i].store, shiftBase) {
			return false
		}
	}

	// Check for writes in little-endian or big-endian order.
	isLittleEndian := true
	shift0 := shift(a[0].store, shiftBase)
	for i := int64(1); i < n; i++ {
		if shift(a[i].store, shiftBase) != shift0+i*size*8 {
			isLittleEndian = false
			break
		}
	}
	isBigEndian := true
	for i := int64(1); i < n; i++ {
		if shift(a[i].store, shiftBase) != shift0-i*size*8 {
			isBigEndian = false
			break
		}
	}
	if !isLittleEndian && !isBigEndian {
		return false
	}

	// Check to see if we need byte swap before storing.
	needSwap := isLittleEndian && root.Block.Func.Config.BigEndian ||
		isBigEndian && !root.Block.Func.Config.BigEndian
	if needSwap && (size != 1 || !root.Block.Func.Config.haveByteSwap(n)) {
		return false
	}

	// This is the commit point.

	// Modify root to do all the stores.
	sv := shiftBase
	if isLittleEndian && shift0 != 0 {
		sv = rightShift(root.Block, root.Pos, sv, shift0)
	}
	if isBigEndian && shift0-(n-1)*size*8 != 0 {
		sv = rightShift(root.Block, root.Pos, sv, shift0-(n-1)*size*8)
	}
	if sv.Type.Size() > size*n {
		sv = truncate(root.Block, root.Pos, sv, sv.Type.Size(), size*n)
	}
	if needSwap {
		sv = byteSwap(root.Block, root.Pos, sv)
	}

	// Move all the stores to the root.
	for i := int64(0); i < n; i++ {
		v := a[i].store
		if v == root {
			v.Aux = sv.Type // widen store type
			v.Pos = pos
			v.SetArg(0, ptr)
			v.SetArg(1, sv)
			v.SetArg(2, mem)
		} else {
			clobber(v)
			v.Type = types.Types[types.TBOOL] // erase memory type
		}
	}
	return true
}
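
// sizeType returns the unsigned integer type of the given size in bytes.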
func sizeType(size int64) *types.Type {
	switch size {
	case 8:
		return types.Types[types.TUINT64]
	case 4:
		return types.Types[types.TUINT32]
	case 2:
		return types.Types[types.TUINT16]
	default:
		base.Fatalf("bad size %d\n", size)
		return nil
	}
}
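
// truncate returns v, a value of size from bytes, truncated to to bytes.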
func truncate(b *Block, pos src.XPos, v *Value, from, to int64) *Value {
	switch from*10 + to {
	case 82:
		return b.NewValue1(pos, OpTrunc64to16, types.Types[types.TUINT16], v)
	case 84:
		return b.NewValue1(pos, OpTrunc64to32, types.Types[types.TUINT32], v)
	case 42:
		return b.NewValue1(pos, OpTrunc32to16, types.Types[types.TUINT16], v)
	default:
		base.Fatalf("bad sizes %d %d\n", from, to)
		return nil
	}
}
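
// zeroExtend returns v, a value of size from bytes, zero-extended to to bytes.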
func zeroExtend(b *Block, pos src.XPos, v *Value, from, to int64) *Value {
	switch from*10 + to {
	case 24:
		return b.NewValue1(pos, OpZeroExt16to32, types.Types[types.TUINT32], v)
	case 28:
		return b.NewValue1(pos, OpZeroExt16to64, types.Types[types.TUINT64], v)
	case 48:
		return b.NewValue1(pos, OpZeroExt32to64, types.Types[types.TUINT64], v)
	default:
		base.Fatalf("bad sizes %d %d\n", from, to)
		return nil
	}
}
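
// leftShift returns v shifted left by shift bits.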
func leftShift(b *Block, pos src.XPos, v *Value, shift int64) *Value {
	s := b.Func.ConstInt64(types.Types[types.TUINT64], shift)
	size := v.Type.Size()
	switch size {
	case 8:
		return b.NewValue2(pos, OpLsh64x64, v.Type, v, s)
	case 4:
		return b.NewValue2(pos, OpLsh32x64, v.Type, v, s)
	case 2:
		return b.NewValue2(pos, OpLsh16x64, v.Type, v, s)
	default:
		base.Fatalf("bad size %d\n", size)
		return nil
	}
}
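
// rightShift returns v shifted right (unsigned) by shift bits.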
func rightShift(b *Block, pos src.XPos, v *Value, shift int64) *Value {
	s := b.Func.ConstInt64(types.Types[types.TUINT64], shift)
	size := v.Type.Size()
	switch size {
	case 8:
		return b.NewValue2(pos, OpRsh64Ux64, v.Type, v, s)
	case 4:
		return b.NewValue2(pos, OpRsh32Ux64, v.Type, v, s)
	case 2:
		return b.NewValue2(pos, OpRsh16Ux64, v.Type, v, s)
	default:
		base.Fatalf("bad size %d\n", size)
		return nil
	}
}
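
// byteSwap returns v with its byte order reversed.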
func byteSwap(b *Block, pos src.XPos, v *Value) *Value {
	switch v.Type.Size() {
	case 8:
		return b.NewValue1(pos, OpBswap64, v.Type, v)
	case 4:
		return b.NewValue1(pos, OpBswap32, v.Type, v)
	case 2:
		return b.NewValue1(pos, OpBswap16, v.Type, v)
	default:
		v.Fatalf("bad size %d\n", v.Type.Size())
		return nil
	}
}