Source file src/cmd/compile/internal/ssa/pair.go

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ssa
     6  
     7  import (
     8  	"cmd/compile/internal/ir"
     9  	"cmd/compile/internal/types"
    10  	"slices"
    11  )
    12  
    13  // The pair pass finds memory operations that can be paired up
    14  // into single 2-register memory instructions.
    15  func pair(f *Func) {
    16  	// Only arm64 for now. This pass is fairly arch-specific.
    17  	switch f.Config.arch {
    18  	case "arm64":
    19  	default:
    20  		return
    21  	}
    22  	pairLoads(f)
    23  	pairStores(f)
    24  }
    25  
    26  type pairableLoadInfo struct {
    27  	width int64 // width of one element in the pair, in bytes
    28  	pair  Op
    29  }
    30  
    31  // All pairableLoad ops must take 2 arguments, a pointer and a memory.
    32  // They must also take an offset in Aux/AuxInt.
    33  var pairableLoads = map[Op]pairableLoadInfo{
    34  	OpARM64MOVDload:  {8, OpARM64LDP},
    35  	OpARM64MOVWUload: {4, OpARM64LDPW},
    36  	OpARM64MOVWload:  {4, OpARM64LDPSW},
    37  	// TODO: conceivably we could pair a signed and unsigned load
    38  	// if we knew the upper bits of one of them weren't being used.
    39  	OpARM64FMOVDload: {8, OpARM64FLDPD},
    40  	OpARM64FMOVSload: {4, OpARM64FLDPS},
    41  }
    42  
    43  type pairableStoreInfo struct {
    44  	width int64 // width of one element in the pair, in bytes
    45  	pair  Op
    46  }
    47  
    48  // All pairableStore keys must take 3 arguments, a pointer, a value, and a memory.
    49  // All pairableStore values must take 4 arguments, a pointer, 2 values, and a memory.
    50  // They must also take an offset in Aux/AuxInt.
    51  var pairableStores = map[Op]pairableStoreInfo{
    52  	OpARM64MOVDstore:  {8, OpARM64STP},
    53  	OpARM64MOVWstore:  {4, OpARM64STPW},
    54  	OpARM64FMOVDstore: {8, OpARM64FSTPD},
    55  	OpARM64FMOVSstore: {4, OpARM64FSTPS},
    56  }
    57  
    58  // offsetOk returns true if a pair instruction should be used
    59  // for the offset Aux+off, when the data width (of the
    60  // unpaired instructions) is width.
    61  // This function is best-effort. The compiled function must
    62  // still work if offsetOk always returns true.
    63  // TODO: this is currently arm64-specific.
    64  func offsetOk(aux Aux, off, width int64) bool {
    65  	if true {
    66  		// Seems to generate slightly smaller code if we just
    67  		// always allow this rewrite.
    68  		//
    69  		// Without pairing, we have 2 load instructions, like:
    70  		//   LDR 88(R0), R1
    71  		//   LDR 96(R0), R2
    72  		// with pairing we have, best case:
    73  		//   LDP 88(R0), R1, R2
    74  		// but maybe we need an adjuster if out of range or unaligned:
    75  		//   ADD R0, $88, R27
    76  		//   LDP (R27), R1, R2
    77  		// Even with the adjuster, it is at least no worse.
    78  		//
    79  		// A similar situation occurs when accessing globals.
    80  		// Two loads from globals requires 4 instructions,
    81  		// two ADRP and two LDR. With pairing, we need
    82  		// ADRP+ADD+LDP, three instructions.
    83  		//
    84  		// With pairing, it looks like the critical path might
    85  		// be a little bit longer. But it should never be more
    86  		// instructions.
    87  		// TODO: see if that longer critical path causes any
    88  		// regressions.
    89  		return true
    90  	}
    91  	if aux != nil {
    92  		if _, ok := aux.(*ir.Name); !ok {
    93  			// Offset is probably too big (globals).
    94  			return false
    95  		}
    96  		// We let *ir.Names pass here, as
    97  		// they are probably small offsets from SP.
    98  		// There's no guarantee that we're in range
    99  		// in that case though (we don't know the
   100  		// stack frame size yet), so the assembler
   101  		// might need to issue fixup instructions.
   102  		// Assume some small frame size.
   103  		if off >= 0 {
   104  			off += 120
   105  		}
   106  		// TODO: figure out how often this helps vs. hurts.
   107  	}
   108  	switch width {
   109  	case 4:
   110  		if off >= -256 && off <= 252 && off%4 == 0 {
   111  			return true
   112  		}
   113  	case 8:
   114  		if off >= -512 && off <= 504 && off%8 == 0 {
   115  			return true
   116  		}
   117  	}
   118  	return false
   119  }
   120  
   121  func pairLoads(f *Func) {
   122  	var loads []*Value
   123  
   124  	// Registry of aux values for sorting.
   125  	auxIDs := map[Aux]int{}
   126  	auxID := func(aux Aux) int {
   127  		id, ok := auxIDs[aux]
   128  		if !ok {
   129  			id = len(auxIDs)
   130  			auxIDs[aux] = id
   131  		}
   132  		return id
   133  	}
   134  
   135  	for _, b := range f.Blocks {
   136  		// Find loads.
   137  		loads = loads[:0]
   138  		clear(auxIDs)
   139  		for _, v := range b.Values {
   140  			info := pairableLoads[v.Op]
   141  			if info.width == 0 {
   142  				continue // not pairable
   143  			}
   144  			if !offsetOk(v.Aux, v.AuxInt, info.width) {
   145  				continue // not advisable
   146  			}
   147  			loads = append(loads, v)
   148  		}
   149  		if len(loads) < 2 {
   150  			continue
   151  		}
   152  
   153  		// Sort to put pairable loads together.
   154  		slices.SortFunc(loads, func(x, y *Value) int {
   155  			// First sort by op, ptr, and memory arg.
   156  			if x.Op != y.Op {
   157  				return int(x.Op - y.Op)
   158  			}
   159  			if x.Args[0].ID != y.Args[0].ID {
   160  				return int(x.Args[0].ID - y.Args[0].ID)
   161  			}
   162  			if x.Args[1].ID != y.Args[1].ID {
   163  				return int(x.Args[1].ID - y.Args[1].ID)
   164  			}
   165  			// Then sort by aux. (nil first, then by aux ID)
   166  			if x.Aux != nil {
   167  				if y.Aux == nil {
   168  					return 1
   169  				}
   170  				a, b := auxID(x.Aux), auxID(y.Aux)
   171  				if a != b {
   172  					return a - b
   173  				}
   174  			} else if y.Aux != nil {
   175  				return -1
   176  			}
   177  			// Then sort by offset, low to high.
   178  			return int(x.AuxInt - y.AuxInt)
   179  		})
   180  
   181  		// Look for pairable loads.
   182  		for i := 0; i < len(loads)-1; i++ {
   183  			x := loads[i]
   184  			y := loads[i+1]
   185  			if x.Op != y.Op || x.Args[0] != y.Args[0] || x.Args[1] != y.Args[1] {
   186  				continue
   187  			}
   188  			if x.Aux != y.Aux {
   189  				continue
   190  			}
   191  			if x.AuxInt+pairableLoads[x.Op].width != y.AuxInt {
   192  				continue
   193  			}
   194  
   195  			// Commit point.
   196  
   197  			// Make the 2-register load.
   198  			load := b.NewValue2IA(x.Pos, pairableLoads[x.Op].pair, types.NewTuple(x.Type, y.Type), x.AuxInt, x.Aux, x.Args[0], x.Args[1])
   199  
   200  			// Modify x to be (Select0 load). Similar for y.
   201  			x.reset(OpSelect0)
   202  			x.SetArgs1(load)
   203  			y.reset(OpSelect1)
   204  			y.SetArgs1(load)
   205  
   206  			i++ // Skip y next time around the loop.
   207  		}
   208  	}
   209  }
   210  
   211  func pairStores(f *Func) {
   212  	last := f.Cache.allocBoolSlice(f.NumValues())
   213  	defer f.Cache.freeBoolSlice(last)
   214  
   215  	// prevStore returns the previous store in the
   216  	// same block, or nil if there are none.
   217  	prevStore := func(v *Value) *Value {
   218  		if v.Op == OpInitMem || v.Op == OpPhi {
   219  			return nil
   220  		}
   221  		m := v.MemoryArg()
   222  		if m.Block != v.Block {
   223  			return nil
   224  		}
   225  		return m
   226  	}
   227  
   228  	for _, b := range f.Blocks {
   229  		// Find last store in block, so we can
   230  		// walk the stores last to first.
   231  		// Last to first helps ensure that the rewrites we
   232  		// perform do not get in the way of subsequent rewrites.
   233  		for _, v := range b.Values {
   234  			if v.Type.IsMemory() {
   235  				last[v.ID] = true
   236  			}
   237  		}
   238  		for _, v := range b.Values {
   239  			if v.Type.IsMemory() {
   240  				if m := prevStore(v); m != nil {
   241  					last[m.ID] = false
   242  				}
   243  			}
   244  		}
   245  		var lastMem *Value
   246  		for _, v := range b.Values {
   247  			if last[v.ID] {
   248  				lastMem = v
   249  				break
   250  			}
   251  		}
   252  
   253  		// Check all stores, from last to first.
   254  	memCheck:
   255  		for v := lastMem; v != nil; v = prevStore(v) {
   256  			info := pairableStores[v.Op]
   257  			if info.width == 0 {
   258  				continue // Not pairable.
   259  			}
   260  			if !offsetOk(v.Aux, v.AuxInt, info.width) {
   261  				continue // Not advisable to pair.
   262  			}
   263  			ptr := v.Args[0]
   264  			val := v.Args[1]
   265  			mem := v.Args[2]
   266  			off := v.AuxInt
   267  			aux := v.Aux
   268  
   269  			// Look for earlier store we can combine with.
   270  			lowerOk := true
   271  			higherOk := true
   272  			count := 10 // max lookback distance
   273  			for w := prevStore(v); w != nil; w = prevStore(w) {
   274  				if w.Uses != 1 {
   275  					// We can't combine stores if the earlier
   276  					// store has any use besides the next one
   277  					// in the store chain.
   278  					// (Unless we could check the aliasing of
   279  					// all those other uses.)
   280  					continue memCheck
   281  				}
   282  				if w.Op == v.Op &&
   283  					w.Args[0] == ptr &&
   284  					w.Aux == aux &&
   285  					(lowerOk && w.AuxInt == off-info.width || higherOk && w.AuxInt == off+info.width) {
   286  					// This op is mergeable with v.
   287  
   288  					// Commit point.
   289  
   290  					// ptr val1 val2 mem
   291  					args := []*Value{ptr, val, w.Args[1], mem}
   292  					if w.AuxInt == off-info.width {
   293  						args[1], args[2] = args[2], args[1]
   294  						off -= info.width
   295  					}
   296  					v.reset(info.pair)
   297  					v.AddArgs(args...)
   298  					v.Aux = aux
   299  					v.AuxInt = off
   300  					v.Pos = w.Pos // take position of earlier of the two stores (TODO: not really working?)
   301  
   302  					// Make w just a memory copy.
   303  					wmem := w.MemoryArg()
   304  					w.reset(OpCopy)
   305  					w.SetArgs1(wmem)
   306  					continue memCheck
   307  				}
   308  				if count--; count == 0 {
   309  					// Only look back so far.
   310  					// This keeps us in O(n) territory, and it
   311  					// also prevents us from keeping values
   312  					// in registers for too long (and thus
   313  					// needing to spill them).
   314  					continue memCheck
   315  				}
   316  				// We're now looking at a store w which is currently
   317  				// between the store v that we're intending to merge into,
   318  				// and the store we'll eventually find to merge with it.
   319  				// Make sure this store doesn't alias with the one
   320  				// we'll be moving.
   321  				var width int64
   322  				switch w.Op {
   323  				case OpARM64MOVDstore, OpARM64FMOVDstore:
   324  					width = 8
   325  				case OpARM64MOVWstore, OpARM64FMOVSstore:
   326  					width = 4
   327  				case OpARM64MOVHstore:
   328  					width = 2
   329  				case OpARM64MOVBstore:
   330  					width = 1
   331  				case OpCopy:
   332  					continue // this was a store we merged earlier
   333  				default:
   334  					// Can't reorder with any other memory operations.
   335  					// (atomics, calls, ...)
   336  					continue memCheck
   337  				}
   338  
   339  				// We only allow reordering with respect to other
   340  				// writes to the same pointer and aux, so we can
   341  				// compute the exact the aliasing relationship.
   342  				if w.Args[0] != ptr || w.Aux != aux {
   343  					continue memCheck
   344  				}
   345  				if overlap(w.AuxInt, width, off-info.width, info.width) {
   346  					// Aliases with slot before v's location.
   347  					lowerOk = false
   348  				}
   349  				if overlap(w.AuxInt, width, off+info.width, info.width) {
   350  					// Aliases with slot after v's location.
   351  					higherOk = false
   352  				}
   353  				if !higherOk && !lowerOk {
   354  					continue memCheck
   355  				}
   356  			}
   357  		}
   358  	}
   359  }
   360  

View as plain text