Source file src/cmd/compile/internal/ppc64/ssa.go

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ppc64
     6  
     7  import (
     8  	"cmd/compile/internal/base"
     9  	"cmd/compile/internal/ir"
    10  	"cmd/compile/internal/logopt"
    11  	"cmd/compile/internal/objw"
    12  	"cmd/compile/internal/ssa"
    13  	"cmd/compile/internal/ssagen"
    14  	"cmd/compile/internal/types"
    15  	"cmd/internal/obj"
    16  	"cmd/internal/obj/ppc64"
    17  	"internal/buildcfg"
    18  	"math"
    19  	"strings"
    20  )
    21  
// ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
	// Currently a no-op on ppc64. The disabled sketch below shows the
	// intended flag-liveness scan: walk the block's values backwards,
	// tracking whether flags are live, and mark (via a non-nil Aux) any
	// MOVDconst emitted while flags are live so codegen can avoid
	// clobbering them.
	// NOTE(review): kept for reference; confirm constant materialization
	// on ppc64 never clobbers flags before deleting this sketch.
	//	flive := b.FlagsLiveAtEnd
	//	if b.Control != nil && b.Control.Type.IsFlags() {
	//		flive = true
	//	}
	//	for i := len(b.Values) - 1; i >= 0; i-- {
	//		v := b.Values[i]
	//		if flive && (v.Op == ssa.OpPPC64MOVDconst) {
	//			// The "mark" is any non-nil Aux value.
	//			v.Aux = v
	//		}
	//		if v.Type.IsFlags() {
	//			flive = false
	//		}
	//		for _, a := range v.Args {
	//			if a.Type.IsFlags() {
	//				flive = true
	//			}
	//		}
	//	}
}
    44  
    45  // loadByType returns the load instruction of the given type.
    46  func loadByType(t *types.Type) obj.As {
    47  	if t.IsFloat() {
    48  		switch t.Size() {
    49  		case 4:
    50  			return ppc64.AFMOVS
    51  		case 8:
    52  			return ppc64.AFMOVD
    53  		}
    54  	} else {
    55  		switch t.Size() {
    56  		case 1:
    57  			if t.IsSigned() {
    58  				return ppc64.AMOVB
    59  			} else {
    60  				return ppc64.AMOVBZ
    61  			}
    62  		case 2:
    63  			if t.IsSigned() {
    64  				return ppc64.AMOVH
    65  			} else {
    66  				return ppc64.AMOVHZ
    67  			}
    68  		case 4:
    69  			if t.IsSigned() {
    70  				return ppc64.AMOVW
    71  			} else {
    72  				return ppc64.AMOVWZ
    73  			}
    74  		case 8:
    75  			return ppc64.AMOVD
    76  		}
    77  	}
    78  	panic("bad load type")
    79  }
    80  
    81  // storeByType returns the store instruction of the given type.
    82  func storeByType(t *types.Type) obj.As {
    83  	if t.IsFloat() {
    84  		switch t.Size() {
    85  		case 4:
    86  			return ppc64.AFMOVS
    87  		case 8:
    88  			return ppc64.AFMOVD
    89  		}
    90  	} else {
    91  		switch t.Size() {
    92  		case 1:
    93  			return ppc64.AMOVB
    94  		case 2:
    95  			return ppc64.AMOVH
    96  		case 4:
    97  			return ppc64.AMOVW
    98  		case 8:
    99  			return ppc64.AMOVD
   100  		}
   101  	}
   102  	panic("bad store type")
   103  }
   104  
   105  func ssaGenValue(s *ssagen.State, v *ssa.Value) {
   106  	switch v.Op {
   107  	case ssa.OpCopy:
   108  		t := v.Type
   109  		if t.IsMemory() {
   110  			return
   111  		}
   112  		x := v.Args[0].Reg()
   113  		y := v.Reg()
   114  		if x != y {
   115  			rt := obj.TYPE_REG
   116  			op := ppc64.AMOVD
   117  
   118  			if t.IsFloat() {
   119  				op = ppc64.AFMOVD
   120  			}
   121  			p := s.Prog(op)
   122  			p.From.Type = rt
   123  			p.From.Reg = x
   124  			p.To.Type = rt
   125  			p.To.Reg = y
   126  		}
   127  
   128  	case ssa.OpPPC64LoweredAtomicAnd8,
   129  		ssa.OpPPC64LoweredAtomicAnd32,
   130  		ssa.OpPPC64LoweredAtomicOr8,
   131  		ssa.OpPPC64LoweredAtomicOr32:
   132  		// LWSYNC
   133  		// LBAR/LWAR	(Rarg0), Rtmp
   134  		// AND/OR	Rarg1, Rtmp
   135  		// STBCCC/STWCCC Rtmp, (Rarg0)
   136  		// BNE		-3(PC)
   137  		ld := ppc64.ALBAR
   138  		st := ppc64.ASTBCCC
   139  		if v.Op == ssa.OpPPC64LoweredAtomicAnd32 || v.Op == ssa.OpPPC64LoweredAtomicOr32 {
   140  			ld = ppc64.ALWAR
   141  			st = ppc64.ASTWCCC
   142  		}
   143  		r0 := v.Args[0].Reg()
   144  		r1 := v.Args[1].Reg()
   145  		// LWSYNC - Assuming shared data not write-through-required nor
   146  		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
   147  		plwsync := s.Prog(ppc64.ALWSYNC)
   148  		plwsync.To.Type = obj.TYPE_NONE
   149  		// LBAR or LWAR
   150  		p := s.Prog(ld)
   151  		p.From.Type = obj.TYPE_MEM
   152  		p.From.Reg = r0
   153  		p.To.Type = obj.TYPE_REG
   154  		p.To.Reg = ppc64.REGTMP
   155  		// AND/OR reg1,out
   156  		p1 := s.Prog(v.Op.Asm())
   157  		p1.From.Type = obj.TYPE_REG
   158  		p1.From.Reg = r1
   159  		p1.To.Type = obj.TYPE_REG
   160  		p1.To.Reg = ppc64.REGTMP
   161  		// STBCCC or STWCCC
   162  		p2 := s.Prog(st)
   163  		p2.From.Type = obj.TYPE_REG
   164  		p2.From.Reg = ppc64.REGTMP
   165  		p2.To.Type = obj.TYPE_MEM
   166  		p2.To.Reg = r0
   167  		p2.RegTo2 = ppc64.REGTMP
   168  		// BNE retry
   169  		p3 := s.Prog(ppc64.ABNE)
   170  		p3.To.Type = obj.TYPE_BRANCH
   171  		p3.To.SetTarget(p)
   172  
   173  	case ssa.OpPPC64LoweredAtomicAdd32,
   174  		ssa.OpPPC64LoweredAtomicAdd64:
   175  		// LWSYNC
   176  		// LDAR/LWAR    (Rarg0), Rout
   177  		// ADD		Rarg1, Rout
   178  		// STDCCC/STWCCC Rout, (Rarg0)
   179  		// BNE         -3(PC)
   180  		// MOVW		Rout,Rout (if Add32)
   181  		ld := ppc64.ALDAR
   182  		st := ppc64.ASTDCCC
   183  		if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
   184  			ld = ppc64.ALWAR
   185  			st = ppc64.ASTWCCC
   186  		}
   187  		r0 := v.Args[0].Reg()
   188  		r1 := v.Args[1].Reg()
   189  		out := v.Reg0()
   190  		// LWSYNC - Assuming shared data not write-through-required nor
   191  		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
   192  		plwsync := s.Prog(ppc64.ALWSYNC)
   193  		plwsync.To.Type = obj.TYPE_NONE
   194  		// LDAR or LWAR
   195  		p := s.Prog(ld)
   196  		p.From.Type = obj.TYPE_MEM
   197  		p.From.Reg = r0
   198  		p.To.Type = obj.TYPE_REG
   199  		p.To.Reg = out
   200  		// ADD reg1,out
   201  		p1 := s.Prog(ppc64.AADD)
   202  		p1.From.Type = obj.TYPE_REG
   203  		p1.From.Reg = r1
   204  		p1.To.Reg = out
   205  		p1.To.Type = obj.TYPE_REG
   206  		// STDCCC or STWCCC
   207  		p3 := s.Prog(st)
   208  		p3.From.Type = obj.TYPE_REG
   209  		p3.From.Reg = out
   210  		p3.To.Type = obj.TYPE_MEM
   211  		p3.To.Reg = r0
   212  		// BNE retry
   213  		p4 := s.Prog(ppc64.ABNE)
   214  		p4.To.Type = obj.TYPE_BRANCH
   215  		p4.To.SetTarget(p)
   216  
   217  		// Ensure a 32 bit result
   218  		if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
   219  			p5 := s.Prog(ppc64.AMOVWZ)
   220  			p5.To.Type = obj.TYPE_REG
   221  			p5.To.Reg = out
   222  			p5.From.Type = obj.TYPE_REG
   223  			p5.From.Reg = out
   224  		}
   225  
   226  	case ssa.OpPPC64LoweredAtomicExchange8,
   227  		ssa.OpPPC64LoweredAtomicExchange32,
   228  		ssa.OpPPC64LoweredAtomicExchange64:
   229  		// LWSYNC
   230  		// LDAR/LWAR/LBAR        (Rarg0), Rout
   231  		// STDCCC/STWCCC/STBWCCC Rout, (Rarg0)
   232  		// BNE         -2(PC)
   233  		// ISYNC
   234  		ld := ppc64.ALDAR
   235  		st := ppc64.ASTDCCC
   236  		switch v.Op {
   237  		case ssa.OpPPC64LoweredAtomicExchange8:
   238  			ld = ppc64.ALBAR
   239  			st = ppc64.ASTBCCC
   240  		case ssa.OpPPC64LoweredAtomicExchange32:
   241  			ld = ppc64.ALWAR
   242  			st = ppc64.ASTWCCC
   243  		}
   244  		r0 := v.Args[0].Reg()
   245  		r1 := v.Args[1].Reg()
   246  		out := v.Reg0()
   247  		// LWSYNC - Assuming shared data not write-through-required nor
   248  		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
   249  		plwsync := s.Prog(ppc64.ALWSYNC)
   250  		plwsync.To.Type = obj.TYPE_NONE
   251  		// L[B|W|D]AR
   252  		p := s.Prog(ld)
   253  		p.From.Type = obj.TYPE_MEM
   254  		p.From.Reg = r0
   255  		p.To.Type = obj.TYPE_REG
   256  		p.To.Reg = out
   257  		// ST[B|W|D]CCC
   258  		p1 := s.Prog(st)
   259  		p1.From.Type = obj.TYPE_REG
   260  		p1.From.Reg = r1
   261  		p1.To.Type = obj.TYPE_MEM
   262  		p1.To.Reg = r0
   263  		// BNE retry
   264  		p2 := s.Prog(ppc64.ABNE)
   265  		p2.To.Type = obj.TYPE_BRANCH
   266  		p2.To.SetTarget(p)
   267  		// ISYNC
   268  		pisync := s.Prog(ppc64.AISYNC)
   269  		pisync.To.Type = obj.TYPE_NONE
   270  
   271  	case ssa.OpPPC64LoweredAtomicLoad8,
   272  		ssa.OpPPC64LoweredAtomicLoad32,
   273  		ssa.OpPPC64LoweredAtomicLoad64,
   274  		ssa.OpPPC64LoweredAtomicLoadPtr:
   275  		// SYNC
   276  		// MOVB/MOVD/MOVW (Rarg0), Rout
   277  		// CMP Rout,Rout
   278  		// BNE 1(PC)
   279  		// ISYNC
   280  		ld := ppc64.AMOVD
   281  		cmp := ppc64.ACMP
   282  		switch v.Op {
   283  		case ssa.OpPPC64LoweredAtomicLoad8:
   284  			ld = ppc64.AMOVBZ
   285  		case ssa.OpPPC64LoweredAtomicLoad32:
   286  			ld = ppc64.AMOVWZ
   287  			cmp = ppc64.ACMPW
   288  		}
   289  		arg0 := v.Args[0].Reg()
   290  		out := v.Reg0()
   291  		// SYNC when AuxInt == 1; otherwise, load-acquire
   292  		if v.AuxInt == 1 {
   293  			psync := s.Prog(ppc64.ASYNC)
   294  			psync.To.Type = obj.TYPE_NONE
   295  		}
   296  		// Load
   297  		p := s.Prog(ld)
   298  		p.From.Type = obj.TYPE_MEM
   299  		p.From.Reg = arg0
   300  		p.To.Type = obj.TYPE_REG
   301  		p.To.Reg = out
   302  		// CMP
   303  		p1 := s.Prog(cmp)
   304  		p1.From.Type = obj.TYPE_REG
   305  		p1.From.Reg = out
   306  		p1.To.Type = obj.TYPE_REG
   307  		p1.To.Reg = out
   308  		// BNE
   309  		p2 := s.Prog(ppc64.ABNE)
   310  		p2.To.Type = obj.TYPE_BRANCH
   311  		// ISYNC
   312  		pisync := s.Prog(ppc64.AISYNC)
   313  		pisync.To.Type = obj.TYPE_NONE
   314  		p2.To.SetTarget(pisync)
   315  
   316  	case ssa.OpPPC64LoweredAtomicStore8,
   317  		ssa.OpPPC64LoweredAtomicStore32,
   318  		ssa.OpPPC64LoweredAtomicStore64:
   319  		// SYNC or LWSYNC
   320  		// MOVB/MOVW/MOVD arg1,(arg0)
   321  		st := ppc64.AMOVD
   322  		switch v.Op {
   323  		case ssa.OpPPC64LoweredAtomicStore8:
   324  			st = ppc64.AMOVB
   325  		case ssa.OpPPC64LoweredAtomicStore32:
   326  			st = ppc64.AMOVW
   327  		}
   328  		arg0 := v.Args[0].Reg()
   329  		arg1 := v.Args[1].Reg()
   330  		// If AuxInt == 0, LWSYNC (Store-Release), else SYNC
   331  		// SYNC
   332  		syncOp := ppc64.ASYNC
   333  		if v.AuxInt == 0 {
   334  			syncOp = ppc64.ALWSYNC
   335  		}
   336  		psync := s.Prog(syncOp)
   337  		psync.To.Type = obj.TYPE_NONE
   338  		// Store
   339  		p := s.Prog(st)
   340  		p.To.Type = obj.TYPE_MEM
   341  		p.To.Reg = arg0
   342  		p.From.Type = obj.TYPE_REG
   343  		p.From.Reg = arg1
   344  
   345  	case ssa.OpPPC64LoweredAtomicCas64,
   346  		ssa.OpPPC64LoweredAtomicCas32:
   347  		// MOVD        $0, Rout
   348  		// LWSYNC
   349  		// loop:
   350  		// LDAR        (Rarg0), MutexHint, Rtmp
   351  		// CMP         Rarg1, Rtmp
   352  		// BNE         end
   353  		// STDCCC      Rarg2, (Rarg0)
   354  		// BNE         loop
   355  		// MOVD        $1, Rout
   356  		// end:
   357  		// LWSYNC      // Only for sequential consistency; not required in CasRel.
   358  		ld := ppc64.ALDAR
   359  		st := ppc64.ASTDCCC
   360  		cmp := ppc64.ACMP
   361  		if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
   362  			ld = ppc64.ALWAR
   363  			st = ppc64.ASTWCCC
   364  			cmp = ppc64.ACMPW
   365  		}
   366  		r0 := v.Args[0].Reg()
   367  		r1 := v.Args[1].Reg()
   368  		r2 := v.Args[2].Reg()
   369  		out := v.Reg0()
   370  		// Initialize return value to false
   371  		p := s.Prog(ppc64.AMOVD)
   372  		p.From.Type = obj.TYPE_CONST
   373  		p.From.Offset = 0
   374  		p.To.Type = obj.TYPE_REG
   375  		p.To.Reg = out
   376  		// LWSYNC - Assuming shared data not write-through-required nor
   377  		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
   378  		plwsync1 := s.Prog(ppc64.ALWSYNC)
   379  		plwsync1.To.Type = obj.TYPE_NONE
   380  		// LDAR or LWAR
   381  		p0 := s.Prog(ld)
   382  		p0.From.Type = obj.TYPE_MEM
   383  		p0.From.Reg = r0
   384  		p0.To.Type = obj.TYPE_REG
   385  		p0.To.Reg = ppc64.REGTMP
   386  		// If it is a Compare-and-Swap-Release operation, set the EH field with
   387  		// the release hint.
   388  		if v.AuxInt == 0 {
   389  			p0.AddRestSourceConst(0)
   390  		}
   391  		// CMP reg1,reg2
   392  		p1 := s.Prog(cmp)
   393  		p1.From.Type = obj.TYPE_REG
   394  		p1.From.Reg = r1
   395  		p1.To.Reg = ppc64.REGTMP
   396  		p1.To.Type = obj.TYPE_REG
   397  		// BNE done with return value = false
   398  		p2 := s.Prog(ppc64.ABNE)
   399  		p2.To.Type = obj.TYPE_BRANCH
   400  		// STDCCC or STWCCC
   401  		p3 := s.Prog(st)
   402  		p3.From.Type = obj.TYPE_REG
   403  		p3.From.Reg = r2
   404  		p3.To.Type = obj.TYPE_MEM
   405  		p3.To.Reg = r0
   406  		// BNE retry
   407  		p4 := s.Prog(ppc64.ABNE)
   408  		p4.To.Type = obj.TYPE_BRANCH
   409  		p4.To.SetTarget(p0)
   410  		// return value true
   411  		p5 := s.Prog(ppc64.AMOVD)
   412  		p5.From.Type = obj.TYPE_CONST
   413  		p5.From.Offset = 1
   414  		p5.To.Type = obj.TYPE_REG
   415  		p5.To.Reg = out
   416  		// LWSYNC - Assuming shared data not write-through-required nor
   417  		// caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
   418  		// If the operation is a CAS-Release, then synchronization is not necessary.
   419  		if v.AuxInt != 0 {
   420  			plwsync2 := s.Prog(ppc64.ALWSYNC)
   421  			plwsync2.To.Type = obj.TYPE_NONE
   422  			p2.To.SetTarget(plwsync2)
   423  		} else {
   424  			// done (label)
   425  			p6 := s.Prog(obj.ANOP)
   426  			p2.To.SetTarget(p6)
   427  		}
   428  
   429  	case ssa.OpPPC64LoweredPubBarrier:
   430  		// LWSYNC
   431  		s.Prog(v.Op.Asm())
   432  
   433  	case ssa.OpPPC64LoweredGetClosurePtr:
   434  		// Closure pointer is R11 (already)
   435  		ssagen.CheckLoweredGetClosurePtr(v)
   436  
   437  	case ssa.OpPPC64LoweredGetCallerSP:
   438  		// caller's SP is FixedFrameSize below the address of the first arg
   439  		p := s.Prog(ppc64.AMOVD)
   440  		p.From.Type = obj.TYPE_ADDR
   441  		p.From.Offset = -base.Ctxt.Arch.FixedFrameSize
   442  		p.From.Name = obj.NAME_PARAM
   443  		p.To.Type = obj.TYPE_REG
   444  		p.To.Reg = v.Reg()
   445  
   446  	case ssa.OpPPC64LoweredGetCallerPC:
   447  		p := s.Prog(obj.AGETCALLERPC)
   448  		p.To.Type = obj.TYPE_REG
   449  		p.To.Reg = v.Reg()
   450  
   451  	case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
   452  		// input is already rounded
   453  
   454  	case ssa.OpLoadReg:
   455  		loadOp := loadByType(v.Type)
   456  		p := s.Prog(loadOp)
   457  		ssagen.AddrAuto(&p.From, v.Args[0])
   458  		p.To.Type = obj.TYPE_REG
   459  		p.To.Reg = v.Reg()
   460  
   461  	case ssa.OpStoreReg:
   462  		storeOp := storeByType(v.Type)
   463  		p := s.Prog(storeOp)
   464  		p.From.Type = obj.TYPE_REG
   465  		p.From.Reg = v.Args[0].Reg()
   466  		ssagen.AddrAuto(&p.To, v)
   467  
   468  	case ssa.OpArgIntReg, ssa.OpArgFloatReg:
   469  		// The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill
   470  		// The loop only runs once.
   471  		for _, a := range v.Block.Func.RegArgs {
   472  			// Pass the spill/unspill information along to the assembler, offset by size of
   473  			// the saved LR slot.
   474  			addr := ssagen.SpillSlotAddr(a, ppc64.REGSP, base.Ctxt.Arch.FixedFrameSize)
   475  			s.FuncInfo().AddSpill(
   476  				obj.RegSpill{Reg: a.Reg, Addr: addr, Unspill: loadByType(a.Type), Spill: storeByType(a.Type)})
   477  		}
   478  		v.Block.Func.RegArgs = nil
   479  
   480  		ssagen.CheckArgReg(v)
   481  
   482  	case ssa.OpPPC64DIVD:
   483  		// For now,
   484  		//
   485  		// cmp arg1, -1
   486  		// be  ahead
   487  		// v = arg0 / arg1
   488  		// b over
   489  		// ahead: v = - arg0
   490  		// over: nop
   491  		r := v.Reg()
   492  		r0 := v.Args[0].Reg()
   493  		r1 := v.Args[1].Reg()
   494  
   495  		p := s.Prog(ppc64.ACMP)
   496  		p.From.Type = obj.TYPE_REG
   497  		p.From.Reg = r1
   498  		p.To.Type = obj.TYPE_CONST
   499  		p.To.Offset = -1
   500  
   501  		pbahead := s.Prog(ppc64.ABEQ)
   502  		pbahead.To.Type = obj.TYPE_BRANCH
   503  
   504  		p = s.Prog(v.Op.Asm())
   505  		p.From.Type = obj.TYPE_REG
   506  		p.From.Reg = r1
   507  		p.Reg = r0
   508  		p.To.Type = obj.TYPE_REG
   509  		p.To.Reg = r
   510  
   511  		pbover := s.Prog(obj.AJMP)
   512  		pbover.To.Type = obj.TYPE_BRANCH
   513  
   514  		p = s.Prog(ppc64.ANEG)
   515  		p.To.Type = obj.TYPE_REG
   516  		p.To.Reg = r
   517  		p.From.Type = obj.TYPE_REG
   518  		p.From.Reg = r0
   519  		pbahead.To.SetTarget(p)
   520  
   521  		p = s.Prog(obj.ANOP)
   522  		pbover.To.SetTarget(p)
   523  
   524  	case ssa.OpPPC64DIVW:
   525  		// word-width version of above
   526  		r := v.Reg()
   527  		r0 := v.Args[0].Reg()
   528  		r1 := v.Args[1].Reg()
   529  
   530  		p := s.Prog(ppc64.ACMPW)
   531  		p.From.Type = obj.TYPE_REG
   532  		p.From.Reg = r1
   533  		p.To.Type = obj.TYPE_CONST
   534  		p.To.Offset = -1
   535  
   536  		pbahead := s.Prog(ppc64.ABEQ)
   537  		pbahead.To.Type = obj.TYPE_BRANCH
   538  
   539  		p = s.Prog(v.Op.Asm())
   540  		p.From.Type = obj.TYPE_REG
   541  		p.From.Reg = r1
   542  		p.Reg = r0
   543  		p.To.Type = obj.TYPE_REG
   544  		p.To.Reg = r
   545  
   546  		pbover := s.Prog(obj.AJMP)
   547  		pbover.To.Type = obj.TYPE_BRANCH
   548  
   549  		p = s.Prog(ppc64.ANEG)
   550  		p.To.Type = obj.TYPE_REG
   551  		p.To.Reg = r
   552  		p.From.Type = obj.TYPE_REG
   553  		p.From.Reg = r0
   554  		pbahead.To.SetTarget(p)
   555  
   556  		p = s.Prog(obj.ANOP)
   557  		pbover.To.SetTarget(p)
   558  
   559  	case ssa.OpPPC64CLRLSLWI:
   560  		r := v.Reg()
   561  		r1 := v.Args[0].Reg()
   562  		shifts := v.AuxInt
   563  		p := s.Prog(v.Op.Asm())
   564  		// clrlslwi ra,rs,mb,sh will become rlwinm ra,rs,sh,mb-sh,31-sh as described in ISA
   565  		p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
   566  		p.AddRestSourceConst(ssa.GetPPC64Shiftsh(shifts))
   567  		p.Reg = r1
   568  		p.To.Type = obj.TYPE_REG
   569  		p.To.Reg = r
   570  
   571  	case ssa.OpPPC64CLRLSLDI:
   572  		r := v.Reg()
   573  		r1 := v.Args[0].Reg()
   574  		shifts := v.AuxInt
   575  		p := s.Prog(v.Op.Asm())
   576  		// clrlsldi ra,rs,mb,sh will become rldic ra,rs,sh,mb-sh
   577  		p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
   578  		p.AddRestSourceConst(ssa.GetPPC64Shiftsh(shifts))
   579  		p.Reg = r1
   580  		p.To.Type = obj.TYPE_REG
   581  		p.To.Reg = r
   582  
   583  	case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
   584  		ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
   585  		ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
   586  		ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
   587  		ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
   588  		ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
   589  		ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
   590  		ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW, ssa.OpPPC64XSMINJDP, ssa.OpPPC64XSMAXJDP:
   591  		r := v.Reg()
   592  		r1 := v.Args[0].Reg()
   593  		r2 := v.Args[1].Reg()
   594  		p := s.Prog(v.Op.Asm())
   595  		p.From.Type = obj.TYPE_REG
   596  		p.From.Reg = r2
   597  		p.Reg = r1
   598  		p.To.Type = obj.TYPE_REG
   599  		p.To.Reg = r
   600  
   601  	case ssa.OpPPC64ADDCC, ssa.OpPPC64ANDCC, ssa.OpPPC64SUBCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC, ssa.OpPPC64NORCC,
   602  		ssa.OpPPC64ANDNCC, ssa.OpPPC64MULHDUCC:
   603  		r1 := v.Args[0].Reg()
   604  		r2 := v.Args[1].Reg()
   605  		p := s.Prog(v.Op.Asm())
   606  		p.From.Type = obj.TYPE_REG
   607  		p.From.Reg = r2
   608  		p.Reg = r1
   609  		p.To.Type = obj.TYPE_REG
   610  		p.To.Reg = v.Reg0()
   611  
   612  	case ssa.OpPPC64NEGCC, ssa.OpPPC64CNTLZDCC:
   613  		p := s.Prog(v.Op.Asm())
   614  		p.To.Type = obj.TYPE_REG
   615  		p.To.Reg = v.Reg0()
   616  		p.From.Type = obj.TYPE_REG
   617  		p.From.Reg = v.Args[0].Reg()
   618  
   619  	case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
   620  		p := s.Prog(v.Op.Asm())
   621  		p.From.Type = obj.TYPE_CONST
   622  		p.From.Offset = v.AuxInt
   623  		p.Reg = v.Args[0].Reg()
   624  		p.To.Type = obj.TYPE_REG
   625  		p.To.Reg = v.Reg()
   626  
   627  		// Auxint holds encoded rotate + mask
   628  	case ssa.OpPPC64RLWINM, ssa.OpPPC64RLWMI:
   629  		sh, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
   630  		p := s.Prog(v.Op.Asm())
   631  		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
   632  		p.Reg = v.Args[0].Reg()
   633  		p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(sh)}
   634  		p.AddRestSourceArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
   635  		// Auxint holds mask
   636  
   637  	case ssa.OpPPC64RLDICL, ssa.OpPPC64RLDICLCC, ssa.OpPPC64RLDICR:
   638  		sh, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
   639  		p := s.Prog(v.Op.Asm())
   640  		p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: sh}
   641  		switch v.Op {
   642  		case ssa.OpPPC64RLDICL, ssa.OpPPC64RLDICLCC:
   643  			p.AddRestSourceConst(mb)
   644  		case ssa.OpPPC64RLDICR:
   645  			p.AddRestSourceConst(me)
   646  		}
   647  		p.Reg = v.Args[0].Reg()
   648  		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.ResultReg()}
   649  
   650  	case ssa.OpPPC64RLWNM:
   651  		_, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
   652  		p := s.Prog(v.Op.Asm())
   653  		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
   654  		p.Reg = v.Args[0].Reg()
   655  		p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()}
   656  		p.AddRestSourceArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
   657  
   658  	case ssa.OpPPC64MADDLD:
   659  		r := v.Reg()
   660  		r1 := v.Args[0].Reg()
   661  		r2 := v.Args[1].Reg()
   662  		r3 := v.Args[2].Reg()
   663  		// r = r1*r2 ± r3
   664  		p := s.Prog(v.Op.Asm())
   665  		p.From.Type = obj.TYPE_REG
   666  		p.From.Reg = r1
   667  		p.Reg = r2
   668  		p.AddRestSourceReg(r3)
   669  		p.To.Type = obj.TYPE_REG
   670  		p.To.Reg = r
   671  
   672  	case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
   673  		r := v.Reg()
   674  		r1 := v.Args[0].Reg()
   675  		r2 := v.Args[1].Reg()
   676  		r3 := v.Args[2].Reg()
   677  		// r = r1*r2 ± r3
   678  		p := s.Prog(v.Op.Asm())
   679  		p.From.Type = obj.TYPE_REG
   680  		p.From.Reg = r1
   681  		p.Reg = r3
   682  		p.AddRestSourceReg(r2)
   683  		p.To.Type = obj.TYPE_REG
   684  		p.To.Reg = r
   685  
   686  	case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
   687  		ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
   688  		ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
   689  		ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD, ssa.OpPPC64BRH, ssa.OpPPC64BRW, ssa.OpPPC64BRD:
   690  		r := v.Reg()
   691  		p := s.Prog(v.Op.Asm())
   692  		p.To.Type = obj.TYPE_REG
   693  		p.To.Reg = r
   694  		p.From.Type = obj.TYPE_REG
   695  		p.From.Reg = v.Args[0].Reg()
   696  
   697  	case ssa.OpPPC64ADDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
   698  		ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst,
   699  		ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst, ssa.OpPPC64MULLWconst, ssa.OpPPC64MULLDconst,
   700  		ssa.OpPPC64ANDconst:
   701  		p := s.Prog(v.Op.Asm())
   702  		p.Reg = v.Args[0].Reg()
   703  		p.From.Type = obj.TYPE_CONST
   704  		p.From.Offset = v.AuxInt
   705  		p.To.Type = obj.TYPE_REG
   706  		p.To.Reg = v.Reg()
   707  
   708  	case ssa.OpPPC64ADDC, ssa.OpPPC64ADDE, ssa.OpPPC64SUBC, ssa.OpPPC64SUBE:
   709  		r := v.Reg0() // CA is the first, implied argument.
   710  		r1 := v.Args[0].Reg()
   711  		r2 := v.Args[1].Reg()
   712  		p := s.Prog(v.Op.Asm())
   713  		p.From.Type = obj.TYPE_REG
   714  		p.From.Reg = r2
   715  		p.Reg = r1
   716  		p.To.Type = obj.TYPE_REG
   717  		p.To.Reg = r
   718  
   719  	case ssa.OpPPC64ADDZE:
   720  		p := s.Prog(v.Op.Asm())
   721  		p.From.Type = obj.TYPE_REG
   722  		p.From.Reg = v.Args[0].Reg()
   723  		p.To.Type = obj.TYPE_REG
   724  		p.To.Reg = v.Reg0()
   725  
   726  	case ssa.OpPPC64ADDZEzero, ssa.OpPPC64SUBZEzero:
   727  		p := s.Prog(v.Op.Asm())
   728  		p.From.Type = obj.TYPE_REG
   729  		p.From.Reg = ppc64.REG_R0
   730  		p.To.Type = obj.TYPE_REG
   731  		p.To.Reg = v.Reg()
   732  
   733  	case ssa.OpPPC64ADDCconst:
   734  		p := s.Prog(v.Op.Asm())
   735  		p.Reg = v.Args[0].Reg()
   736  		p.From.Type = obj.TYPE_CONST
   737  		p.From.Offset = v.AuxInt
   738  		p.To.Type = obj.TYPE_REG
   739  		// Output is a pair, the second is the CA, which is implied.
   740  		p.To.Reg = v.Reg0()
   741  
   742  	case ssa.OpPPC64SUBCconst:
   743  		p := s.Prog(v.Op.Asm())
   744  		p.AddRestSourceConst(v.AuxInt)
   745  		p.From.Type = obj.TYPE_REG
   746  		p.From.Reg = v.Args[0].Reg()
   747  		p.To.Type = obj.TYPE_REG
   748  		p.To.Reg = v.Reg0()
   749  
   750  	case ssa.OpPPC64SUBFCconst:
   751  		p := s.Prog(v.Op.Asm())
   752  		p.AddRestSourceConst(v.AuxInt)
   753  		p.From.Type = obj.TYPE_REG
   754  		p.From.Reg = v.Args[0].Reg()
   755  		p.To.Type = obj.TYPE_REG
   756  		p.To.Reg = v.Reg()
   757  
   758  	case ssa.OpPPC64ADDCCconst, ssa.OpPPC64ANDCCconst:
   759  		p := s.Prog(v.Op.Asm())
   760  		p.Reg = v.Args[0].Reg()
   761  		p.From.Type = obj.TYPE_CONST
   762  		p.From.Offset = v.AuxInt
   763  		p.To.Type = obj.TYPE_REG
   764  		p.To.Reg = v.Reg0()
   765  
   766  	case ssa.OpPPC64MOVDaddr:
   767  		switch v.Aux.(type) {
   768  		default:
   769  			v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
   770  		case nil:
   771  			// If aux offset and aux int are both 0, and the same
   772  			// input and output regs are used, no instruction
   773  			// needs to be generated, since it would just be
   774  			// addi rx, rx, 0.
   775  			if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
   776  				p := s.Prog(ppc64.AMOVD)
   777  				p.From.Type = obj.TYPE_ADDR
   778  				p.From.Reg = v.Args[0].Reg()
   779  				p.From.Offset = v.AuxInt
   780  				p.To.Type = obj.TYPE_REG
   781  				p.To.Reg = v.Reg()
   782  			}
   783  
   784  		case *obj.LSym, ir.Node:
   785  			p := s.Prog(ppc64.AMOVD)
   786  			p.From.Type = obj.TYPE_ADDR
   787  			p.From.Reg = v.Args[0].Reg()
   788  			p.To.Type = obj.TYPE_REG
   789  			p.To.Reg = v.Reg()
   790  			ssagen.AddAux(&p.From, v)
   791  
   792  		}
   793  
   794  	case ssa.OpPPC64MOVDconst:
   795  		p := s.Prog(v.Op.Asm())
   796  		p.From.Type = obj.TYPE_CONST
   797  		p.From.Offset = v.AuxInt
   798  		p.To.Type = obj.TYPE_REG
   799  		p.To.Reg = v.Reg()
   800  
   801  	case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
   802  		p := s.Prog(v.Op.Asm())
   803  		p.From.Type = obj.TYPE_FCONST
   804  		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
   805  		p.To.Type = obj.TYPE_REG
   806  		p.To.Reg = v.Reg()
   807  
   808  	case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
   809  		p := s.Prog(v.Op.Asm())
   810  		p.From.Type = obj.TYPE_REG
   811  		p.From.Reg = v.Args[0].Reg()
   812  		p.To.Type = obj.TYPE_REG
   813  		p.To.Reg = v.Args[1].Reg()
   814  
   815  	case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
   816  		p := s.Prog(v.Op.Asm())
   817  		p.From.Type = obj.TYPE_REG
   818  		p.From.Reg = v.Args[0].Reg()
   819  		p.To.Type = obj.TYPE_CONST
   820  		p.To.Offset = v.AuxInt
   821  
   822  	case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
   823  		// Shift in register to required size
   824  		p := s.Prog(v.Op.Asm())
   825  		p.From.Type = obj.TYPE_REG
   826  		p.From.Reg = v.Args[0].Reg()
   827  		p.To.Reg = v.Reg()
   828  		p.To.Type = obj.TYPE_REG
   829  
   830  	case ssa.OpPPC64MOVDload, ssa.OpPPC64MOVWload:
   831  
   832  		// MOVDload and MOVWload are DS form instructions that are restricted to
   833  		// offsets that are a multiple of 4. If the offset is not a multiple of 4,
   834  		// then the address of the symbol to be loaded is computed (base + offset)
   835  		// and used as the new base register and the offset field in the instruction
   836  		// can be set to zero.
   837  
   838  		// This same problem can happen with gostrings since the final offset is not
   839  		// known yet, but could be unaligned after the relocation is resolved.
   840  		// So gostrings are handled the same way.
   841  
   842  		// This allows the MOVDload and MOVWload to be generated in more cases and
   843  		// eliminates some offset and alignment checking in the rules file.
   844  
   845  		fromAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
   846  		ssagen.AddAux(&fromAddr, v)
   847  
   848  		genAddr := false
   849  
   850  		switch fromAddr.Name {
   851  		case obj.NAME_EXTERN, obj.NAME_STATIC:
   852  			// Special case for a rule combines the bytes of gostring.
   853  			// The v alignment might seem OK, but we don't want to load it
   854  			// using an offset because relocation comes later.
   855  			genAddr = strings.HasPrefix(fromAddr.Sym.Name, "go:string") || v.Type.Alignment()%4 != 0 || fromAddr.Offset%4 != 0
   856  		default:
   857  			genAddr = fromAddr.Offset%4 != 0
   858  		}
   859  		if genAddr {
   860  			// Load full address into the temp register.
   861  			p := s.Prog(ppc64.AMOVD)
   862  			p.From.Type = obj.TYPE_ADDR
   863  			p.From.Reg = v.Args[0].Reg()
   864  			ssagen.AddAux(&p.From, v)
   865  			// Load target using temp as base register
   866  			// and offset zero. Setting NAME_NONE
   867  			// prevents any extra offsets from being
   868  			// added.
   869  			p.To.Type = obj.TYPE_REG
   870  			p.To.Reg = ppc64.REGTMP
   871  			fromAddr.Reg = ppc64.REGTMP
   872  			// Clear the offset field and other
   873  			// information that might be used
   874  			// by the assembler to add to the
   875  			// final offset value.
   876  			fromAddr.Offset = 0
   877  			fromAddr.Name = obj.NAME_NONE
   878  			fromAddr.Sym = nil
   879  		}
   880  		p := s.Prog(v.Op.Asm())
   881  		p.From = fromAddr
   882  		p.To.Type = obj.TYPE_REG
   883  		p.To.Reg = v.Reg()
   884  
	case ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
		// Simple loads: address is Args[0] plus the aux/auxint offset
		// (applied by AddAux); result goes to v.Reg().
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
		// Byte-reversed loads. Note: no AddAux call — these ops take
		// the address from Args[0] alone, with no offset.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
		// Byte-reversed stores: value in Args[1] stored to the address
		// in Args[0]. As with the BR loads, no offset is applied.
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
		ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
		ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
		// Indexed loads: address is Args[0] (base) + Args[1] (index);
		// no aux offset is applied to indexed forms.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64DCBT:
		// Cache prefetch hint for the address in Args[0]; AuxInt is
		// passed through as the instruction's constant operand.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt

	case ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
		// Store zero: the source is the zero register (REGZERO), the
		// target address is Args[0] plus the aux offset.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REGZERO
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.To, v)
	case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVDstorezero:

		// MOVDstore and MOVDstorezero become DS form instructions that are restricted
		// to offset values that are a multiple of 4. If the offset field is not a
		// multiple of 4, then the full address of the store target is computed (base +
		// offset) and used as the new base register and the offset in the instruction
		// is set to 0.

		// This allows the MOVDstore and MOVDstorezero to be generated in more cases,
		// and prevents checking of the offset value and alignment in the rules.

		toAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
		ssagen.AddAux(&toAddr, v)

		if toAddr.Offset%4 != 0 {
			// Compute the full address (base + aux offset) into
			// REGTMP and use it as the base with offset zero.
			p := s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_ADDR
			p.From.Reg = v.Args[0].Reg()
			ssagen.AddAux(&p.From, v)
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REGTMP
			toAddr.Reg = ppc64.REGTMP
			// Clear the offset field and other
			// information that might be used
			// by the assembler to add to the
			// final offset value.
			toAddr.Offset = 0
			toAddr.Name = obj.NAME_NONE
			toAddr.Sym = nil
		}
		p := s.Prog(v.Op.Asm())
		p.To = toAddr
		p.From.Type = obj.TYPE_REG
		if v.Op == ssa.OpPPC64MOVDstorezero {
			// The zero register supplies the stored value.
			p.From.Reg = ppc64.REGZERO
		} else {
			p.From.Reg = v.Args[1].Reg()
		}
   970  
	case ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
		// Plain stores: value in Args[1] to the address Args[0] plus
		// the aux offset.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.To, v)

	case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
		ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
		ssa.OpPPC64MOVHBRstoreidx:
		// Indexed stores: value in Args[2] stored at Args[0] (base) +
		// Args[1] (index); no aux offset is applied to indexed forms.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
   988  
	case ssa.OpPPC64ISEL, ssa.OpPPC64ISELZ:
		// ISEL  AuxInt ? arg0 : arg1
		// ISELZ is a special case of ISEL where arg1 is implicitly $0.
		//
		// AuxInt value indicates conditions 0=LT 1=GT 2=EQ 3=SO 4=GE 5=LE 6=NE 7=NSO.
		// ISEL accepts a CR bit argument, not a condition as expressed by AuxInt.
		// Convert the condition to a CR bit argument by the following conversion:
		//
		// AuxInt&3 ? arg0 : arg1 for conditions LT, GT, EQ, SO
		// AuxInt&3 ? arg1 : arg0 for conditions GE, LE, NE, NSO
		p := s.Prog(v.Op.Asm())
		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
		p.Reg = v.Args[0].Reg()
		if v.Op == ssa.OpPPC64ISEL {
			p.AddRestSourceReg(v.Args[1].Reg())
		} else {
			// ISELZ: R0 supplies the implicit zero operand.
			p.AddRestSourceReg(ppc64.REG_R0)
		}
		// AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2
		if v.AuxInt > 3 {
			p.Reg, p.GetFrom3().Reg = p.GetFrom3().Reg, p.Reg
		}
		p.From.SetConst(v.AuxInt & 3)

	case ssa.OpPPC64SETBC, ssa.OpPPC64SETBCR:
		// Set the result register from a single CR0 bit; AuxInt
		// selects the bit as an offset from REG_CR0LT.
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		p.From.Type = obj.TYPE_REG
		p.From.Reg = int16(ppc64.REG_CR0LT + v.AuxInt)
  1019  
	case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort:
		// The LoweredQuad code generation
		// generates STXV instructions on
		// power9. The Short variation is used
		// if no loop is generated.

		// A loop is only emitted when ctr > 1 below,
		// i.e. for sizes >= 128 (at least two 64 byte
		// iterations), as follows:

		// Set up loop counter in CTR, used by BC
		// XXLXOR clears VS32
		//       XXLXOR VS32,VS32,VS32
		//       MOVD len/64,REG_TMP
		//       MOVD REG_TMP,CTR
		//       loop:
		//       STXV VS32,0(R20)
		//       STXV VS32,16(R20)
		//       STXV VS32,32(R20)
		//       STXV VS32,48(R20)
		//       ADD  $64,R20
		//       BC   16, 0, loop

		// Number of 64 byte loop iterations
		ctr := v.AuxInt / 64

		// Remainder bytes
		rem := v.AuxInt % 64
  1046  
  1047  		// Only generate a loop if there is more
  1048  		// than 1 iteration.
  1049  		if ctr > 1 {
  1050  			// Set up VS32 (V0) to hold 0s
  1051  			p := s.Prog(ppc64.AXXLXOR)
  1052  			p.From.Type = obj.TYPE_REG
  1053  			p.From.Reg = ppc64.REG_VS32
  1054  			p.To.Type = obj.TYPE_REG
  1055  			p.To.Reg = ppc64.REG_VS32
  1056  			p.Reg = ppc64.REG_VS32
  1057  
  1058  			// Set up CTR loop counter
  1059  			p = s.Prog(ppc64.AMOVD)
  1060  			p.From.Type = obj.TYPE_CONST
  1061  			p.From.Offset = ctr
  1062  			p.To.Type = obj.TYPE_REG
  1063  			p.To.Reg = ppc64.REGTMP
  1064  
  1065  			p = s.Prog(ppc64.AMOVD)
  1066  			p.From.Type = obj.TYPE_REG
  1067  			p.From.Reg = ppc64.REGTMP
  1068  			p.To.Type = obj.TYPE_REG
  1069  			p.To.Reg = ppc64.REG_CTR
  1070  
  1071  			// Don't generate padding for
  1072  			// loops with few iterations.
  1073  			if ctr > 3 {
  1074  				p = s.Prog(obj.APCALIGN)
  1075  				p.From.Type = obj.TYPE_CONST
  1076  				p.From.Offset = 16
  1077  			}
  1078  
  1079  			// generate 4 STXVs to zero 64 bytes
  1080  			var top *obj.Prog
  1081  
  1082  			p = s.Prog(ppc64.ASTXV)
  1083  			p.From.Type = obj.TYPE_REG
  1084  			p.From.Reg = ppc64.REG_VS32
  1085  			p.To.Type = obj.TYPE_MEM
  1086  			p.To.Reg = v.Args[0].Reg()
  1087  
  1088  			//  Save the top of loop
  1089  			if top == nil {
  1090  				top = p
  1091  			}
  1092  			p = s.Prog(ppc64.ASTXV)
  1093  			p.From.Type = obj.TYPE_REG
  1094  			p.From.Reg = ppc64.REG_VS32
  1095  			p.To.Type = obj.TYPE_MEM
  1096  			p.To.Reg = v.Args[0].Reg()
  1097  			p.To.Offset = 16
  1098  
  1099  			p = s.Prog(ppc64.ASTXV)
  1100  			p.From.Type = obj.TYPE_REG
  1101  			p.From.Reg = ppc64.REG_VS32
  1102  			p.To.Type = obj.TYPE_MEM
  1103  			p.To.Reg = v.Args[0].Reg()
  1104  			p.To.Offset = 32
  1105  
  1106  			p = s.Prog(ppc64.ASTXV)
  1107  			p.From.Type = obj.TYPE_REG
  1108  			p.From.Reg = ppc64.REG_VS32
  1109  			p.To.Type = obj.TYPE_MEM
  1110  			p.To.Reg = v.Args[0].Reg()
  1111  			p.To.Offset = 48
  1112  
  1113  			// Increment address for the
  1114  			// 64 bytes just zeroed.
  1115  			p = s.Prog(ppc64.AADD)
  1116  			p.Reg = v.Args[0].Reg()
  1117  			p.From.Type = obj.TYPE_CONST
  1118  			p.From.Offset = 64
  1119  			p.To.Type = obj.TYPE_REG
  1120  			p.To.Reg = v.Args[0].Reg()
  1121  
  1122  			// Branch back to top of loop
  1123  			// based on CTR
  1124  			// BC with BO_BCTR generates bdnz
  1125  			p = s.Prog(ppc64.ABC)
  1126  			p.From.Type = obj.TYPE_CONST
  1127  			p.From.Offset = ppc64.BO_BCTR
  1128  			p.Reg = ppc64.REG_CR0LT
  1129  			p.To.Type = obj.TYPE_BRANCH
  1130  			p.To.SetTarget(top)
  1131  		}
  1132  		// When ctr == 1 the loop was not generated but
  1133  		// there are at least 64 bytes to clear, so add
  1134  		// that to the remainder to generate the code
  1135  		// to clear those doublewords
  1136  		if ctr == 1 {
  1137  			rem += 64
  1138  		}
  1139  
  1140  		// Clear the remainder starting at offset zero
  1141  		offset := int64(0)
  1142  
  1143  		if rem >= 16 && ctr <= 1 {
  1144  			// If the XXLXOR hasn't already been
  1145  			// generated, do it here to initialize
  1146  			// VS32 (V0) to 0.
  1147  			p := s.Prog(ppc64.AXXLXOR)
  1148  			p.From.Type = obj.TYPE_REG
  1149  			p.From.Reg = ppc64.REG_VS32
  1150  			p.To.Type = obj.TYPE_REG
  1151  			p.To.Reg = ppc64.REG_VS32
  1152  			p.Reg = ppc64.REG_VS32
  1153  		}
  1154  		// Generate STXV for 32 or 64
  1155  		// bytes.
  1156  		for rem >= 32 {
  1157  			p := s.Prog(ppc64.ASTXV)
  1158  			p.From.Type = obj.TYPE_REG
  1159  			p.From.Reg = ppc64.REG_VS32
  1160  			p.To.Type = obj.TYPE_MEM
  1161  			p.To.Reg = v.Args[0].Reg()
  1162  			p.To.Offset = offset
  1163  
  1164  			p = s.Prog(ppc64.ASTXV)
  1165  			p.From.Type = obj.TYPE_REG
  1166  			p.From.Reg = ppc64.REG_VS32
  1167  			p.To.Type = obj.TYPE_MEM
  1168  			p.To.Reg = v.Args[0].Reg()
  1169  			p.To.Offset = offset + 16
  1170  			offset += 32
  1171  			rem -= 32
  1172  		}
  1173  		// Generate 16 bytes
  1174  		if rem >= 16 {
  1175  			p := s.Prog(ppc64.ASTXV)
  1176  			p.From.Type = obj.TYPE_REG
  1177  			p.From.Reg = ppc64.REG_VS32
  1178  			p.To.Type = obj.TYPE_MEM
  1179  			p.To.Reg = v.Args[0].Reg()
  1180  			p.To.Offset = offset
  1181  			offset += 16
  1182  			rem -= 16
  1183  		}
  1184  
  1185  		// first clear as many doublewords as possible
  1186  		// then clear remaining sizes as available
  1187  		for rem > 0 {
  1188  			op, size := ppc64.AMOVB, int64(1)
  1189  			switch {
  1190  			case rem >= 8:
  1191  				op, size = ppc64.AMOVD, 8
  1192  			case rem >= 4:
  1193  				op, size = ppc64.AMOVW, 4
  1194  			case rem >= 2:
  1195  				op, size = ppc64.AMOVH, 2
  1196  			}
  1197  			p := s.Prog(op)
  1198  			p.From.Type = obj.TYPE_REG
  1199  			p.From.Reg = ppc64.REG_R0
  1200  			p.To.Type = obj.TYPE_MEM
  1201  			p.To.Reg = v.Args[0].Reg()
  1202  			p.To.Offset = offset
  1203  			rem -= size
  1204  			offset += size
  1205  		}
  1206  
  1207  	case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort:
  1208  
  1209  		// Unaligned data doesn't hurt performance
  1210  		// for these instructions on power8.
  1211  
  1212  		// For sizes >= 64 generate a loop as follows:
  1213  
  1214  		// Set up loop counter in CTR, used by BC
  1215  		//       XXLXOR VS32,VS32,VS32
  1216  		//	 MOVD len/32,REG_TMP
  1217  		//	 MOVD REG_TMP,CTR
  1218  		//       MOVD $16,REG_TMP
  1219  		//	 loop:
  1220  		//	 STXVD2X VS32,(R0)(R20)
  1221  		//	 STXVD2X VS32,(R31)(R20)
  1222  		//	 ADD  $32,R20
  1223  		//	 BC   16, 0, loop
  1224  		//
  1225  		// any remainder is done as described below
  1226  
  1227  		// for sizes < 64 bytes, first clear as many doublewords as possible,
  1228  		// then handle the remainder
  1229  		//	MOVD R0,(R20)
  1230  		//	MOVD R0,8(R20)
  1231  		// .... etc.
  1232  		//
  1233  		// the remainder bytes are cleared using one or more
  1234  		// of the following instructions with the appropriate
  1235  		// offsets depending which instructions are needed
  1236  		//
  1237  		//	MOVW R0,n1(R20)	4 bytes
  1238  		//	MOVH R0,n2(R20)	2 bytes
  1239  		//	MOVB R0,n3(R20)	1 byte
  1240  		//
  1241  		// 7 bytes: MOVW, MOVH, MOVB
  1242  		// 6 bytes: MOVW, MOVH
  1243  		// 5 bytes: MOVW, MOVB
  1244  		// 3 bytes: MOVH, MOVB
  1245  
  1246  		// each loop iteration does 32 bytes
  1247  		ctr := v.AuxInt / 32
  1248  
  1249  		// remainder bytes
  1250  		rem := v.AuxInt % 32
  1251  
  1252  		// only generate a loop if there is more
  1253  		// than 1 iteration.
  1254  		if ctr > 1 {
  1255  			// Set up VS32 (V0) to hold 0s
  1256  			p := s.Prog(ppc64.AXXLXOR)
  1257  			p.From.Type = obj.TYPE_REG
  1258  			p.From.Reg = ppc64.REG_VS32
  1259  			p.To.Type = obj.TYPE_REG
  1260  			p.To.Reg = ppc64.REG_VS32
  1261  			p.Reg = ppc64.REG_VS32
  1262  
  1263  			// Set up CTR loop counter
  1264  			p = s.Prog(ppc64.AMOVD)
  1265  			p.From.Type = obj.TYPE_CONST
  1266  			p.From.Offset = ctr
  1267  			p.To.Type = obj.TYPE_REG
  1268  			p.To.Reg = ppc64.REGTMP
  1269  
  1270  			p = s.Prog(ppc64.AMOVD)
  1271  			p.From.Type = obj.TYPE_REG
  1272  			p.From.Reg = ppc64.REGTMP
  1273  			p.To.Type = obj.TYPE_REG
  1274  			p.To.Reg = ppc64.REG_CTR
  1275  
  1276  			// Set up R31 to hold index value 16
  1277  			p = s.Prog(ppc64.AMOVD)
  1278  			p.From.Type = obj.TYPE_CONST
  1279  			p.From.Offset = 16
  1280  			p.To.Type = obj.TYPE_REG
  1281  			p.To.Reg = ppc64.REGTMP
  1282  
  1283  			// Don't add padding for alignment
  1284  			// with few loop iterations.
  1285  			if ctr > 3 {
  1286  				p = s.Prog(obj.APCALIGN)
  1287  				p.From.Type = obj.TYPE_CONST
  1288  				p.From.Offset = 16
  1289  			}
  1290  
  1291  			// generate 2 STXVD2Xs to store 16 bytes
  1292  			// when this is a loop then the top must be saved
  1293  			var top *obj.Prog
  1294  			// This is the top of loop
  1295  
  1296  			p = s.Prog(ppc64.ASTXVD2X)
  1297  			p.From.Type = obj.TYPE_REG
  1298  			p.From.Reg = ppc64.REG_VS32
  1299  			p.To.Type = obj.TYPE_MEM
  1300  			p.To.Reg = v.Args[0].Reg()
  1301  			p.To.Index = ppc64.REGZERO
  1302  			// Save the top of loop
  1303  			if top == nil {
  1304  				top = p
  1305  			}
  1306  			p = s.Prog(ppc64.ASTXVD2X)
  1307  			p.From.Type = obj.TYPE_REG
  1308  			p.From.Reg = ppc64.REG_VS32
  1309  			p.To.Type = obj.TYPE_MEM
  1310  			p.To.Reg = v.Args[0].Reg()
  1311  			p.To.Index = ppc64.REGTMP
  1312  
  1313  			// Increment address for the
  1314  			// 4 doublewords just zeroed.
  1315  			p = s.Prog(ppc64.AADD)
  1316  			p.Reg = v.Args[0].Reg()
  1317  			p.From.Type = obj.TYPE_CONST
  1318  			p.From.Offset = 32
  1319  			p.To.Type = obj.TYPE_REG
  1320  			p.To.Reg = v.Args[0].Reg()
  1321  
  1322  			// Branch back to top of loop
  1323  			// based on CTR
  1324  			// BC with BO_BCTR generates bdnz
  1325  			p = s.Prog(ppc64.ABC)
  1326  			p.From.Type = obj.TYPE_CONST
  1327  			p.From.Offset = ppc64.BO_BCTR
  1328  			p.Reg = ppc64.REG_CR0LT
  1329  			p.To.Type = obj.TYPE_BRANCH
  1330  			p.To.SetTarget(top)
  1331  		}
  1332  
  1333  		// when ctr == 1 the loop was not generated but
  1334  		// there are at least 32 bytes to clear, so add
  1335  		// that to the remainder to generate the code
  1336  		// to clear those doublewords
  1337  		if ctr == 1 {
  1338  			rem += 32
  1339  		}
  1340  
  1341  		// clear the remainder starting at offset zero
  1342  		offset := int64(0)
  1343  
  1344  		// first clear as many doublewords as possible
  1345  		// then clear remaining sizes as available
  1346  		for rem > 0 {
  1347  			op, size := ppc64.AMOVB, int64(1)
  1348  			switch {
  1349  			case rem >= 8:
  1350  				op, size = ppc64.AMOVD, 8
  1351  			case rem >= 4:
  1352  				op, size = ppc64.AMOVW, 4
  1353  			case rem >= 2:
  1354  				op, size = ppc64.AMOVH, 2
  1355  			}
  1356  			p := s.Prog(op)
  1357  			p.From.Type = obj.TYPE_REG
  1358  			p.From.Reg = ppc64.REG_R0
  1359  			p.To.Type = obj.TYPE_MEM
  1360  			p.To.Reg = v.Args[0].Reg()
  1361  			p.To.Offset = offset
  1362  			rem -= size
  1363  			offset += size
  1364  		}
  1365  
  1366  	case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort:
  1367  
  1368  		bytesPerLoop := int64(32)
  1369  		// This will be used when moving more
  1370  		// than 8 bytes.  Moves start with
  1371  		// as many 8 byte moves as possible, then
  1372  		// 4, 2, or 1 byte(s) as remaining.  This will
  1373  		// work and be efficient for power8 or later.
  1374  		// If there are 64 or more bytes, then a
  1375  		// loop is generated to move 32 bytes and
  1376  		// update the src and dst addresses on each
  1377  		// iteration. When < 64 bytes, the appropriate
  1378  		// number of moves are generated based on the
  1379  		// size.
  1380  		// When moving >= 64 bytes a loop is used
  1381  		//	MOVD len/32,REG_TMP
  1382  		//	MOVD REG_TMP,CTR
  1383  		//	MOVD $16,REG_TMP
  1384  		// top:
  1385  		//	LXVD2X (R0)(R21),VS32
  1386  		//	LXVD2X (R31)(R21),VS33
  1387  		//	ADD $32,R21
  1388  		//	STXVD2X VS32,(R0)(R20)
  1389  		//	STXVD2X VS33,(R31)(R20)
  1390  		//	ADD $32,R20
  1391  		//	BC 16,0,top
  1392  		// Bytes not moved by this loop are moved
  1393  		// with a combination of the following instructions,
  1394  		// starting with the largest sizes and generating as
  1395  		// many as needed, using the appropriate offset value.
  1396  		//	MOVD  n(R21),R31
  1397  		//	MOVD  R31,n(R20)
  1398  		//	MOVW  n1(R21),R31
  1399  		//	MOVW  R31,n1(R20)
  1400  		//	MOVH  n2(R21),R31
  1401  		//	MOVH  R31,n2(R20)
  1402  		//	MOVB  n3(R21),R31
  1403  		//	MOVB  R31,n3(R20)
  1404  
  1405  		// Each loop iteration moves 32 bytes
  1406  		ctr := v.AuxInt / bytesPerLoop
  1407  
  1408  		// Remainder after the loop
  1409  		rem := v.AuxInt % bytesPerLoop
  1410  
  1411  		dstReg := v.Args[0].Reg()
  1412  		srcReg := v.Args[1].Reg()
  1413  
  1414  		// The set of registers used here, must match the clobbered reg list
  1415  		// in PPC64Ops.go.
  1416  		offset := int64(0)
  1417  
  1418  		// top of the loop
  1419  		var top *obj.Prog
  1420  		// Only generate looping code when loop counter is > 1 for >= 64 bytes
  1421  		if ctr > 1 {
  1422  			// Set up the CTR
  1423  			p := s.Prog(ppc64.AMOVD)
  1424  			p.From.Type = obj.TYPE_CONST
  1425  			p.From.Offset = ctr
  1426  			p.To.Type = obj.TYPE_REG
  1427  			p.To.Reg = ppc64.REGTMP
  1428  
  1429  			p = s.Prog(ppc64.AMOVD)
  1430  			p.From.Type = obj.TYPE_REG
  1431  			p.From.Reg = ppc64.REGTMP
  1432  			p.To.Type = obj.TYPE_REG
  1433  			p.To.Reg = ppc64.REG_CTR
  1434  
			// Use REGTMP as index reg
			p = s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 16
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REGTMP

			// Don't add padding for
			// alignment with small iteration
			// counts.
			if ctr > 3 {
				p = s.Prog(obj.APCALIGN)
				p.From.Type = obj.TYPE_CONST
				p.From.Offset = 16
			}
  1450  
  1451  			// Generate 16 byte loads and stores.
  1452  			// Use temp register for index (16)
  1453  			// on the second one.
  1454  
  1455  			p = s.Prog(ppc64.ALXVD2X)
  1456  			p.From.Type = obj.TYPE_MEM
  1457  			p.From.Reg = srcReg
  1458  			p.From.Index = ppc64.REGZERO
  1459  			p.To.Type = obj.TYPE_REG
  1460  			p.To.Reg = ppc64.REG_VS32
  1461  			if top == nil {
  1462  				top = p
  1463  			}
  1464  			p = s.Prog(ppc64.ALXVD2X)
  1465  			p.From.Type = obj.TYPE_MEM
  1466  			p.From.Reg = srcReg
  1467  			p.From.Index = ppc64.REGTMP
  1468  			p.To.Type = obj.TYPE_REG
  1469  			p.To.Reg = ppc64.REG_VS33
  1470  
  1471  			// increment the src reg for next iteration
  1472  			p = s.Prog(ppc64.AADD)
  1473  			p.Reg = srcReg
  1474  			p.From.Type = obj.TYPE_CONST
  1475  			p.From.Offset = bytesPerLoop
  1476  			p.To.Type = obj.TYPE_REG
  1477  			p.To.Reg = srcReg
  1478  
  1479  			// generate 16 byte stores
  1480  			p = s.Prog(ppc64.ASTXVD2X)
  1481  			p.From.Type = obj.TYPE_REG
  1482  			p.From.Reg = ppc64.REG_VS32
  1483  			p.To.Type = obj.TYPE_MEM
  1484  			p.To.Reg = dstReg
  1485  			p.To.Index = ppc64.REGZERO
  1486  
  1487  			p = s.Prog(ppc64.ASTXVD2X)
  1488  			p.From.Type = obj.TYPE_REG
  1489  			p.From.Reg = ppc64.REG_VS33
  1490  			p.To.Type = obj.TYPE_MEM
  1491  			p.To.Reg = dstReg
  1492  			p.To.Index = ppc64.REGTMP
  1493  
  1494  			// increment the dst reg for next iteration
  1495  			p = s.Prog(ppc64.AADD)
  1496  			p.Reg = dstReg
  1497  			p.From.Type = obj.TYPE_CONST
  1498  			p.From.Offset = bytesPerLoop
  1499  			p.To.Type = obj.TYPE_REG
  1500  			p.To.Reg = dstReg
  1501  
  1502  			// BC with BO_BCTR generates bdnz to branch on nonzero CTR
  1503  			// to loop top.
  1504  			p = s.Prog(ppc64.ABC)
  1505  			p.From.Type = obj.TYPE_CONST
  1506  			p.From.Offset = ppc64.BO_BCTR
  1507  			p.Reg = ppc64.REG_CR0LT
  1508  			p.To.Type = obj.TYPE_BRANCH
  1509  			p.To.SetTarget(top)
  1510  
  1511  			// srcReg and dstReg were incremented in the loop, so
  1512  			// later instructions start with offset 0.
  1513  			offset = int64(0)
  1514  		}
  1515  
  1516  		// No loop was generated for one iteration, so
  1517  		// add 32 bytes to the remainder to move those bytes.
  1518  		if ctr == 1 {
  1519  			rem += bytesPerLoop
  1520  		}
  1521  
  1522  		if rem >= 16 {
  1523  			// Generate 16 byte loads and stores.
  1524  			// Use temp register for index (value 16)
  1525  			// on the second one.
  1526  			p := s.Prog(ppc64.ALXVD2X)
  1527  			p.From.Type = obj.TYPE_MEM
  1528  			p.From.Reg = srcReg
  1529  			p.From.Index = ppc64.REGZERO
  1530  			p.To.Type = obj.TYPE_REG
  1531  			p.To.Reg = ppc64.REG_VS32
  1532  
  1533  			p = s.Prog(ppc64.ASTXVD2X)
  1534  			p.From.Type = obj.TYPE_REG
  1535  			p.From.Reg = ppc64.REG_VS32
  1536  			p.To.Type = obj.TYPE_MEM
  1537  			p.To.Reg = dstReg
  1538  			p.To.Index = ppc64.REGZERO
  1539  
  1540  			offset = 16
  1541  			rem -= 16
  1542  
  1543  			if rem >= 16 {
  1544  				// Use REGTMP as index reg
  1545  				p := s.Prog(ppc64.AMOVD)
  1546  				p.From.Type = obj.TYPE_CONST
  1547  				p.From.Offset = 16
  1548  				p.To.Type = obj.TYPE_REG
  1549  				p.To.Reg = ppc64.REGTMP
  1550  
  1551  				p = s.Prog(ppc64.ALXVD2X)
  1552  				p.From.Type = obj.TYPE_MEM
  1553  				p.From.Reg = srcReg
  1554  				p.From.Index = ppc64.REGTMP
  1555  				p.To.Type = obj.TYPE_REG
  1556  				p.To.Reg = ppc64.REG_VS32
  1557  
  1558  				p = s.Prog(ppc64.ASTXVD2X)
  1559  				p.From.Type = obj.TYPE_REG
  1560  				p.From.Reg = ppc64.REG_VS32
  1561  				p.To.Type = obj.TYPE_MEM
  1562  				p.To.Reg = dstReg
  1563  				p.To.Index = ppc64.REGTMP
  1564  
  1565  				offset = 32
  1566  				rem -= 16
  1567  			}
  1568  		}
  1569  
  1570  		// Generate all the remaining load and store pairs, starting with
  1571  		// as many 8 byte moves as possible, then 4, 2, 1.
  1572  		for rem > 0 {
  1573  			op, size := ppc64.AMOVB, int64(1)
  1574  			switch {
  1575  			case rem >= 8:
  1576  				op, size = ppc64.AMOVD, 8
  1577  			case rem >= 4:
  1578  				op, size = ppc64.AMOVWZ, 4
  1579  			case rem >= 2:
  1580  				op, size = ppc64.AMOVH, 2
  1581  			}
  1582  			// Load
  1583  			p := s.Prog(op)
  1584  			p.To.Type = obj.TYPE_REG
  1585  			p.To.Reg = ppc64.REGTMP
  1586  			p.From.Type = obj.TYPE_MEM
  1587  			p.From.Reg = srcReg
  1588  			p.From.Offset = offset
  1589  
  1590  			// Store
  1591  			p = s.Prog(op)
  1592  			p.From.Type = obj.TYPE_REG
  1593  			p.From.Reg = ppc64.REGTMP
  1594  			p.To.Type = obj.TYPE_MEM
  1595  			p.To.Reg = dstReg
  1596  			p.To.Offset = offset
  1597  			rem -= size
  1598  			offset += size
  1599  		}
  1600  
	case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort:
		bytesPerLoop := int64(64)
		// This is used when moving more
		// than 8 bytes on power9.  Moves start with
		// as many 8 byte moves as possible, then
		// 4, 2, or 1 byte(s) as remaining.  This will
		// work and be efficient for power8 or later.
		// If there are 128 or more bytes, then a
		// loop is generated to move 64 bytes and
		// update the src and dst addresses on each
		// iteration. Otherwise, the appropriate
		// number of moves are generated based on the
		// size.
		// When moving >= 128 bytes a loop is used
		//      MOVD len/64,REG_TMP
		//      MOVD REG_TMP,CTR
		// top:
		//      LXV 0(R21),VS32
		//      LXV 16(R21),VS33
		//      STXV VS32,0(R20)
		//      STXV VS33,16(R20)
		//      LXV 32(R21),VS32
		//      LXV 48(R21),VS33
		//      STXV VS32,32(R20)
		//      STXV VS33,48(R20)
		//      ADD $64,R21
		//      ADD $64,R20
		//      BC 16,0,top
		// Bytes not moved by this loop are moved
		// with a combination of the following instructions,
		// starting with the largest sizes and generating as
		// many as needed, using the appropriate offset value.
		//      MOVD  n(R21),R31
		//      MOVD  R31,n(R20)
		//      MOVW  n1(R21),R31
		//      MOVW  R31,n1(R20)
		//      MOVH  n2(R21),R31
		//      MOVH  R31,n2(R20)
		//      MOVB  n3(R21),R31
		//      MOVB  R31,n3(R20)

		// Number of 64 byte loop iterations
		ctr := v.AuxInt / bytesPerLoop

		// Remainder after the loop
		rem := v.AuxInt % bytesPerLoop
  1643  
  1644  		dstReg := v.Args[0].Reg()
  1645  		srcReg := v.Args[1].Reg()
  1646  
  1647  		offset := int64(0)
  1648  
  1649  		// top of the loop
  1650  		var top *obj.Prog
  1651  
  1652  		// Only generate looping code when loop counter is > 1 for >= 64 bytes
  1653  		if ctr > 1 {
  1654  			// Set up the CTR
  1655  			p := s.Prog(ppc64.AMOVD)
  1656  			p.From.Type = obj.TYPE_CONST
  1657  			p.From.Offset = ctr
  1658  			p.To.Type = obj.TYPE_REG
  1659  			p.To.Reg = ppc64.REGTMP
  1660  
  1661  			p = s.Prog(ppc64.AMOVD)
  1662  			p.From.Type = obj.TYPE_REG
  1663  			p.From.Reg = ppc64.REGTMP
  1664  			p.To.Type = obj.TYPE_REG
  1665  			p.To.Reg = ppc64.REG_CTR
  1666  
  1667  			p = s.Prog(obj.APCALIGN)
  1668  			p.From.Type = obj.TYPE_CONST
  1669  			p.From.Offset = 16
  1670  
  1671  			// Generate 16 byte loads and stores.
  1672  			p = s.Prog(ppc64.ALXV)
  1673  			p.From.Type = obj.TYPE_MEM
  1674  			p.From.Reg = srcReg
  1675  			p.From.Offset = offset
  1676  			p.To.Type = obj.TYPE_REG
  1677  			p.To.Reg = ppc64.REG_VS32
  1678  			if top == nil {
  1679  				top = p
  1680  			}
  1681  			p = s.Prog(ppc64.ALXV)
  1682  			p.From.Type = obj.TYPE_MEM
  1683  			p.From.Reg = srcReg
  1684  			p.From.Offset = offset + 16
  1685  			p.To.Type = obj.TYPE_REG
  1686  			p.To.Reg = ppc64.REG_VS33
  1687  
  1688  			// generate 16 byte stores
  1689  			p = s.Prog(ppc64.ASTXV)
  1690  			p.From.Type = obj.TYPE_REG
  1691  			p.From.Reg = ppc64.REG_VS32
  1692  			p.To.Type = obj.TYPE_MEM
  1693  			p.To.Reg = dstReg
  1694  			p.To.Offset = offset
  1695  
  1696  			p = s.Prog(ppc64.ASTXV)
  1697  			p.From.Type = obj.TYPE_REG
  1698  			p.From.Reg = ppc64.REG_VS33
  1699  			p.To.Type = obj.TYPE_MEM
  1700  			p.To.Reg = dstReg
  1701  			p.To.Offset = offset + 16
  1702  
  1703  			// Generate 16 byte loads and stores.
  1704  			p = s.Prog(ppc64.ALXV)
  1705  			p.From.Type = obj.TYPE_MEM
  1706  			p.From.Reg = srcReg
  1707  			p.From.Offset = offset + 32
  1708  			p.To.Type = obj.TYPE_REG
  1709  			p.To.Reg = ppc64.REG_VS32
  1710  
  1711  			p = s.Prog(ppc64.ALXV)
  1712  			p.From.Type = obj.TYPE_MEM
  1713  			p.From.Reg = srcReg
  1714  			p.From.Offset = offset + 48
  1715  			p.To.Type = obj.TYPE_REG
  1716  			p.To.Reg = ppc64.REG_VS33
  1717  
  1718  			// generate 16 byte stores
  1719  			p = s.Prog(ppc64.ASTXV)
  1720  			p.From.Type = obj.TYPE_REG
  1721  			p.From.Reg = ppc64.REG_VS32
  1722  			p.To.Type = obj.TYPE_MEM
  1723  			p.To.Reg = dstReg
  1724  			p.To.Offset = offset + 32
  1725  
  1726  			p = s.Prog(ppc64.ASTXV)
  1727  			p.From.Type = obj.TYPE_REG
  1728  			p.From.Reg = ppc64.REG_VS33
  1729  			p.To.Type = obj.TYPE_MEM
  1730  			p.To.Reg = dstReg
  1731  			p.To.Offset = offset + 48
  1732  
  1733  			// increment the src reg for next iteration
  1734  			p = s.Prog(ppc64.AADD)
  1735  			p.Reg = srcReg
  1736  			p.From.Type = obj.TYPE_CONST
  1737  			p.From.Offset = bytesPerLoop
  1738  			p.To.Type = obj.TYPE_REG
  1739  			p.To.Reg = srcReg
  1740  
  1741  			// increment the dst reg for next iteration
  1742  			p = s.Prog(ppc64.AADD)
  1743  			p.Reg = dstReg
  1744  			p.From.Type = obj.TYPE_CONST
  1745  			p.From.Offset = bytesPerLoop
  1746  			p.To.Type = obj.TYPE_REG
  1747  			p.To.Reg = dstReg
  1748  
  1749  			// BC with BO_BCTR generates bdnz to branch on nonzero CTR
  1750  			// to loop top.
  1751  			p = s.Prog(ppc64.ABC)
  1752  			p.From.Type = obj.TYPE_CONST
  1753  			p.From.Offset = ppc64.BO_BCTR
  1754  			p.Reg = ppc64.REG_CR0LT
  1755  			p.To.Type = obj.TYPE_BRANCH
  1756  			p.To.SetTarget(top)
  1757  
  1758  			// srcReg and dstReg were incremented in the loop, so
  1759  			// later instructions start with offset 0.
  1760  			offset = int64(0)
  1761  		}
  1762  
		// No loop was generated for one iteration, so
		// add bytesPerLoop (64) bytes to the remainder
		// to move those bytes.
		if ctr == 1 {
			rem += bytesPerLoop
		}
  1768  		if rem >= 32 {
  1769  			p := s.Prog(ppc64.ALXV)
  1770  			p.From.Type = obj.TYPE_MEM
  1771  			p.From.Reg = srcReg
  1772  			p.To.Type = obj.TYPE_REG
  1773  			p.To.Reg = ppc64.REG_VS32
  1774  
  1775  			p = s.Prog(ppc64.ALXV)
  1776  			p.From.Type = obj.TYPE_MEM
  1777  			p.From.Reg = srcReg
  1778  			p.From.Offset = 16
  1779  			p.To.Type = obj.TYPE_REG
  1780  			p.To.Reg = ppc64.REG_VS33
  1781  
  1782  			p = s.Prog(ppc64.ASTXV)
  1783  			p.From.Type = obj.TYPE_REG
  1784  			p.From.Reg = ppc64.REG_VS32
  1785  			p.To.Type = obj.TYPE_MEM
  1786  			p.To.Reg = dstReg
  1787  
  1788  			p = s.Prog(ppc64.ASTXV)
  1789  			p.From.Type = obj.TYPE_REG
  1790  			p.From.Reg = ppc64.REG_VS33
  1791  			p.To.Type = obj.TYPE_MEM
  1792  			p.To.Reg = dstReg
  1793  			p.To.Offset = 16
  1794  
  1795  			offset = 32
  1796  			rem -= 32
  1797  		}
  1798  
  1799  		if rem >= 16 {
  1800  			// Generate 16 byte loads and stores.
  1801  			p := s.Prog(ppc64.ALXV)
  1802  			p.From.Type = obj.TYPE_MEM
  1803  			p.From.Reg = srcReg
  1804  			p.From.Offset = offset
  1805  			p.To.Type = obj.TYPE_REG
  1806  			p.To.Reg = ppc64.REG_VS32
  1807  
  1808  			p = s.Prog(ppc64.ASTXV)
  1809  			p.From.Type = obj.TYPE_REG
  1810  			p.From.Reg = ppc64.REG_VS32
  1811  			p.To.Type = obj.TYPE_MEM
  1812  			p.To.Reg = dstReg
  1813  			p.To.Offset = offset
  1814  
  1815  			offset += 16
  1816  			rem -= 16
  1817  
  1818  			if rem >= 16 {
  1819  				p := s.Prog(ppc64.ALXV)
  1820  				p.From.Type = obj.TYPE_MEM
  1821  				p.From.Reg = srcReg
  1822  				p.From.Offset = offset
  1823  				p.To.Type = obj.TYPE_REG
  1824  				p.To.Reg = ppc64.REG_VS32
  1825  
  1826  				p = s.Prog(ppc64.ASTXV)
  1827  				p.From.Type = obj.TYPE_REG
  1828  				p.From.Reg = ppc64.REG_VS32
  1829  				p.To.Type = obj.TYPE_MEM
  1830  				p.To.Reg = dstReg
  1831  				p.To.Offset = offset
  1832  
  1833  				offset += 16
  1834  				rem -= 16
  1835  			}
  1836  		}
  1837  		// Generate all the remaining load and store pairs, starting with
  1838  		// as many 8 byte moves as possible, then 4, 2, 1.
  1839  		for rem > 0 {
  1840  			op, size := ppc64.AMOVB, int64(1)
  1841  			switch {
  1842  			case rem >= 8:
  1843  				op, size = ppc64.AMOVD, 8
  1844  			case rem >= 4:
  1845  				op, size = ppc64.AMOVWZ, 4
  1846  			case rem >= 2:
  1847  				op, size = ppc64.AMOVH, 2
  1848  			}
  1849  			// Load
  1850  			p := s.Prog(op)
  1851  			p.To.Type = obj.TYPE_REG
  1852  			p.To.Reg = ppc64.REGTMP
  1853  			p.From.Type = obj.TYPE_MEM
  1854  			p.From.Reg = srcReg
  1855  			p.From.Offset = offset
  1856  
  1857  			// Store
  1858  			p = s.Prog(op)
  1859  			p.From.Type = obj.TYPE_REG
  1860  			p.From.Reg = ppc64.REGTMP
  1861  			p.To.Type = obj.TYPE_MEM
  1862  			p.To.Reg = dstReg
  1863  			p.To.Offset = offset
  1864  			rem -= size
  1865  			offset += size
  1866  		}
  1867  
  1868  	case ssa.OpPPC64CALLstatic:
  1869  		s.Call(v)
  1870  
  1871  	case ssa.OpPPC64CALLtail:
  1872  		s.TailCall(v)
  1873  
  1874  	case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
  1875  		p := s.Prog(ppc64.AMOVD)
  1876  		p.From.Type = obj.TYPE_REG
  1877  		p.From.Reg = v.Args[0].Reg()
  1878  		p.To.Type = obj.TYPE_REG
  1879  		p.To.Reg = ppc64.REG_LR
  1880  
  1881  		if v.Args[0].Reg() != ppc64.REG_R12 {
  1882  			v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
  1883  		}
  1884  
  1885  		pp := s.Call(v)
  1886  
  1887  		// Convert the call into a blrl with hint this is not a subroutine return.
  1888  		// The full bclrl opcode must be specified when passing a hint.
  1889  		pp.As = ppc64.ABCL
  1890  		pp.From.Type = obj.TYPE_CONST
  1891  		pp.From.Offset = ppc64.BO_ALWAYS
  1892  		pp.Reg = ppc64.REG_CR0LT // The preferred value if BI is ignored.
  1893  		pp.To.Reg = ppc64.REG_LR
  1894  		pp.AddRestSourceConst(1)
  1895  
  1896  		if ppc64.NeedTOCpointer(base.Ctxt) {
  1897  			// When compiling Go into PIC, the function we just
  1898  			// called via pointer might have been implemented in
  1899  			// a separate module and so overwritten the TOC
  1900  			// pointer in R2; reload it.
  1901  			q := s.Prog(ppc64.AMOVD)
  1902  			q.From.Type = obj.TYPE_MEM
  1903  			q.From.Offset = 24
  1904  			q.From.Reg = ppc64.REGSP
  1905  			q.To.Type = obj.TYPE_REG
  1906  			q.To.Reg = ppc64.REG_R2
  1907  		}
  1908  
  1909  	case ssa.OpPPC64LoweredWB:
  1910  		p := s.Prog(obj.ACALL)
  1911  		p.To.Type = obj.TYPE_MEM
  1912  		p.To.Name = obj.NAME_EXTERN
  1913  		// AuxInt encodes how many buffer entries we need.
  1914  		p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1]
  1915  
  1916  	case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
  1917  		p := s.Prog(obj.ACALL)
  1918  		p.To.Type = obj.TYPE_MEM
  1919  		p.To.Name = obj.NAME_EXTERN
  1920  		p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
  1921  		s.UseArgs(16) // space used in callee args area by assembly stubs
  1922  
  1923  	case ssa.OpPPC64LoweredNilCheck:
  1924  		if buildcfg.GOOS == "aix" {
  1925  			// CMP Rarg0, $0
  1926  			// BNE 2(PC)
  1927  			// STW R0, 0(R0)
  1928  			// NOP (so the BNE has somewhere to land)
  1929  
  1930  			// CMP Rarg0, $0
  1931  			p := s.Prog(ppc64.ACMP)
  1932  			p.From.Type = obj.TYPE_REG
  1933  			p.From.Reg = v.Args[0].Reg()
  1934  			p.To.Type = obj.TYPE_CONST
  1935  			p.To.Offset = 0
  1936  
  1937  			// BNE 2(PC)
  1938  			p2 := s.Prog(ppc64.ABNE)
  1939  			p2.To.Type = obj.TYPE_BRANCH
  1940  
  1941  			// STW R0, 0(R0)
  1942  			// Write at 0 is forbidden and will trigger a SIGSEGV
  1943  			p = s.Prog(ppc64.AMOVW)
  1944  			p.From.Type = obj.TYPE_REG
  1945  			p.From.Reg = ppc64.REG_R0
  1946  			p.To.Type = obj.TYPE_MEM
  1947  			p.To.Reg = ppc64.REG_R0
  1948  
  1949  			// NOP (so the BNE has somewhere to land)
  1950  			nop := s.Prog(obj.ANOP)
  1951  			p2.To.SetTarget(nop)
  1952  
  1953  		} else {
  1954  			// Issue a load which will fault if arg is nil.
  1955  			p := s.Prog(ppc64.AMOVBZ)
  1956  			p.From.Type = obj.TYPE_MEM
  1957  			p.From.Reg = v.Args[0].Reg()
  1958  			ssagen.AddAux(&p.From, v)
  1959  			p.To.Type = obj.TYPE_REG
  1960  			p.To.Reg = ppc64.REGTMP
  1961  		}
  1962  		if logopt.Enabled() {
  1963  			logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
  1964  		}
  1965  		if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
  1966  			base.WarnfAt(v.Pos, "generated nil check")
  1967  		}
  1968  
  1969  	// These should be resolved by rules and not make it here.
  1970  	case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
  1971  		ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
  1972  		ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
  1973  		v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
  1974  	case ssa.OpPPC64InvertFlags:
  1975  		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
  1976  	case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
  1977  		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
  1978  	case ssa.OpClobber, ssa.OpClobberReg:
  1979  		// TODO: implement for clobberdead experiment. Nop is ok for now.
  1980  	default:
  1981  		v.Fatalf("genValue not implemented: %s", v.LongString())
  1982  	}
  1983  }
  1984  
// blockJump maps a conditional SSA block kind to the PPC64 branch
// instructions used to lower it. asm is the branch taken when the
// condition holds; invasm is the branch for the inverted condition.
// For the FP kinds FGE and FLE one branch is not enough: asmeq asks
// ssaGenBlock to emit a trailing BEQ after asm (folding EQ into GE/LE),
// and invasmun asks for a trailing BVS after invasm (routing the
// unordered comparison result down the false edge).
var blockJump = [...]struct {
	asm, invasm     obj.As // branch on condition / branch on inverted condition
	asmeq, invasmun bool   // emit extra BEQ after asm / extra BVS after invasm
}{
	ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
	ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},

	ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
	ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
	ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
	ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},

	// TODO: need to work FP comparisons into block jumps
	ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
	ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
	ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
	ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
}
  2003  
  2004  func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
  2005  	switch b.Kind {
  2006  	case ssa.BlockDefer:
  2007  		// defer returns in R3:
  2008  		// 0 if we should continue executing
  2009  		// 1 if we should jump to deferreturn call
  2010  		p := s.Prog(ppc64.ACMP)
  2011  		p.From.Type = obj.TYPE_REG
  2012  		p.From.Reg = ppc64.REG_R3
  2013  		p.To.Type = obj.TYPE_CONST
  2014  		p.To.Offset = 0
  2015  
  2016  		p = s.Prog(ppc64.ABNE)
  2017  		p.To.Type = obj.TYPE_BRANCH
  2018  		s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()})
  2019  		if b.Succs[0].Block() != next {
  2020  			p := s.Prog(obj.AJMP)
  2021  			p.To.Type = obj.TYPE_BRANCH
  2022  			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
  2023  		}
  2024  
  2025  	case ssa.BlockPlain:
  2026  		if b.Succs[0].Block() != next {
  2027  			p := s.Prog(obj.AJMP)
  2028  			p.To.Type = obj.TYPE_BRANCH
  2029  			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
  2030  		}
  2031  	case ssa.BlockExit, ssa.BlockRetJmp:
  2032  	case ssa.BlockRet:
  2033  		s.Prog(obj.ARET)
  2034  
  2035  	case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
  2036  		ssa.BlockPPC64LT, ssa.BlockPPC64GE,
  2037  		ssa.BlockPPC64LE, ssa.BlockPPC64GT,
  2038  		ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
  2039  		ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
  2040  		jmp := blockJump[b.Kind]
  2041  		switch next {
  2042  		case b.Succs[0].Block():
  2043  			s.Br(jmp.invasm, b.Succs[1].Block())
  2044  			if jmp.invasmun {
  2045  				// TODO: The second branch is probably predict-not-taken since it is for FP unordered
  2046  				s.Br(ppc64.ABVS, b.Succs[1].Block())
  2047  			}
  2048  		case b.Succs[1].Block():
  2049  			s.Br(jmp.asm, b.Succs[0].Block())
  2050  			if jmp.asmeq {
  2051  				s.Br(ppc64.ABEQ, b.Succs[0].Block())
  2052  			}
  2053  		default:
  2054  			if b.Likely != ssa.BranchUnlikely {
  2055  				s.Br(jmp.asm, b.Succs[0].Block())
  2056  				if jmp.asmeq {
  2057  					s.Br(ppc64.ABEQ, b.Succs[0].Block())
  2058  				}
  2059  				s.Br(obj.AJMP, b.Succs[1].Block())
  2060  			} else {
  2061  				s.Br(jmp.invasm, b.Succs[1].Block())
  2062  				if jmp.invasmun {
  2063  					// TODO: The second branch is probably predict-not-taken since it is for FP unordered
  2064  					s.Br(ppc64.ABVS, b.Succs[1].Block())
  2065  				}
  2066  				s.Br(obj.AJMP, b.Succs[0].Block())
  2067  			}
  2068  		}
  2069  	default:
  2070  		b.Fatalf("branch not implemented: %s", b.LongString())
  2071  	}
  2072  }
  2073  
  2074  func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
  2075  	p := s.Prog(loadByType(t))
  2076  	p.From.Type = obj.TYPE_MEM
  2077  	p.From.Name = obj.NAME_AUTO
  2078  	p.From.Sym = n.Linksym()
  2079  	p.From.Offset = n.FrameOffset() + off
  2080  	p.To.Type = obj.TYPE_REG
  2081  	p.To.Reg = reg
  2082  	return p
  2083  }
  2084  
  2085  func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
  2086  	p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
  2087  	p.To.Name = obj.NAME_PARAM
  2088  	p.To.Sym = n.Linksym()
  2089  	p.Pos = p.Pos.WithNotStmt()
  2090  	return p
  2091  }
  2092  

View as plain text