Source file src/cmd/compile/internal/ppc64/ssa.go

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ppc64
     6  
     7  import (
     8  	"cmd/compile/internal/base"
     9  	"cmd/compile/internal/ir"
    10  	"cmd/compile/internal/logopt"
    11  	"cmd/compile/internal/objw"
    12  	"cmd/compile/internal/ssa"
    13  	"cmd/compile/internal/ssagen"
    14  	"cmd/compile/internal/types"
    15  	"cmd/internal/obj"
    16  	"cmd/internal/obj/ppc64"
    17  	"internal/buildcfg"
    18  	"math"
    19  	"strings"
    20  )
    21  
    22  // ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
    23  func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
    24  	//	flive := b.FlagsLiveAtEnd
    25  	//	if b.Control != nil && b.Control.Type.IsFlags() {
    26  	//		flive = true
    27  	//	}
    28  	//	for i := len(b.Values) - 1; i >= 0; i-- {
    29  	//		v := b.Values[i]
    30  	//		if flive && (v.Op == v.Op == ssa.OpPPC64MOVDconst) {
    31  	//			// The "mark" is any non-nil Aux value.
    32  	//			v.Aux = v
    33  	//		}
    34  	//		if v.Type.IsFlags() {
    35  	//			flive = false
    36  	//		}
    37  	//		for _, a := range v.Args {
    38  	//			if a.Type.IsFlags() {
    39  	//				flive = true
    40  	//			}
    41  	//		}
    42  	//	}
    43  }
    44  
    45  // loadByType returns the load instruction of the given type.
    46  func loadByType(t *types.Type) obj.As {
    47  	if t.IsFloat() {
    48  		switch t.Size() {
    49  		case 4:
    50  			return ppc64.AFMOVS
    51  		case 8:
    52  			return ppc64.AFMOVD
    53  		}
    54  	} else {
    55  		switch t.Size() {
    56  		case 1:
    57  			if t.IsSigned() {
    58  				return ppc64.AMOVB
    59  			} else {
    60  				return ppc64.AMOVBZ
    61  			}
    62  		case 2:
    63  			if t.IsSigned() {
    64  				return ppc64.AMOVH
    65  			} else {
    66  				return ppc64.AMOVHZ
    67  			}
    68  		case 4:
    69  			if t.IsSigned() {
    70  				return ppc64.AMOVW
    71  			} else {
    72  				return ppc64.AMOVWZ
    73  			}
    74  		case 8:
    75  			return ppc64.AMOVD
    76  		}
    77  	}
    78  	panic("bad load type")
    79  }
    80  
    81  // storeByType returns the store instruction of the given type.
    82  func storeByType(t *types.Type) obj.As {
    83  	if t.IsFloat() {
    84  		switch t.Size() {
    85  		case 4:
    86  			return ppc64.AFMOVS
    87  		case 8:
    88  			return ppc64.AFMOVD
    89  		}
    90  	} else {
    91  		switch t.Size() {
    92  		case 1:
    93  			return ppc64.AMOVB
    94  		case 2:
    95  			return ppc64.AMOVH
    96  		case 4:
    97  			return ppc64.AMOVW
    98  		case 8:
    99  			return ppc64.AMOVD
   100  		}
   101  	}
   102  	panic("bad store type")
   103  }
   104  
   105  func ssaGenValue(s *ssagen.State, v *ssa.Value) {
   106  	switch v.Op {
   107  	case ssa.OpCopy:
   108  		t := v.Type
   109  		if t.IsMemory() {
   110  			return
   111  		}
   112  		x := v.Args[0].Reg()
   113  		y := v.Reg()
   114  		if x != y {
   115  			rt := obj.TYPE_REG
   116  			op := ppc64.AMOVD
   117  
   118  			if t.IsFloat() {
   119  				op = ppc64.AFMOVD
   120  			}
   121  			p := s.Prog(op)
   122  			p.From.Type = rt
   123  			p.From.Reg = x
   124  			p.To.Type = rt
   125  			p.To.Reg = y
   126  		}
   127  
   128  	case ssa.OpPPC64LoweredAtomicAnd8,
   129  		ssa.OpPPC64LoweredAtomicAnd32,
   130  		ssa.OpPPC64LoweredAtomicOr8,
   131  		ssa.OpPPC64LoweredAtomicOr32:
   132  		// LWSYNC
   133  		// LBAR/LWAR	(Rarg0), Rtmp
   134  		// AND/OR	Rarg1, Rtmp
   135  		// STBCCC/STWCCC Rtmp, (Rarg0)
   136  		// BNE		-3(PC)
   137  		ld := ppc64.ALBAR
   138  		st := ppc64.ASTBCCC
   139  		if v.Op == ssa.OpPPC64LoweredAtomicAnd32 || v.Op == ssa.OpPPC64LoweredAtomicOr32 {
   140  			ld = ppc64.ALWAR
   141  			st = ppc64.ASTWCCC
   142  		}
   143  		r0 := v.Args[0].Reg()
   144  		r1 := v.Args[1].Reg()
   145  		// LWSYNC - Assuming shared data not write-through-required nor
   146  		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
   147  		plwsync := s.Prog(ppc64.ALWSYNC)
   148  		plwsync.To.Type = obj.TYPE_NONE
   149  		// LBAR or LWAR
   150  		p := s.Prog(ld)
   151  		p.From.Type = obj.TYPE_MEM
   152  		p.From.Reg = r0
   153  		p.To.Type = obj.TYPE_REG
   154  		p.To.Reg = ppc64.REGTMP
   155  		// AND/OR reg1,out
   156  		p1 := s.Prog(v.Op.Asm())
   157  		p1.From.Type = obj.TYPE_REG
   158  		p1.From.Reg = r1
   159  		p1.To.Type = obj.TYPE_REG
   160  		p1.To.Reg = ppc64.REGTMP
   161  		// STBCCC or STWCCC
   162  		p2 := s.Prog(st)
   163  		p2.From.Type = obj.TYPE_REG
   164  		p2.From.Reg = ppc64.REGTMP
   165  		p2.To.Type = obj.TYPE_MEM
   166  		p2.To.Reg = r0
   167  		p2.RegTo2 = ppc64.REGTMP
   168  		// BNE retry
   169  		p3 := s.Prog(ppc64.ABNE)
   170  		p3.To.Type = obj.TYPE_BRANCH
   171  		p3.To.SetTarget(p)
   172  
   173  	case ssa.OpPPC64LoweredAtomicAdd32,
   174  		ssa.OpPPC64LoweredAtomicAdd64:
   175  		// LWSYNC
   176  		// LDAR/LWAR    (Rarg0), Rout
   177  		// ADD		Rarg1, Rout
   178  		// STDCCC/STWCCC Rout, (Rarg0)
   179  		// BNE         -3(PC)
   180  		// MOVW		Rout,Rout (if Add32)
   181  		ld := ppc64.ALDAR
   182  		st := ppc64.ASTDCCC
   183  		if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
   184  			ld = ppc64.ALWAR
   185  			st = ppc64.ASTWCCC
   186  		}
   187  		r0 := v.Args[0].Reg()
   188  		r1 := v.Args[1].Reg()
   189  		out := v.Reg0()
   190  		// LWSYNC - Assuming shared data not write-through-required nor
   191  		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
   192  		plwsync := s.Prog(ppc64.ALWSYNC)
   193  		plwsync.To.Type = obj.TYPE_NONE
   194  		// LDAR or LWAR
   195  		p := s.Prog(ld)
   196  		p.From.Type = obj.TYPE_MEM
   197  		p.From.Reg = r0
   198  		p.To.Type = obj.TYPE_REG
   199  		p.To.Reg = out
   200  		// ADD reg1,out
   201  		p1 := s.Prog(ppc64.AADD)
   202  		p1.From.Type = obj.TYPE_REG
   203  		p1.From.Reg = r1
   204  		p1.To.Reg = out
   205  		p1.To.Type = obj.TYPE_REG
   206  		// STDCCC or STWCCC
   207  		p3 := s.Prog(st)
   208  		p3.From.Type = obj.TYPE_REG
   209  		p3.From.Reg = out
   210  		p3.To.Type = obj.TYPE_MEM
   211  		p3.To.Reg = r0
   212  		// BNE retry
   213  		p4 := s.Prog(ppc64.ABNE)
   214  		p4.To.Type = obj.TYPE_BRANCH
   215  		p4.To.SetTarget(p)
   216  
   217  		// Ensure a 32 bit result
   218  		if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
   219  			p5 := s.Prog(ppc64.AMOVWZ)
   220  			p5.To.Type = obj.TYPE_REG
   221  			p5.To.Reg = out
   222  			p5.From.Type = obj.TYPE_REG
   223  			p5.From.Reg = out
   224  		}
   225  
   226  	case ssa.OpPPC64LoweredAtomicExchange32,
   227  		ssa.OpPPC64LoweredAtomicExchange64:
   228  		// LWSYNC
   229  		// LDAR/LWAR    (Rarg0), Rout
   230  		// STDCCC/STWCCC Rout, (Rarg0)
   231  		// BNE         -2(PC)
   232  		// ISYNC
   233  		ld := ppc64.ALDAR
   234  		st := ppc64.ASTDCCC
   235  		if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
   236  			ld = ppc64.ALWAR
   237  			st = ppc64.ASTWCCC
   238  		}
   239  		r0 := v.Args[0].Reg()
   240  		r1 := v.Args[1].Reg()
   241  		out := v.Reg0()
   242  		// LWSYNC - Assuming shared data not write-through-required nor
   243  		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
   244  		plwsync := s.Prog(ppc64.ALWSYNC)
   245  		plwsync.To.Type = obj.TYPE_NONE
   246  		// LDAR or LWAR
   247  		p := s.Prog(ld)
   248  		p.From.Type = obj.TYPE_MEM
   249  		p.From.Reg = r0
   250  		p.To.Type = obj.TYPE_REG
   251  		p.To.Reg = out
   252  		// STDCCC or STWCCC
   253  		p1 := s.Prog(st)
   254  		p1.From.Type = obj.TYPE_REG
   255  		p1.From.Reg = r1
   256  		p1.To.Type = obj.TYPE_MEM
   257  		p1.To.Reg = r0
   258  		// BNE retry
   259  		p2 := s.Prog(ppc64.ABNE)
   260  		p2.To.Type = obj.TYPE_BRANCH
   261  		p2.To.SetTarget(p)
   262  		// ISYNC
   263  		pisync := s.Prog(ppc64.AISYNC)
   264  		pisync.To.Type = obj.TYPE_NONE
   265  
   266  	case ssa.OpPPC64LoweredAtomicLoad8,
   267  		ssa.OpPPC64LoweredAtomicLoad32,
   268  		ssa.OpPPC64LoweredAtomicLoad64,
   269  		ssa.OpPPC64LoweredAtomicLoadPtr:
   270  		// SYNC
   271  		// MOVB/MOVD/MOVW (Rarg0), Rout
   272  		// CMP Rout,Rout
   273  		// BNE 1(PC)
   274  		// ISYNC
   275  		ld := ppc64.AMOVD
   276  		cmp := ppc64.ACMP
   277  		switch v.Op {
   278  		case ssa.OpPPC64LoweredAtomicLoad8:
   279  			ld = ppc64.AMOVBZ
   280  		case ssa.OpPPC64LoweredAtomicLoad32:
   281  			ld = ppc64.AMOVWZ
   282  			cmp = ppc64.ACMPW
   283  		}
   284  		arg0 := v.Args[0].Reg()
   285  		out := v.Reg0()
   286  		// SYNC when AuxInt == 1; otherwise, load-acquire
   287  		if v.AuxInt == 1 {
   288  			psync := s.Prog(ppc64.ASYNC)
   289  			psync.To.Type = obj.TYPE_NONE
   290  		}
   291  		// Load
   292  		p := s.Prog(ld)
   293  		p.From.Type = obj.TYPE_MEM
   294  		p.From.Reg = arg0
   295  		p.To.Type = obj.TYPE_REG
   296  		p.To.Reg = out
   297  		// CMP
   298  		p1 := s.Prog(cmp)
   299  		p1.From.Type = obj.TYPE_REG
   300  		p1.From.Reg = out
   301  		p1.To.Type = obj.TYPE_REG
   302  		p1.To.Reg = out
   303  		// BNE
   304  		p2 := s.Prog(ppc64.ABNE)
   305  		p2.To.Type = obj.TYPE_BRANCH
   306  		// ISYNC
   307  		pisync := s.Prog(ppc64.AISYNC)
   308  		pisync.To.Type = obj.TYPE_NONE
   309  		p2.To.SetTarget(pisync)
   310  
   311  	case ssa.OpPPC64LoweredAtomicStore8,
   312  		ssa.OpPPC64LoweredAtomicStore32,
   313  		ssa.OpPPC64LoweredAtomicStore64:
   314  		// SYNC or LWSYNC
   315  		// MOVB/MOVW/MOVD arg1,(arg0)
   316  		st := ppc64.AMOVD
   317  		switch v.Op {
   318  		case ssa.OpPPC64LoweredAtomicStore8:
   319  			st = ppc64.AMOVB
   320  		case ssa.OpPPC64LoweredAtomicStore32:
   321  			st = ppc64.AMOVW
   322  		}
   323  		arg0 := v.Args[0].Reg()
   324  		arg1 := v.Args[1].Reg()
   325  		// If AuxInt == 0, LWSYNC (Store-Release), else SYNC
   326  		// SYNC
   327  		syncOp := ppc64.ASYNC
   328  		if v.AuxInt == 0 {
   329  			syncOp = ppc64.ALWSYNC
   330  		}
   331  		psync := s.Prog(syncOp)
   332  		psync.To.Type = obj.TYPE_NONE
   333  		// Store
   334  		p := s.Prog(st)
   335  		p.To.Type = obj.TYPE_MEM
   336  		p.To.Reg = arg0
   337  		p.From.Type = obj.TYPE_REG
   338  		p.From.Reg = arg1
   339  
   340  	case ssa.OpPPC64LoweredAtomicCas64,
   341  		ssa.OpPPC64LoweredAtomicCas32:
   342  		// MOVD        $0, Rout
   343  		// LWSYNC
   344  		// loop:
   345  		// LDAR        (Rarg0), MutexHint, Rtmp
   346  		// CMP         Rarg1, Rtmp
   347  		// BNE         end
   348  		// STDCCC      Rarg2, (Rarg0)
   349  		// BNE         loop
   350  		// MOVD        $1, Rout
   351  		// end:
   352  		// LWSYNC      // Only for sequential consistency; not required in CasRel.
   353  		ld := ppc64.ALDAR
   354  		st := ppc64.ASTDCCC
   355  		cmp := ppc64.ACMP
   356  		if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
   357  			ld = ppc64.ALWAR
   358  			st = ppc64.ASTWCCC
   359  			cmp = ppc64.ACMPW
   360  		}
   361  		r0 := v.Args[0].Reg()
   362  		r1 := v.Args[1].Reg()
   363  		r2 := v.Args[2].Reg()
   364  		out := v.Reg0()
   365  		// Initialize return value to false
   366  		p := s.Prog(ppc64.AMOVD)
   367  		p.From.Type = obj.TYPE_CONST
   368  		p.From.Offset = 0
   369  		p.To.Type = obj.TYPE_REG
   370  		p.To.Reg = out
   371  		// LWSYNC - Assuming shared data not write-through-required nor
   372  		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
   373  		plwsync1 := s.Prog(ppc64.ALWSYNC)
   374  		plwsync1.To.Type = obj.TYPE_NONE
   375  		// LDAR or LWAR
   376  		p0 := s.Prog(ld)
   377  		p0.From.Type = obj.TYPE_MEM
   378  		p0.From.Reg = r0
   379  		p0.To.Type = obj.TYPE_REG
   380  		p0.To.Reg = ppc64.REGTMP
   381  		// If it is a Compare-and-Swap-Release operation, set the EH field with
   382  		// the release hint.
   383  		if v.AuxInt == 0 {
   384  			p0.AddRestSourceConst(0)
   385  		}
   386  		// CMP reg1,reg2
   387  		p1 := s.Prog(cmp)
   388  		p1.From.Type = obj.TYPE_REG
   389  		p1.From.Reg = r1
   390  		p1.To.Reg = ppc64.REGTMP
   391  		p1.To.Type = obj.TYPE_REG
   392  		// BNE done with return value = false
   393  		p2 := s.Prog(ppc64.ABNE)
   394  		p2.To.Type = obj.TYPE_BRANCH
   395  		// STDCCC or STWCCC
   396  		p3 := s.Prog(st)
   397  		p3.From.Type = obj.TYPE_REG
   398  		p3.From.Reg = r2
   399  		p3.To.Type = obj.TYPE_MEM
   400  		p3.To.Reg = r0
   401  		// BNE retry
   402  		p4 := s.Prog(ppc64.ABNE)
   403  		p4.To.Type = obj.TYPE_BRANCH
   404  		p4.To.SetTarget(p0)
   405  		// return value true
   406  		p5 := s.Prog(ppc64.AMOVD)
   407  		p5.From.Type = obj.TYPE_CONST
   408  		p5.From.Offset = 1
   409  		p5.To.Type = obj.TYPE_REG
   410  		p5.To.Reg = out
   411  		// LWSYNC - Assuming shared data not write-through-required nor
   412  		// caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
   413  		// If the operation is a CAS-Release, then synchronization is not necessary.
   414  		if v.AuxInt != 0 {
   415  			plwsync2 := s.Prog(ppc64.ALWSYNC)
   416  			plwsync2.To.Type = obj.TYPE_NONE
   417  			p2.To.SetTarget(plwsync2)
   418  		} else {
   419  			// done (label)
   420  			p6 := s.Prog(obj.ANOP)
   421  			p2.To.SetTarget(p6)
   422  		}
   423  
   424  	case ssa.OpPPC64LoweredPubBarrier:
   425  		// LWSYNC
   426  		s.Prog(v.Op.Asm())
   427  
   428  	case ssa.OpPPC64LoweredGetClosurePtr:
   429  		// Closure pointer is R11 (already)
   430  		ssagen.CheckLoweredGetClosurePtr(v)
   431  
   432  	case ssa.OpPPC64LoweredGetCallerSP:
   433  		// caller's SP is FixedFrameSize below the address of the first arg
   434  		p := s.Prog(ppc64.AMOVD)
   435  		p.From.Type = obj.TYPE_ADDR
   436  		p.From.Offset = -base.Ctxt.Arch.FixedFrameSize
   437  		p.From.Name = obj.NAME_PARAM
   438  		p.To.Type = obj.TYPE_REG
   439  		p.To.Reg = v.Reg()
   440  
   441  	case ssa.OpPPC64LoweredGetCallerPC:
   442  		p := s.Prog(obj.AGETCALLERPC)
   443  		p.To.Type = obj.TYPE_REG
   444  		p.To.Reg = v.Reg()
   445  
   446  	case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
   447  		// input is already rounded
   448  
   449  	case ssa.OpLoadReg:
   450  		loadOp := loadByType(v.Type)
   451  		p := s.Prog(loadOp)
   452  		ssagen.AddrAuto(&p.From, v.Args[0])
   453  		p.To.Type = obj.TYPE_REG
   454  		p.To.Reg = v.Reg()
   455  
   456  	case ssa.OpStoreReg:
   457  		storeOp := storeByType(v.Type)
   458  		p := s.Prog(storeOp)
   459  		p.From.Type = obj.TYPE_REG
   460  		p.From.Reg = v.Args[0].Reg()
   461  		ssagen.AddrAuto(&p.To, v)
   462  
   463  	case ssa.OpArgIntReg, ssa.OpArgFloatReg:
   464  		// The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill
   465  		// The loop only runs once.
   466  		for _, a := range v.Block.Func.RegArgs {
   467  			// Pass the spill/unspill information along to the assembler, offset by size of
   468  			// the saved LR slot.
   469  			addr := ssagen.SpillSlotAddr(a, ppc64.REGSP, base.Ctxt.Arch.FixedFrameSize)
   470  			s.FuncInfo().AddSpill(
   471  				obj.RegSpill{Reg: a.Reg, Addr: addr, Unspill: loadByType(a.Type), Spill: storeByType(a.Type)})
   472  		}
   473  		v.Block.Func.RegArgs = nil
   474  
   475  		ssagen.CheckArgReg(v)
   476  
   477  	case ssa.OpPPC64DIVD:
   478  		// For now,
   479  		//
   480  		// cmp arg1, -1
   481  		// be  ahead
   482  		// v = arg0 / arg1
   483  		// b over
   484  		// ahead: v = - arg0
   485  		// over: nop
   486  		r := v.Reg()
   487  		r0 := v.Args[0].Reg()
   488  		r1 := v.Args[1].Reg()
   489  
   490  		p := s.Prog(ppc64.ACMP)
   491  		p.From.Type = obj.TYPE_REG
   492  		p.From.Reg = r1
   493  		p.To.Type = obj.TYPE_CONST
   494  		p.To.Offset = -1
   495  
   496  		pbahead := s.Prog(ppc64.ABEQ)
   497  		pbahead.To.Type = obj.TYPE_BRANCH
   498  
   499  		p = s.Prog(v.Op.Asm())
   500  		p.From.Type = obj.TYPE_REG
   501  		p.From.Reg = r1
   502  		p.Reg = r0
   503  		p.To.Type = obj.TYPE_REG
   504  		p.To.Reg = r
   505  
   506  		pbover := s.Prog(obj.AJMP)
   507  		pbover.To.Type = obj.TYPE_BRANCH
   508  
   509  		p = s.Prog(ppc64.ANEG)
   510  		p.To.Type = obj.TYPE_REG
   511  		p.To.Reg = r
   512  		p.From.Type = obj.TYPE_REG
   513  		p.From.Reg = r0
   514  		pbahead.To.SetTarget(p)
   515  
   516  		p = s.Prog(obj.ANOP)
   517  		pbover.To.SetTarget(p)
   518  
   519  	case ssa.OpPPC64DIVW:
   520  		// word-width version of above
   521  		r := v.Reg()
   522  		r0 := v.Args[0].Reg()
   523  		r1 := v.Args[1].Reg()
   524  
   525  		p := s.Prog(ppc64.ACMPW)
   526  		p.From.Type = obj.TYPE_REG
   527  		p.From.Reg = r1
   528  		p.To.Type = obj.TYPE_CONST
   529  		p.To.Offset = -1
   530  
   531  		pbahead := s.Prog(ppc64.ABEQ)
   532  		pbahead.To.Type = obj.TYPE_BRANCH
   533  
   534  		p = s.Prog(v.Op.Asm())
   535  		p.From.Type = obj.TYPE_REG
   536  		p.From.Reg = r1
   537  		p.Reg = r0
   538  		p.To.Type = obj.TYPE_REG
   539  		p.To.Reg = r
   540  
   541  		pbover := s.Prog(obj.AJMP)
   542  		pbover.To.Type = obj.TYPE_BRANCH
   543  
   544  		p = s.Prog(ppc64.ANEG)
   545  		p.To.Type = obj.TYPE_REG
   546  		p.To.Reg = r
   547  		p.From.Type = obj.TYPE_REG
   548  		p.From.Reg = r0
   549  		pbahead.To.SetTarget(p)
   550  
   551  		p = s.Prog(obj.ANOP)
   552  		pbover.To.SetTarget(p)
   553  
   554  	case ssa.OpPPC64CLRLSLWI:
   555  		r := v.Reg()
   556  		r1 := v.Args[0].Reg()
   557  		shifts := v.AuxInt
   558  		p := s.Prog(v.Op.Asm())
   559  		// clrlslwi ra,rs,mb,sh will become rlwinm ra,rs,sh,mb-sh,31-sh as described in ISA
   560  		p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
   561  		p.AddRestSourceConst(ssa.GetPPC64Shiftsh(shifts))
   562  		p.Reg = r1
   563  		p.To.Type = obj.TYPE_REG
   564  		p.To.Reg = r
   565  
   566  	case ssa.OpPPC64CLRLSLDI:
   567  		r := v.Reg()
   568  		r1 := v.Args[0].Reg()
   569  		shifts := v.AuxInt
   570  		p := s.Prog(v.Op.Asm())
   571  		// clrlsldi ra,rs,mb,sh will become rldic ra,rs,sh,mb-sh
   572  		p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
   573  		p.AddRestSourceConst(ssa.GetPPC64Shiftsh(shifts))
   574  		p.Reg = r1
   575  		p.To.Type = obj.TYPE_REG
   576  		p.To.Reg = r
   577  
   578  	case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
   579  		ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
   580  		ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
   581  		ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
   582  		ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
   583  		ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
   584  		ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
   585  		ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW, ssa.OpPPC64XSMINJDP, ssa.OpPPC64XSMAXJDP:
   586  		r := v.Reg()
   587  		r1 := v.Args[0].Reg()
   588  		r2 := v.Args[1].Reg()
   589  		p := s.Prog(v.Op.Asm())
   590  		p.From.Type = obj.TYPE_REG
   591  		p.From.Reg = r2
   592  		p.Reg = r1
   593  		p.To.Type = obj.TYPE_REG
   594  		p.To.Reg = r
   595  
   596  	case ssa.OpPPC64ADDCC, ssa.OpPPC64ANDCC, ssa.OpPPC64SUBCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC, ssa.OpPPC64NORCC,
   597  		ssa.OpPPC64ANDNCC:
   598  		r1 := v.Args[0].Reg()
   599  		r2 := v.Args[1].Reg()
   600  		p := s.Prog(v.Op.Asm())
   601  		p.From.Type = obj.TYPE_REG
   602  		p.From.Reg = r2
   603  		p.Reg = r1
   604  		p.To.Type = obj.TYPE_REG
   605  		p.To.Reg = v.Reg0()
   606  
   607  	case ssa.OpPPC64NEGCC, ssa.OpPPC64CNTLZDCC:
   608  		p := s.Prog(v.Op.Asm())
   609  		p.To.Type = obj.TYPE_REG
   610  		p.To.Reg = v.Reg0()
   611  		p.From.Type = obj.TYPE_REG
   612  		p.From.Reg = v.Args[0].Reg()
   613  
   614  	case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
   615  		p := s.Prog(v.Op.Asm())
   616  		p.From.Type = obj.TYPE_CONST
   617  		p.From.Offset = v.AuxInt
   618  		p.Reg = v.Args[0].Reg()
   619  		p.To.Type = obj.TYPE_REG
   620  		p.To.Reg = v.Reg()
   621  
   622  		// Auxint holds encoded rotate + mask
   623  	case ssa.OpPPC64RLWINM, ssa.OpPPC64RLWMI:
   624  		sh, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
   625  		p := s.Prog(v.Op.Asm())
   626  		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
   627  		p.Reg = v.Args[0].Reg()
   628  		p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(sh)}
   629  		p.AddRestSourceArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
   630  		// Auxint holds mask
   631  
   632  	case ssa.OpPPC64RLDICL, ssa.OpPPC64RLDICLCC, ssa.OpPPC64RLDICR:
   633  		sh, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
   634  		p := s.Prog(v.Op.Asm())
   635  		p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: sh}
   636  		switch v.Op {
   637  		case ssa.OpPPC64RLDICL, ssa.OpPPC64RLDICLCC:
   638  			p.AddRestSourceConst(mb)
   639  		case ssa.OpPPC64RLDICR:
   640  			p.AddRestSourceConst(me)
   641  		}
   642  		p.Reg = v.Args[0].Reg()
   643  		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.ResultReg()}
   644  
   645  	case ssa.OpPPC64RLWNM:
   646  		_, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
   647  		p := s.Prog(v.Op.Asm())
   648  		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
   649  		p.Reg = v.Args[0].Reg()
   650  		p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()}
   651  		p.AddRestSourceArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
   652  
   653  	case ssa.OpPPC64MADDLD:
   654  		r := v.Reg()
   655  		r1 := v.Args[0].Reg()
   656  		r2 := v.Args[1].Reg()
   657  		r3 := v.Args[2].Reg()
   658  		// r = r1*r2 ± r3
   659  		p := s.Prog(v.Op.Asm())
   660  		p.From.Type = obj.TYPE_REG
   661  		p.From.Reg = r1
   662  		p.Reg = r2
   663  		p.AddRestSourceReg(r3)
   664  		p.To.Type = obj.TYPE_REG
   665  		p.To.Reg = r
   666  
   667  	case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
   668  		r := v.Reg()
   669  		r1 := v.Args[0].Reg()
   670  		r2 := v.Args[1].Reg()
   671  		r3 := v.Args[2].Reg()
   672  		// r = r1*r2 ± r3
   673  		p := s.Prog(v.Op.Asm())
   674  		p.From.Type = obj.TYPE_REG
   675  		p.From.Reg = r1
   676  		p.Reg = r3
   677  		p.AddRestSourceReg(r2)
   678  		p.To.Type = obj.TYPE_REG
   679  		p.To.Reg = r
   680  
   681  	case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
   682  		ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
   683  		ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
   684  		ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD, ssa.OpPPC64BRH, ssa.OpPPC64BRW, ssa.OpPPC64BRD:
   685  		r := v.Reg()
   686  		p := s.Prog(v.Op.Asm())
   687  		p.To.Type = obj.TYPE_REG
   688  		p.To.Reg = r
   689  		p.From.Type = obj.TYPE_REG
   690  		p.From.Reg = v.Args[0].Reg()
   691  
   692  	case ssa.OpPPC64ADDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
   693  		ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst,
   694  		ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst, ssa.OpPPC64MULLWconst, ssa.OpPPC64MULLDconst,
   695  		ssa.OpPPC64ANDconst:
   696  		p := s.Prog(v.Op.Asm())
   697  		p.Reg = v.Args[0].Reg()
   698  		p.From.Type = obj.TYPE_CONST
   699  		p.From.Offset = v.AuxInt
   700  		p.To.Type = obj.TYPE_REG
   701  		p.To.Reg = v.Reg()
   702  
   703  	case ssa.OpPPC64ADDC, ssa.OpPPC64ADDE, ssa.OpPPC64SUBC, ssa.OpPPC64SUBE:
   704  		r := v.Reg0() // CA is the first, implied argument.
   705  		r1 := v.Args[0].Reg()
   706  		r2 := v.Args[1].Reg()
   707  		p := s.Prog(v.Op.Asm())
   708  		p.From.Type = obj.TYPE_REG
   709  		p.From.Reg = r2
   710  		p.Reg = r1
   711  		p.To.Type = obj.TYPE_REG
   712  		p.To.Reg = r
   713  
   714  	case ssa.OpPPC64ADDZE:
   715  		p := s.Prog(v.Op.Asm())
   716  		p.From.Type = obj.TYPE_REG
   717  		p.From.Reg = v.Args[0].Reg()
   718  		p.To.Type = obj.TYPE_REG
   719  		p.To.Reg = v.Reg0()
   720  
   721  	case ssa.OpPPC64ADDZEzero, ssa.OpPPC64SUBZEzero:
   722  		p := s.Prog(v.Op.Asm())
   723  		p.From.Type = obj.TYPE_REG
   724  		p.From.Reg = ppc64.REG_R0
   725  		p.To.Type = obj.TYPE_REG
   726  		p.To.Reg = v.Reg()
   727  
   728  	case ssa.OpPPC64ADDCconst:
   729  		p := s.Prog(v.Op.Asm())
   730  		p.Reg = v.Args[0].Reg()
   731  		p.From.Type = obj.TYPE_CONST
   732  		p.From.Offset = v.AuxInt
   733  		p.To.Type = obj.TYPE_REG
   734  		// Output is a pair, the second is the CA, which is implied.
   735  		p.To.Reg = v.Reg0()
   736  
   737  	case ssa.OpPPC64SUBCconst:
   738  		p := s.Prog(v.Op.Asm())
   739  		p.AddRestSourceConst(v.AuxInt)
   740  		p.From.Type = obj.TYPE_REG
   741  		p.From.Reg = v.Args[0].Reg()
   742  		p.To.Type = obj.TYPE_REG
   743  		p.To.Reg = v.Reg0()
   744  
   745  	case ssa.OpPPC64SUBFCconst:
   746  		p := s.Prog(v.Op.Asm())
   747  		p.AddRestSourceConst(v.AuxInt)
   748  		p.From.Type = obj.TYPE_REG
   749  		p.From.Reg = v.Args[0].Reg()
   750  		p.To.Type = obj.TYPE_REG
   751  		p.To.Reg = v.Reg()
   752  
   753  	case ssa.OpPPC64ADDCCconst, ssa.OpPPC64ANDCCconst:
   754  		p := s.Prog(v.Op.Asm())
   755  		p.Reg = v.Args[0].Reg()
   756  		p.From.Type = obj.TYPE_CONST
   757  		p.From.Offset = v.AuxInt
   758  		p.To.Type = obj.TYPE_REG
   759  		p.To.Reg = v.Reg0()
   760  
   761  	case ssa.OpPPC64MOVDaddr:
   762  		switch v.Aux.(type) {
   763  		default:
   764  			v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
   765  		case nil:
   766  			// If aux offset and aux int are both 0, and the same
   767  			// input and output regs are used, no instruction
   768  			// needs to be generated, since it would just be
   769  			// addi rx, rx, 0.
   770  			if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
   771  				p := s.Prog(ppc64.AMOVD)
   772  				p.From.Type = obj.TYPE_ADDR
   773  				p.From.Reg = v.Args[0].Reg()
   774  				p.From.Offset = v.AuxInt
   775  				p.To.Type = obj.TYPE_REG
   776  				p.To.Reg = v.Reg()
   777  			}
   778  
   779  		case *obj.LSym, ir.Node:
   780  			p := s.Prog(ppc64.AMOVD)
   781  			p.From.Type = obj.TYPE_ADDR
   782  			p.From.Reg = v.Args[0].Reg()
   783  			p.To.Type = obj.TYPE_REG
   784  			p.To.Reg = v.Reg()
   785  			ssagen.AddAux(&p.From, v)
   786  
   787  		}
   788  
   789  	case ssa.OpPPC64MOVDconst:
   790  		p := s.Prog(v.Op.Asm())
   791  		p.From.Type = obj.TYPE_CONST
   792  		p.From.Offset = v.AuxInt
   793  		p.To.Type = obj.TYPE_REG
   794  		p.To.Reg = v.Reg()
   795  
   796  	case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
   797  		p := s.Prog(v.Op.Asm())
   798  		p.From.Type = obj.TYPE_FCONST
   799  		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
   800  		p.To.Type = obj.TYPE_REG
   801  		p.To.Reg = v.Reg()
   802  
   803  	case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
   804  		p := s.Prog(v.Op.Asm())
   805  		p.From.Type = obj.TYPE_REG
   806  		p.From.Reg = v.Args[0].Reg()
   807  		p.To.Type = obj.TYPE_REG
   808  		p.To.Reg = v.Args[1].Reg()
   809  
   810  	case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
   811  		p := s.Prog(v.Op.Asm())
   812  		p.From.Type = obj.TYPE_REG
   813  		p.From.Reg = v.Args[0].Reg()
   814  		p.To.Type = obj.TYPE_CONST
   815  		p.To.Offset = v.AuxInt
   816  
   817  	case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
   818  		// Shift in register to required size
   819  		p := s.Prog(v.Op.Asm())
   820  		p.From.Type = obj.TYPE_REG
   821  		p.From.Reg = v.Args[0].Reg()
   822  		p.To.Reg = v.Reg()
   823  		p.To.Type = obj.TYPE_REG
   824  
   825  	case ssa.OpPPC64MOVDload, ssa.OpPPC64MOVWload:
   826  
   827  		// MOVDload and MOVWload are DS form instructions that are restricted to
   828  		// offsets that are a multiple of 4. If the offset is not a multiple of 4,
   829  		// then the address of the symbol to be loaded is computed (base + offset)
   830  		// and used as the new base register and the offset field in the instruction
   831  		// can be set to zero.
   832  
   833  		// This same problem can happen with gostrings since the final offset is not
   834  		// known yet, but could be unaligned after the relocation is resolved.
   835  		// So gostrings are handled the same way.
   836  
   837  		// This allows the MOVDload and MOVWload to be generated in more cases and
   838  		// eliminates some offset and alignment checking in the rules file.
   839  
   840  		fromAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
   841  		ssagen.AddAux(&fromAddr, v)
   842  
   843  		genAddr := false
   844  
   845  		switch fromAddr.Name {
   846  		case obj.NAME_EXTERN, obj.NAME_STATIC:
   847  			// Special case for a rule combines the bytes of gostring.
   848  			// The v alignment might seem OK, but we don't want to load it
   849  			// using an offset because relocation comes later.
   850  			genAddr = strings.HasPrefix(fromAddr.Sym.Name, "go:string") || v.Type.Alignment()%4 != 0 || fromAddr.Offset%4 != 0
   851  		default:
   852  			genAddr = fromAddr.Offset%4 != 0
   853  		}
   854  		if genAddr {
   855  			// Load full address into the temp register.
   856  			p := s.Prog(ppc64.AMOVD)
   857  			p.From.Type = obj.TYPE_ADDR
   858  			p.From.Reg = v.Args[0].Reg()
   859  			ssagen.AddAux(&p.From, v)
   860  			// Load target using temp as base register
   861  			// and offset zero. Setting NAME_NONE
   862  			// prevents any extra offsets from being
   863  			// added.
   864  			p.To.Type = obj.TYPE_REG
   865  			p.To.Reg = ppc64.REGTMP
   866  			fromAddr.Reg = ppc64.REGTMP
   867  			// Clear the offset field and other
   868  			// information that might be used
   869  			// by the assembler to add to the
   870  			// final offset value.
   871  			fromAddr.Offset = 0
   872  			fromAddr.Name = obj.NAME_NONE
   873  			fromAddr.Sym = nil
   874  		}
   875  		p := s.Prog(v.Op.Asm())
   876  		p.From = fromAddr
   877  		p.To.Type = obj.TYPE_REG
   878  		p.To.Reg = v.Reg()
   879  
   880  	case ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
   881  		p := s.Prog(v.Op.Asm())
   882  		p.From.Type = obj.TYPE_MEM
   883  		p.From.Reg = v.Args[0].Reg()
   884  		ssagen.AddAux(&p.From, v)
   885  		p.To.Type = obj.TYPE_REG
   886  		p.To.Reg = v.Reg()
   887  
   888  	case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
   889  		p := s.Prog(v.Op.Asm())
   890  		p.From.Type = obj.TYPE_MEM
   891  		p.From.Reg = v.Args[0].Reg()
   892  		p.To.Type = obj.TYPE_REG
   893  		p.To.Reg = v.Reg()
   894  
   895  	case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
   896  		p := s.Prog(v.Op.Asm())
   897  		p.To.Type = obj.TYPE_MEM
   898  		p.To.Reg = v.Args[0].Reg()
   899  		p.From.Type = obj.TYPE_REG
   900  		p.From.Reg = v.Args[1].Reg()
   901  
   902  	case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
   903  		ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
   904  		ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
   905  		p := s.Prog(v.Op.Asm())
   906  		p.From.Type = obj.TYPE_MEM
   907  		p.From.Reg = v.Args[0].Reg()
   908  		p.From.Index = v.Args[1].Reg()
   909  		p.To.Type = obj.TYPE_REG
   910  		p.To.Reg = v.Reg()
   911  
   912  	case ssa.OpPPC64DCBT:
   913  		p := s.Prog(v.Op.Asm())
   914  		p.From.Type = obj.TYPE_MEM
   915  		p.From.Reg = v.Args[0].Reg()
   916  		p.To.Type = obj.TYPE_CONST
   917  		p.To.Offset = v.AuxInt
   918  
   919  	case ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
   920  		p := s.Prog(v.Op.Asm())
   921  		p.From.Type = obj.TYPE_REG
   922  		p.From.Reg = ppc64.REGZERO
   923  		p.To.Type = obj.TYPE_MEM
   924  		p.To.Reg = v.Args[0].Reg()
   925  		ssagen.AddAux(&p.To, v)
   926  
   927  	case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVDstorezero:
   928  
   929  		// MOVDstore and MOVDstorezero become DS form instructions that are restricted
   930  		// to offset values that are a multiple of 4. If the offset field is not a
   931  		// multiple of 4, then the full address of the store target is computed (base +
   932  		// offset) and used as the new base register and the offset in the instruction
   933  		// is set to 0.
   934  
   935  		// This allows the MOVDstore and MOVDstorezero to be generated in more cases,
   936  		// and prevents checking of the offset value and alignment in the rules.
   937  
   938  		toAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
   939  		ssagen.AddAux(&toAddr, v)
   940  
   941  		if toAddr.Offset%4 != 0 {
   942  			p := s.Prog(ppc64.AMOVD)
   943  			p.From.Type = obj.TYPE_ADDR
   944  			p.From.Reg = v.Args[0].Reg()
   945  			ssagen.AddAux(&p.From, v)
   946  			p.To.Type = obj.TYPE_REG
   947  			p.To.Reg = ppc64.REGTMP
   948  			toAddr.Reg = ppc64.REGTMP
   949  			// Clear the offset field and other
   950  			// information that might be used
   951  			// by the assembler to add to the
   952  			// final offset value.
   953  			toAddr.Offset = 0
   954  			toAddr.Name = obj.NAME_NONE
   955  			toAddr.Sym = nil
   956  		}
   957  		p := s.Prog(v.Op.Asm())
   958  		p.To = toAddr
   959  		p.From.Type = obj.TYPE_REG
   960  		if v.Op == ssa.OpPPC64MOVDstorezero {
   961  			p.From.Reg = ppc64.REGZERO
   962  		} else {
   963  			p.From.Reg = v.Args[1].Reg()
   964  		}
   965  
   966  	case ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
   967  		p := s.Prog(v.Op.Asm())
   968  		p.From.Type = obj.TYPE_REG
   969  		p.From.Reg = v.Args[1].Reg()
   970  		p.To.Type = obj.TYPE_MEM
   971  		p.To.Reg = v.Args[0].Reg()
   972  		ssagen.AddAux(&p.To, v)
   973  
   974  	case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
   975  		ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
   976  		ssa.OpPPC64MOVHBRstoreidx:
   977  		p := s.Prog(v.Op.Asm())
   978  		p.From.Type = obj.TYPE_REG
   979  		p.From.Reg = v.Args[2].Reg()
   980  		p.To.Index = v.Args[1].Reg()
   981  		p.To.Type = obj.TYPE_MEM
   982  		p.To.Reg = v.Args[0].Reg()
   983  
   984  	case ssa.OpPPC64ISEL, ssa.OpPPC64ISELZ:
   985  		// ISEL  AuxInt ? arg0 : arg1
   986  		// ISELZ is a special case of ISEL where arg1 is implicitly $0.
   987  		//
   988  		// AuxInt value indicates conditions 0=LT 1=GT 2=EQ 3=SO 4=GE 5=LE 6=NE 7=NSO.
   989  		// ISEL accepts a CR bit argument, not a condition as expressed by AuxInt.
   990  		// Convert the condition to a CR bit argument by the following conversion:
   991  		//
   992  		// AuxInt&3 ? arg0 : arg1 for conditions LT, GT, EQ, SO
   993  		// AuxInt&3 ? arg1 : arg0 for conditions GE, LE, NE, NSO
   994  		p := s.Prog(v.Op.Asm())
   995  		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
   996  		p.Reg = v.Args[0].Reg()
   997  		if v.Op == ssa.OpPPC64ISEL {
   998  			p.AddRestSourceReg(v.Args[1].Reg())
   999  		} else {
  1000  			p.AddRestSourceReg(ppc64.REG_R0)
  1001  		}
  1002  		// AuxInt values 4,5,6,7 are implemented with reverse operand order from 0,1,2,3
  1003  		if v.AuxInt > 3 {
  1004  			p.Reg, p.GetFrom3().Reg = p.GetFrom3().Reg, p.Reg
  1005  		}
  1006  		p.From.SetConst(v.AuxInt & 3)
  1007  
  1008  	case ssa.OpPPC64SETBC, ssa.OpPPC64SETBCR:
  1009  		p := s.Prog(v.Op.Asm())
  1010  		p.To.Type = obj.TYPE_REG
  1011  		p.To.Reg = v.Reg()
  1012  		p.From.Type = obj.TYPE_REG
  1013  		p.From.Reg = int16(ppc64.REG_CR0LT + v.AuxInt)
  1014  
  1015  	case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort:
  1016  		// The LoweredQuad code generation
  1017  		// generates STXV instructions on
  1018  		// power9. The Short variation is used
  1019  		// if no loop is generated.
  1020  
  1021  		// sizes >= 128 (more than one 64 byte iteration) generate a loop as follows:
  1022  
  1023  		// Set up loop counter in CTR, used by BC
  1024  		// XXLXOR clears VS32
  1025  		//       XXLXOR VS32,VS32,VS32
  1026  		//       MOVD len/64,REG_TMP
  1027  		//       MOVD REG_TMP,CTR
  1028  		//       loop:
  1029  		//       STXV VS32,0(R20)
  1030  		//       STXV VS32,16(R20)
  1031  		//       STXV VS32,32(R20)
  1032  		//       STXV VS32,48(R20)
  1033  		//       ADD  $64,R20
  1034  		//       BC   16, 0, loop
  1035  
  1036  		// Number of 64 byte loop iterations
  1037  		ctr := v.AuxInt / 64
  1038  
  1039  		// Remainder bytes
  1040  		rem := v.AuxInt % 64
  1041  
  1042  		// Only generate a loop if there is more
  1043  		// than 1 iteration.
  1044  		if ctr > 1 {
  1045  			// Set up VS32 (V0) to hold 0s
  1046  			p := s.Prog(ppc64.AXXLXOR)
  1047  			p.From.Type = obj.TYPE_REG
  1048  			p.From.Reg = ppc64.REG_VS32
  1049  			p.To.Type = obj.TYPE_REG
  1050  			p.To.Reg = ppc64.REG_VS32
  1051  			p.Reg = ppc64.REG_VS32
  1052  
  1053  			// Set up CTR loop counter
  1054  			p = s.Prog(ppc64.AMOVD)
  1055  			p.From.Type = obj.TYPE_CONST
  1056  			p.From.Offset = ctr
  1057  			p.To.Type = obj.TYPE_REG
  1058  			p.To.Reg = ppc64.REGTMP
  1059  
  1060  			p = s.Prog(ppc64.AMOVD)
  1061  			p.From.Type = obj.TYPE_REG
  1062  			p.From.Reg = ppc64.REGTMP
  1063  			p.To.Type = obj.TYPE_REG
  1064  			p.To.Reg = ppc64.REG_CTR
  1065  
  1066  			// Don't generate padding for
  1067  			// loops with few iterations.
  1068  			if ctr > 3 {
  1069  				p = s.Prog(obj.APCALIGN)
  1070  				p.From.Type = obj.TYPE_CONST
  1071  				p.From.Offset = 16
  1072  			}
  1073  
  1074  			// generate 4 STXVs to zero 64 bytes
  1075  			var top *obj.Prog
  1076  
  1077  			p = s.Prog(ppc64.ASTXV)
  1078  			p.From.Type = obj.TYPE_REG
  1079  			p.From.Reg = ppc64.REG_VS32
  1080  			p.To.Type = obj.TYPE_MEM
  1081  			p.To.Reg = v.Args[0].Reg()
  1082  
  1083  			//  Save the top of loop
  1084  			if top == nil {
  1085  				top = p
  1086  			}
  1087  			p = s.Prog(ppc64.ASTXV)
  1088  			p.From.Type = obj.TYPE_REG
  1089  			p.From.Reg = ppc64.REG_VS32
  1090  			p.To.Type = obj.TYPE_MEM
  1091  			p.To.Reg = v.Args[0].Reg()
  1092  			p.To.Offset = 16
  1093  
  1094  			p = s.Prog(ppc64.ASTXV)
  1095  			p.From.Type = obj.TYPE_REG
  1096  			p.From.Reg = ppc64.REG_VS32
  1097  			p.To.Type = obj.TYPE_MEM
  1098  			p.To.Reg = v.Args[0].Reg()
  1099  			p.To.Offset = 32
  1100  
  1101  			p = s.Prog(ppc64.ASTXV)
  1102  			p.From.Type = obj.TYPE_REG
  1103  			p.From.Reg = ppc64.REG_VS32
  1104  			p.To.Type = obj.TYPE_MEM
  1105  			p.To.Reg = v.Args[0].Reg()
  1106  			p.To.Offset = 48
  1107  
  1108  			// Increment address for the
  1109  			// 64 bytes just zeroed.
  1110  			p = s.Prog(ppc64.AADD)
  1111  			p.Reg = v.Args[0].Reg()
  1112  			p.From.Type = obj.TYPE_CONST
  1113  			p.From.Offset = 64
  1114  			p.To.Type = obj.TYPE_REG
  1115  			p.To.Reg = v.Args[0].Reg()
  1116  
  1117  			// Branch back to top of loop
  1118  			// based on CTR
  1119  			// BC with BO_BCTR generates bdnz
  1120  			p = s.Prog(ppc64.ABC)
  1121  			p.From.Type = obj.TYPE_CONST
  1122  			p.From.Offset = ppc64.BO_BCTR
  1123  			p.Reg = ppc64.REG_CR0LT
  1124  			p.To.Type = obj.TYPE_BRANCH
  1125  			p.To.SetTarget(top)
  1126  		}
  1127  		// When ctr == 1 the loop was not generated but
  1128  		// there are at least 64 bytes to clear, so add
  1129  		// that to the remainder to generate the code
  1130  		// to clear those doublewords
  1131  		if ctr == 1 {
  1132  			rem += 64
  1133  		}
  1134  
  1135  		// Clear the remainder starting at offset zero
  1136  		offset := int64(0)
  1137  
  1138  		if rem >= 16 && ctr <= 1 {
  1139  			// If the XXLXOR hasn't already been
  1140  			// generated, do it here to initialize
  1141  			// VS32 (V0) to 0.
  1142  			p := s.Prog(ppc64.AXXLXOR)
  1143  			p.From.Type = obj.TYPE_REG
  1144  			p.From.Reg = ppc64.REG_VS32
  1145  			p.To.Type = obj.TYPE_REG
  1146  			p.To.Reg = ppc64.REG_VS32
  1147  			p.Reg = ppc64.REG_VS32
  1148  		}
  1149  		// Generate STXV for 32 or 64
  1150  		// bytes.
  1151  		for rem >= 32 {
  1152  			p := s.Prog(ppc64.ASTXV)
  1153  			p.From.Type = obj.TYPE_REG
  1154  			p.From.Reg = ppc64.REG_VS32
  1155  			p.To.Type = obj.TYPE_MEM
  1156  			p.To.Reg = v.Args[0].Reg()
  1157  			p.To.Offset = offset
  1158  
  1159  			p = s.Prog(ppc64.ASTXV)
  1160  			p.From.Type = obj.TYPE_REG
  1161  			p.From.Reg = ppc64.REG_VS32
  1162  			p.To.Type = obj.TYPE_MEM
  1163  			p.To.Reg = v.Args[0].Reg()
  1164  			p.To.Offset = offset + 16
  1165  			offset += 32
  1166  			rem -= 32
  1167  		}
  1168  		// Generate 16 bytes
  1169  		if rem >= 16 {
  1170  			p := s.Prog(ppc64.ASTXV)
  1171  			p.From.Type = obj.TYPE_REG
  1172  			p.From.Reg = ppc64.REG_VS32
  1173  			p.To.Type = obj.TYPE_MEM
  1174  			p.To.Reg = v.Args[0].Reg()
  1175  			p.To.Offset = offset
  1176  			offset += 16
  1177  			rem -= 16
  1178  		}
  1179  
  1180  		// first clear as many doublewords as possible
  1181  		// then clear remaining sizes as available
  1182  		for rem > 0 {
  1183  			op, size := ppc64.AMOVB, int64(1)
  1184  			switch {
  1185  			case rem >= 8:
  1186  				op, size = ppc64.AMOVD, 8
  1187  			case rem >= 4:
  1188  				op, size = ppc64.AMOVW, 4
  1189  			case rem >= 2:
  1190  				op, size = ppc64.AMOVH, 2
  1191  			}
  1192  			p := s.Prog(op)
  1193  			p.From.Type = obj.TYPE_REG
  1194  			p.From.Reg = ppc64.REG_R0
  1195  			p.To.Type = obj.TYPE_MEM
  1196  			p.To.Reg = v.Args[0].Reg()
  1197  			p.To.Offset = offset
  1198  			rem -= size
  1199  			offset += size
  1200  		}
  1201  
  1202  	case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort:
  1203  
  1204  		// Unaligned data doesn't hurt performance
  1205  		// for these instructions on power8.
  1206  
  1207  		// For sizes >= 64 generate a loop as follows:
  1208  
  1209  		// Set up loop counter in CTR, used by BC
  1210  		//       XXLXOR VS32,VS32,VS32
  1211  		//	 MOVD len/32,REG_TMP
  1212  		//	 MOVD REG_TMP,CTR
  1213  		//       MOVD $16,REG_TMP
  1214  		//	 loop:
  1215  		//	 STXVD2X VS32,(R0)(R20)
  1216  		//	 STXVD2X VS32,(R31)(R20)
  1217  		//	 ADD  $32,R20
  1218  		//	 BC   16, 0, loop
  1219  		//
  1220  		// any remainder is done as described below
  1221  
  1222  		// for sizes < 64 bytes, first clear as many doublewords as possible,
  1223  		// then handle the remainder
  1224  		//	MOVD R0,(R20)
  1225  		//	MOVD R0,8(R20)
  1226  		// .... etc.
  1227  		//
  1228  		// the remainder bytes are cleared using one or more
  1229  		// of the following instructions with the appropriate
  1230  		// offsets depending which instructions are needed
  1231  		//
  1232  		//	MOVW R0,n1(R20)	4 bytes
  1233  		//	MOVH R0,n2(R20)	2 bytes
  1234  		//	MOVB R0,n3(R20)	1 byte
  1235  		//
  1236  		// 7 bytes: MOVW, MOVH, MOVB
  1237  		// 6 bytes: MOVW, MOVH
  1238  		// 5 bytes: MOVW, MOVB
  1239  		// 3 bytes: MOVH, MOVB
  1240  
  1241  		// each loop iteration does 32 bytes
  1242  		ctr := v.AuxInt / 32
  1243  
  1244  		// remainder bytes
  1245  		rem := v.AuxInt % 32
  1246  
  1247  		// only generate a loop if there is more
  1248  		// than 1 iteration.
  1249  		if ctr > 1 {
  1250  			// Set up VS32 (V0) to hold 0s
  1251  			p := s.Prog(ppc64.AXXLXOR)
  1252  			p.From.Type = obj.TYPE_REG
  1253  			p.From.Reg = ppc64.REG_VS32
  1254  			p.To.Type = obj.TYPE_REG
  1255  			p.To.Reg = ppc64.REG_VS32
  1256  			p.Reg = ppc64.REG_VS32
  1257  
  1258  			// Set up CTR loop counter
  1259  			p = s.Prog(ppc64.AMOVD)
  1260  			p.From.Type = obj.TYPE_CONST
  1261  			p.From.Offset = ctr
  1262  			p.To.Type = obj.TYPE_REG
  1263  			p.To.Reg = ppc64.REGTMP
  1264  
  1265  			p = s.Prog(ppc64.AMOVD)
  1266  			p.From.Type = obj.TYPE_REG
  1267  			p.From.Reg = ppc64.REGTMP
  1268  			p.To.Type = obj.TYPE_REG
  1269  			p.To.Reg = ppc64.REG_CTR
  1270  
  1271  			// Set up R31 to hold index value 16
  1272  			p = s.Prog(ppc64.AMOVD)
  1273  			p.From.Type = obj.TYPE_CONST
  1274  			p.From.Offset = 16
  1275  			p.To.Type = obj.TYPE_REG
  1276  			p.To.Reg = ppc64.REGTMP
  1277  
  1278  			// Don't add padding for alignment
  1279  			// with few loop iterations.
  1280  			if ctr > 3 {
  1281  				p = s.Prog(obj.APCALIGN)
  1282  				p.From.Type = obj.TYPE_CONST
  1283  				p.From.Offset = 16
  1284  			}
  1285  
  1286  			// generate 2 STXVD2Xs, each storing 16 bytes;
  1287  			// the first one is saved as the top of the loop
  1288  			var top *obj.Prog
  1289  			// This is the top of loop
  1290  
  1291  			p = s.Prog(ppc64.ASTXVD2X)
  1292  			p.From.Type = obj.TYPE_REG
  1293  			p.From.Reg = ppc64.REG_VS32
  1294  			p.To.Type = obj.TYPE_MEM
  1295  			p.To.Reg = v.Args[0].Reg()
  1296  			p.To.Index = ppc64.REGZERO
  1297  			// Save the top of loop
  1298  			if top == nil {
  1299  				top = p
  1300  			}
  1301  			p = s.Prog(ppc64.ASTXVD2X)
  1302  			p.From.Type = obj.TYPE_REG
  1303  			p.From.Reg = ppc64.REG_VS32
  1304  			p.To.Type = obj.TYPE_MEM
  1305  			p.To.Reg = v.Args[0].Reg()
  1306  			p.To.Index = ppc64.REGTMP
  1307  
  1308  			// Increment address for the
  1309  			// 4 doublewords just zeroed.
  1310  			p = s.Prog(ppc64.AADD)
  1311  			p.Reg = v.Args[0].Reg()
  1312  			p.From.Type = obj.TYPE_CONST
  1313  			p.From.Offset = 32
  1314  			p.To.Type = obj.TYPE_REG
  1315  			p.To.Reg = v.Args[0].Reg()
  1316  
  1317  			// Branch back to top of loop
  1318  			// based on CTR
  1319  			// BC with BO_BCTR generates bdnz
  1320  			p = s.Prog(ppc64.ABC)
  1321  			p.From.Type = obj.TYPE_CONST
  1322  			p.From.Offset = ppc64.BO_BCTR
  1323  			p.Reg = ppc64.REG_CR0LT
  1324  			p.To.Type = obj.TYPE_BRANCH
  1325  			p.To.SetTarget(top)
  1326  		}
  1327  
  1328  		// when ctr == 1 the loop was not generated but
  1329  		// there are at least 32 bytes to clear, so add
  1330  		// that to the remainder to generate the code
  1331  		// to clear those doublewords
  1332  		if ctr == 1 {
  1333  			rem += 32
  1334  		}
  1335  
  1336  		// clear the remainder starting at offset zero
  1337  		offset := int64(0)
  1338  
  1339  		// first clear as many doublewords as possible
  1340  		// then clear remaining sizes as available
  1341  		for rem > 0 {
  1342  			op, size := ppc64.AMOVB, int64(1)
  1343  			switch {
  1344  			case rem >= 8:
  1345  				op, size = ppc64.AMOVD, 8
  1346  			case rem >= 4:
  1347  				op, size = ppc64.AMOVW, 4
  1348  			case rem >= 2:
  1349  				op, size = ppc64.AMOVH, 2
  1350  			}
  1351  			p := s.Prog(op)
  1352  			p.From.Type = obj.TYPE_REG
  1353  			p.From.Reg = ppc64.REG_R0
  1354  			p.To.Type = obj.TYPE_MEM
  1355  			p.To.Reg = v.Args[0].Reg()
  1356  			p.To.Offset = offset
  1357  			rem -= size
  1358  			offset += size
  1359  		}
  1360  
  1361  	case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort:
  1362  
  1363  		bytesPerLoop := int64(32)
  1364  		// This will be used when moving more
  1365  		// than 8 bytes.  Moves start with
  1366  		// as many 8 byte moves as possible, then
  1367  		// 4, 2, or 1 byte(s) as remaining.  This will
  1368  		// work and be efficient for power8 or later.
  1369  		// If there are 64 or more bytes, then a
  1370  		// loop is generated to move 32 bytes and
  1371  		// update the src and dst addresses on each
  1372  		// iteration. When < 64 bytes, the appropriate
  1373  		// number of moves are generated based on the
  1374  		// size.
  1375  		// When moving >= 64 bytes a loop is used
  1376  		//	MOVD len/32,REG_TMP
  1377  		//	MOVD REG_TMP,CTR
  1378  		//	MOVD $16,REG_TMP
  1379  		// top:
  1380  		//	LXVD2X (R0)(R21),VS32
  1381  		//	LXVD2X (R31)(R21),VS33
  1382  		//	ADD $32,R21
  1383  		//	STXVD2X VS32,(R0)(R20)
  1384  		//	STXVD2X VS33,(R31)(R20)
  1385  		//	ADD $32,R20
  1386  		//	BC 16,0,top
  1387  		// Bytes not moved by this loop are moved
  1388  		// with a combination of the following instructions,
  1389  		// starting with the largest sizes and generating as
  1390  		// many as needed, using the appropriate offset value.
  1391  		//	MOVD  n(R21),R31
  1392  		//	MOVD  R31,n(R20)
  1393  		//	MOVW  n1(R21),R31
  1394  		//	MOVW  R31,n1(R20)
  1395  		//	MOVH  n2(R21),R31
  1396  		//	MOVH  R31,n2(R20)
  1397  		//	MOVB  n3(R21),R31
  1398  		//	MOVB  R31,n3(R20)
  1399  
  1400  		// Each loop iteration moves 32 bytes
  1401  		ctr := v.AuxInt / bytesPerLoop
  1402  
  1403  		// Remainder after the loop
  1404  		rem := v.AuxInt % bytesPerLoop
  1405  
  1406  		dstReg := v.Args[0].Reg()
  1407  		srcReg := v.Args[1].Reg()
  1408  
  1409  		// The set of registers used here, must match the clobbered reg list
  1410  		// in PPC64Ops.go.
  1411  		offset := int64(0)
  1412  
  1413  		// top of the loop
  1414  		var top *obj.Prog
  1415  		// Only generate looping code when loop counter is > 1 for >= 64 bytes
  1416  		if ctr > 1 {
  1417  			// Set up the CTR
  1418  			p := s.Prog(ppc64.AMOVD)
  1419  			p.From.Type = obj.TYPE_CONST
  1420  			p.From.Offset = ctr
  1421  			p.To.Type = obj.TYPE_REG
  1422  			p.To.Reg = ppc64.REGTMP
  1423  
  1424  			p = s.Prog(ppc64.AMOVD)
  1425  			p.From.Type = obj.TYPE_REG
  1426  			p.From.Reg = ppc64.REGTMP
  1427  			p.To.Type = obj.TYPE_REG
  1428  			p.To.Reg = ppc64.REG_CTR
  1429  
  1430  			// Use REGTMP as index reg
  1431  			p = s.Prog(ppc64.AMOVD)
  1432  			p.From.Type = obj.TYPE_CONST
  1433  			p.From.Offset = 16
  1434  			p.To.Type = obj.TYPE_REG
  1435  			p.To.Reg = ppc64.REGTMP
  1436  
  1437  			// Don't add padding for
  1438  			// alignment with small iteration
  1439  			// counts.
  1440  			if ctr > 3 {
  1441  				p = s.Prog(obj.APCALIGN)
  1442  				p.From.Type = obj.TYPE_CONST
  1443  				p.From.Offset = 16
  1444  			}
  1445  
  1446  			// Generate 16 byte loads and stores.
  1447  			// Use temp register for index (16)
  1448  			// on the second one.
  1449  
  1450  			p = s.Prog(ppc64.ALXVD2X)
  1451  			p.From.Type = obj.TYPE_MEM
  1452  			p.From.Reg = srcReg
  1453  			p.From.Index = ppc64.REGZERO
  1454  			p.To.Type = obj.TYPE_REG
  1455  			p.To.Reg = ppc64.REG_VS32
  1456  			if top == nil {
  1457  				top = p
  1458  			}
  1459  			p = s.Prog(ppc64.ALXVD2X)
  1460  			p.From.Type = obj.TYPE_MEM
  1461  			p.From.Reg = srcReg
  1462  			p.From.Index = ppc64.REGTMP
  1463  			p.To.Type = obj.TYPE_REG
  1464  			p.To.Reg = ppc64.REG_VS33
  1465  
  1466  			// increment the src reg for next iteration
  1467  			p = s.Prog(ppc64.AADD)
  1468  			p.Reg = srcReg
  1469  			p.From.Type = obj.TYPE_CONST
  1470  			p.From.Offset = bytesPerLoop
  1471  			p.To.Type = obj.TYPE_REG
  1472  			p.To.Reg = srcReg
  1473  
  1474  			// generate 16 byte stores
  1475  			p = s.Prog(ppc64.ASTXVD2X)
  1476  			p.From.Type = obj.TYPE_REG
  1477  			p.From.Reg = ppc64.REG_VS32
  1478  			p.To.Type = obj.TYPE_MEM
  1479  			p.To.Reg = dstReg
  1480  			p.To.Index = ppc64.REGZERO
  1481  
  1482  			p = s.Prog(ppc64.ASTXVD2X)
  1483  			p.From.Type = obj.TYPE_REG
  1484  			p.From.Reg = ppc64.REG_VS33
  1485  			p.To.Type = obj.TYPE_MEM
  1486  			p.To.Reg = dstReg
  1487  			p.To.Index = ppc64.REGTMP
  1488  
  1489  			// increment the dst reg for next iteration
  1490  			p = s.Prog(ppc64.AADD)
  1491  			p.Reg = dstReg
  1492  			p.From.Type = obj.TYPE_CONST
  1493  			p.From.Offset = bytesPerLoop
  1494  			p.To.Type = obj.TYPE_REG
  1495  			p.To.Reg = dstReg
  1496  
  1497  			// BC with BO_BCTR generates bdnz to branch on nonzero CTR
  1498  			// to loop top.
  1499  			p = s.Prog(ppc64.ABC)
  1500  			p.From.Type = obj.TYPE_CONST
  1501  			p.From.Offset = ppc64.BO_BCTR
  1502  			p.Reg = ppc64.REG_CR0LT
  1503  			p.To.Type = obj.TYPE_BRANCH
  1504  			p.To.SetTarget(top)
  1505  
  1506  			// srcReg and dstReg were incremented in the loop, so
  1507  			// later instructions start with offset 0.
  1508  			offset = int64(0)
  1509  		}
  1510  
  1511  		// No loop was generated for one iteration, so
  1512  		// add 32 bytes to the remainder to move those bytes.
  1513  		if ctr == 1 {
  1514  			rem += bytesPerLoop
  1515  		}
  1516  
  1517  		if rem >= 16 {
  1518  			// Generate 16 byte loads and stores.
  1519  			// Use temp register for index (value 16)
  1520  			// on the second one.
  1521  			p := s.Prog(ppc64.ALXVD2X)
  1522  			p.From.Type = obj.TYPE_MEM
  1523  			p.From.Reg = srcReg
  1524  			p.From.Index = ppc64.REGZERO
  1525  			p.To.Type = obj.TYPE_REG
  1526  			p.To.Reg = ppc64.REG_VS32
  1527  
  1528  			p = s.Prog(ppc64.ASTXVD2X)
  1529  			p.From.Type = obj.TYPE_REG
  1530  			p.From.Reg = ppc64.REG_VS32
  1531  			p.To.Type = obj.TYPE_MEM
  1532  			p.To.Reg = dstReg
  1533  			p.To.Index = ppc64.REGZERO
  1534  
  1535  			offset = 16
  1536  			rem -= 16
  1537  
  1538  			if rem >= 16 {
  1539  				// Use REGTMP as index reg
  1540  				p := s.Prog(ppc64.AMOVD)
  1541  				p.From.Type = obj.TYPE_CONST
  1542  				p.From.Offset = 16
  1543  				p.To.Type = obj.TYPE_REG
  1544  				p.To.Reg = ppc64.REGTMP
  1545  
  1546  				p = s.Prog(ppc64.ALXVD2X)
  1547  				p.From.Type = obj.TYPE_MEM
  1548  				p.From.Reg = srcReg
  1549  				p.From.Index = ppc64.REGTMP
  1550  				p.To.Type = obj.TYPE_REG
  1551  				p.To.Reg = ppc64.REG_VS32
  1552  
  1553  				p = s.Prog(ppc64.ASTXVD2X)
  1554  				p.From.Type = obj.TYPE_REG
  1555  				p.From.Reg = ppc64.REG_VS32
  1556  				p.To.Type = obj.TYPE_MEM
  1557  				p.To.Reg = dstReg
  1558  				p.To.Index = ppc64.REGTMP
  1559  
  1560  				offset = 32
  1561  				rem -= 16
  1562  			}
  1563  		}
  1564  
  1565  		// Generate all the remaining load and store pairs, starting with
  1566  		// as many 8 byte moves as possible, then 4, 2, 1.
  1567  		for rem > 0 {
  1568  			op, size := ppc64.AMOVB, int64(1)
  1569  			switch {
  1570  			case rem >= 8:
  1571  				op, size = ppc64.AMOVD, 8
  1572  			case rem >= 4:
  1573  				op, size = ppc64.AMOVWZ, 4
  1574  			case rem >= 2:
  1575  				op, size = ppc64.AMOVH, 2
  1576  			}
  1577  			// Load
  1578  			p := s.Prog(op)
  1579  			p.To.Type = obj.TYPE_REG
  1580  			p.To.Reg = ppc64.REGTMP
  1581  			p.From.Type = obj.TYPE_MEM
  1582  			p.From.Reg = srcReg
  1583  			p.From.Offset = offset
  1584  
  1585  			// Store
  1586  			p = s.Prog(op)
  1587  			p.From.Type = obj.TYPE_REG
  1588  			p.From.Reg = ppc64.REGTMP
  1589  			p.To.Type = obj.TYPE_MEM
  1590  			p.To.Reg = dstReg
  1591  			p.To.Offset = offset
  1592  			rem -= size
  1593  			offset += size
  1594  		}
  1595  
  1596  	case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort:
  1597  		bytesPerLoop := int64(64)
  1598  		// This is used when moving more
  1599  		// than 8 bytes on power9. With fewer than 128
  1600  		// bytes, straight-line code is generated: 16 byte
  1601  		// LXV/STXV pairs first, then 8, 4, 2, or 1 byte
  1602  		// moves for what remains.
  1603  		// With 128 or more bytes (loop count > 1), a loop
  1604  		// moves 64 bytes and updates the src and dst
  1605  		// addresses on each iteration:
  1606  		//      MOVD len/64,REG_TMP
  1607  		//      MOVD REG_TMP,CTR
  1608  		// top:
  1609  		//      LXV 0(R21),VS32
  1610  		//      LXV 16(R21),VS33
  1611  		//      STXV VS32,0(R20)
  1612  		//      STXV VS33,16(R20)
  1613  		//      LXV 32(R21),VS32
  1614  		//      LXV 48(R21),VS33
  1615  		//      STXV VS32,32(R20)
  1616  		//      STXV VS33,48(R20)
  1617  		//      ADD $64,R21
  1618  		//      ADD $64,R20
  1619  		//      BC 16,0,top
  1620  		// Bytes not moved by this loop are moved
  1621  		// with a combination of the following instructions,
  1622  		// starting with the largest sizes and generating as
  1623  		// many as needed, using the appropriate offset value.
  1624  		//      MOVD  n(R21),R31
  1625  		//      MOVD  R31,n(R20)
  1626  		//      MOVW  n1(R21),R31
  1627  		//      MOVW  R31,n1(R20)
  1628  		//      MOVH  n2(R21),R31
  1629  		//      MOVH  R31,n2(R20)
  1630  		//      MOVB  n3(R21),R31
  1631  		//      MOVB  R31,n3(R20)
  1632  
  1633  		// Each loop iteration moves 64 bytes
  1634  		ctr := v.AuxInt / bytesPerLoop
  1635  
  1636  		// Remainder after the loop
  1637  		rem := v.AuxInt % bytesPerLoop
  1638  
  1639  		dstReg := v.Args[0].Reg()
  1640  		srcReg := v.Args[1].Reg()
  1641  
  1642  		offset := int64(0)
  1643  
  1644  		// top of the loop
  1645  		var top *obj.Prog
  1646  
  1647  		// Only generate looping code when loop counter is > 1 for >= 128 bytes
  1648  		if ctr > 1 {
  1649  			// Set up the CTR
  1650  			p := s.Prog(ppc64.AMOVD)
  1651  			p.From.Type = obj.TYPE_CONST
  1652  			p.From.Offset = ctr
  1653  			p.To.Type = obj.TYPE_REG
  1654  			p.To.Reg = ppc64.REGTMP
  1655  
  1656  			p = s.Prog(ppc64.AMOVD)
  1657  			p.From.Type = obj.TYPE_REG
  1658  			p.From.Reg = ppc64.REGTMP
  1659  			p.To.Type = obj.TYPE_REG
  1660  			p.To.Reg = ppc64.REG_CTR
  1661  
  1662  			p = s.Prog(obj.APCALIGN)
  1663  			p.From.Type = obj.TYPE_CONST
  1664  			p.From.Offset = 16
  1665  
  1666  			// Generate 16 byte loads and stores.
  1667  			p = s.Prog(ppc64.ALXV)
  1668  			p.From.Type = obj.TYPE_MEM
  1669  			p.From.Reg = srcReg
  1670  			p.From.Offset = offset
  1671  			p.To.Type = obj.TYPE_REG
  1672  			p.To.Reg = ppc64.REG_VS32
  1673  			if top == nil {
  1674  				top = p
  1675  			}
  1676  			p = s.Prog(ppc64.ALXV)
  1677  			p.From.Type = obj.TYPE_MEM
  1678  			p.From.Reg = srcReg
  1679  			p.From.Offset = offset + 16
  1680  			p.To.Type = obj.TYPE_REG
  1681  			p.To.Reg = ppc64.REG_VS33
  1682  
  1683  			// generate 16 byte stores
  1684  			p = s.Prog(ppc64.ASTXV)
  1685  			p.From.Type = obj.TYPE_REG
  1686  			p.From.Reg = ppc64.REG_VS32
  1687  			p.To.Type = obj.TYPE_MEM
  1688  			p.To.Reg = dstReg
  1689  			p.To.Offset = offset
  1690  
  1691  			p = s.Prog(ppc64.ASTXV)
  1692  			p.From.Type = obj.TYPE_REG
  1693  			p.From.Reg = ppc64.REG_VS33
  1694  			p.To.Type = obj.TYPE_MEM
  1695  			p.To.Reg = dstReg
  1696  			p.To.Offset = offset + 16
  1697  
  1698  			// Generate 16 byte loads and stores.
  1699  			p = s.Prog(ppc64.ALXV)
  1700  			p.From.Type = obj.TYPE_MEM
  1701  			p.From.Reg = srcReg
  1702  			p.From.Offset = offset + 32
  1703  			p.To.Type = obj.TYPE_REG
  1704  			p.To.Reg = ppc64.REG_VS32
  1705  
  1706  			p = s.Prog(ppc64.ALXV)
  1707  			p.From.Type = obj.TYPE_MEM
  1708  			p.From.Reg = srcReg
  1709  			p.From.Offset = offset + 48
  1710  			p.To.Type = obj.TYPE_REG
  1711  			p.To.Reg = ppc64.REG_VS33
  1712  
  1713  			// generate 16 byte stores
  1714  			p = s.Prog(ppc64.ASTXV)
  1715  			p.From.Type = obj.TYPE_REG
  1716  			p.From.Reg = ppc64.REG_VS32
  1717  			p.To.Type = obj.TYPE_MEM
  1718  			p.To.Reg = dstReg
  1719  			p.To.Offset = offset + 32
  1720  
  1721  			p = s.Prog(ppc64.ASTXV)
  1722  			p.From.Type = obj.TYPE_REG
  1723  			p.From.Reg = ppc64.REG_VS33
  1724  			p.To.Type = obj.TYPE_MEM
  1725  			p.To.Reg = dstReg
  1726  			p.To.Offset = offset + 48
  1727  
  1728  			// increment the src reg for next iteration
  1729  			p = s.Prog(ppc64.AADD)
  1730  			p.Reg = srcReg
  1731  			p.From.Type = obj.TYPE_CONST
  1732  			p.From.Offset = bytesPerLoop
  1733  			p.To.Type = obj.TYPE_REG
  1734  			p.To.Reg = srcReg
  1735  
  1736  			// increment the dst reg for next iteration
  1737  			p = s.Prog(ppc64.AADD)
  1738  			p.Reg = dstReg
  1739  			p.From.Type = obj.TYPE_CONST
  1740  			p.From.Offset = bytesPerLoop
  1741  			p.To.Type = obj.TYPE_REG
  1742  			p.To.Reg = dstReg
  1743  
  1744  			// BC with BO_BCTR generates bdnz to branch on nonzero CTR
  1745  			// to loop top.
  1746  			p = s.Prog(ppc64.ABC)
  1747  			p.From.Type = obj.TYPE_CONST
  1748  			p.From.Offset = ppc64.BO_BCTR
  1749  			p.Reg = ppc64.REG_CR0LT
  1750  			p.To.Type = obj.TYPE_BRANCH
  1751  			p.To.SetTarget(top)
  1752  
  1753  			// srcReg and dstReg were incremented in the loop, so
  1754  			// later instructions start with offset 0.
  1755  			offset = int64(0)
  1756  		}
  1757  
  1758  		// No loop was generated for one iteration, so
  1759  		// add 64 bytes to the remainder to move those bytes.
  1760  		if ctr == 1 {
  1761  			rem += bytesPerLoop
  1762  		}
  1763  		if rem >= 32 {
  1764  			p := s.Prog(ppc64.ALXV)
  1765  			p.From.Type = obj.TYPE_MEM
  1766  			p.From.Reg = srcReg
  1767  			p.To.Type = obj.TYPE_REG
  1768  			p.To.Reg = ppc64.REG_VS32
  1769  
  1770  			p = s.Prog(ppc64.ALXV)
  1771  			p.From.Type = obj.TYPE_MEM
  1772  			p.From.Reg = srcReg
  1773  			p.From.Offset = 16
  1774  			p.To.Type = obj.TYPE_REG
  1775  			p.To.Reg = ppc64.REG_VS33
  1776  
  1777  			p = s.Prog(ppc64.ASTXV)
  1778  			p.From.Type = obj.TYPE_REG
  1779  			p.From.Reg = ppc64.REG_VS32
  1780  			p.To.Type = obj.TYPE_MEM
  1781  			p.To.Reg = dstReg
  1782  
  1783  			p = s.Prog(ppc64.ASTXV)
  1784  			p.From.Type = obj.TYPE_REG
  1785  			p.From.Reg = ppc64.REG_VS33
  1786  			p.To.Type = obj.TYPE_MEM
  1787  			p.To.Reg = dstReg
  1788  			p.To.Offset = 16
  1789  
  1790  			offset = 32
  1791  			rem -= 32
  1792  		}
  1793  
  1794  		if rem >= 16 {
  1795  			// Generate 16 byte loads and stores.
  1796  			p := s.Prog(ppc64.ALXV)
  1797  			p.From.Type = obj.TYPE_MEM
  1798  			p.From.Reg = srcReg
  1799  			p.From.Offset = offset
  1800  			p.To.Type = obj.TYPE_REG
  1801  			p.To.Reg = ppc64.REG_VS32
  1802  
  1803  			p = s.Prog(ppc64.ASTXV)
  1804  			p.From.Type = obj.TYPE_REG
  1805  			p.From.Reg = ppc64.REG_VS32
  1806  			p.To.Type = obj.TYPE_MEM
  1807  			p.To.Reg = dstReg
  1808  			p.To.Offset = offset
  1809  
  1810  			offset += 16
  1811  			rem -= 16
  1812  
  1813  			if rem >= 16 {
  1814  				p := s.Prog(ppc64.ALXV)
  1815  				p.From.Type = obj.TYPE_MEM
  1816  				p.From.Reg = srcReg
  1817  				p.From.Offset = offset
  1818  				p.To.Type = obj.TYPE_REG
  1819  				p.To.Reg = ppc64.REG_VS32
  1820  
  1821  				p = s.Prog(ppc64.ASTXV)
  1822  				p.From.Type = obj.TYPE_REG
  1823  				p.From.Reg = ppc64.REG_VS32
  1824  				p.To.Type = obj.TYPE_MEM
  1825  				p.To.Reg = dstReg
  1826  				p.To.Offset = offset
  1827  
  1828  				offset += 16
  1829  				rem -= 16
  1830  			}
  1831  		}
  1832  		// Generate all the remaining load and store pairs, starting with
  1833  		// as many 8 byte moves as possible, then 4, 2, 1.
  1834  		for rem > 0 {
  1835  			op, size := ppc64.AMOVB, int64(1)
  1836  			switch {
  1837  			case rem >= 8:
  1838  				op, size = ppc64.AMOVD, 8
  1839  			case rem >= 4:
  1840  				op, size = ppc64.AMOVWZ, 4
  1841  			case rem >= 2:
  1842  				op, size = ppc64.AMOVH, 2
  1843  			}
  1844  			// Load
  1845  			p := s.Prog(op)
  1846  			p.To.Type = obj.TYPE_REG
  1847  			p.To.Reg = ppc64.REGTMP
  1848  			p.From.Type = obj.TYPE_MEM
  1849  			p.From.Reg = srcReg
  1850  			p.From.Offset = offset
  1851  
  1852  			// Store
  1853  			p = s.Prog(op)
  1854  			p.From.Type = obj.TYPE_REG
  1855  			p.From.Reg = ppc64.REGTMP
  1856  			p.To.Type = obj.TYPE_MEM
  1857  			p.To.Reg = dstReg
  1858  			p.To.Offset = offset
  1859  			rem -= size
  1860  			offset += size
  1861  		}
  1862  
  1863  	case ssa.OpPPC64CALLstatic:
  1864  		s.Call(v)
  1865  
  1866  	case ssa.OpPPC64CALLtail:
  1867  		s.TailCall(v)
  1868  
  1869  	case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
  1870  		p := s.Prog(ppc64.AMOVD)
  1871  		p.From.Type = obj.TYPE_REG
  1872  		p.From.Reg = v.Args[0].Reg()
  1873  		p.To.Type = obj.TYPE_REG
  1874  		p.To.Reg = ppc64.REG_LR
  1875  
  1876  		if v.Args[0].Reg() != ppc64.REG_R12 {
  1877  			v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
  1878  		}
  1879  
  1880  		pp := s.Call(v)
  1881  
  1882  		// Convert the call into a blrl with hint this is not a subroutine return.
  1883  		// The full bclrl opcode must be specified when passing a hint.
  1884  		pp.As = ppc64.ABCL
  1885  		pp.From.Type = obj.TYPE_CONST
  1886  		pp.From.Offset = ppc64.BO_ALWAYS
  1887  		pp.Reg = ppc64.REG_CR0LT // The preferred value if BI is ignored.
  1888  		pp.To.Reg = ppc64.REG_LR
  1889  		pp.AddRestSourceConst(1)
  1890  
  1891  		if ppc64.NeedTOCpointer(base.Ctxt) {
  1892  			// When compiling Go into PIC, the function we just
  1893  			// called via pointer might have been implemented in
  1894  			// a separate module and so overwritten the TOC
  1895  			// pointer in R2; reload it.
  1896  			q := s.Prog(ppc64.AMOVD)
  1897  			q.From.Type = obj.TYPE_MEM
  1898  			q.From.Offset = 24
  1899  			q.From.Reg = ppc64.REGSP
  1900  			q.To.Type = obj.TYPE_REG
  1901  			q.To.Reg = ppc64.REG_R2
  1902  		}
  1903  
  1904  	case ssa.OpPPC64LoweredWB:
  1905  		p := s.Prog(obj.ACALL)
  1906  		p.To.Type = obj.TYPE_MEM
  1907  		p.To.Name = obj.NAME_EXTERN
  1908  		// AuxInt encodes how many buffer entries we need.
  1909  		p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1]
  1910  
  1911  	case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
  1912  		p := s.Prog(obj.ACALL)
  1913  		p.To.Type = obj.TYPE_MEM
  1914  		p.To.Name = obj.NAME_EXTERN
  1915  		p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
  1916  		s.UseArgs(16) // space used in callee args area by assembly stubs
  1917  
  1918  	case ssa.OpPPC64LoweredNilCheck:
  1919  		if buildcfg.GOOS == "aix" {
  1920  			// CMP Rarg0, $0
  1921  			// BNE 2(PC)
  1922  			// STW R0, 0(R0)
  1923  			// NOP (so the BNE has somewhere to land)
  1924  
  1925  			// CMP Rarg0, $0
  1926  			p := s.Prog(ppc64.ACMP)
  1927  			p.From.Type = obj.TYPE_REG
  1928  			p.From.Reg = v.Args[0].Reg()
  1929  			p.To.Type = obj.TYPE_CONST
  1930  			p.To.Offset = 0
  1931  
  1932  			// BNE 2(PC)
  1933  			p2 := s.Prog(ppc64.ABNE)
  1934  			p2.To.Type = obj.TYPE_BRANCH
  1935  
  1936  			// STW R0, 0(R0)
  1937  			// Write at 0 is forbidden and will trigger a SIGSEGV
  1938  			p = s.Prog(ppc64.AMOVW)
  1939  			p.From.Type = obj.TYPE_REG
  1940  			p.From.Reg = ppc64.REG_R0
  1941  			p.To.Type = obj.TYPE_MEM
  1942  			p.To.Reg = ppc64.REG_R0
  1943  
  1944  			// NOP (so the BNE has somewhere to land)
  1945  			nop := s.Prog(obj.ANOP)
  1946  			p2.To.SetTarget(nop)
  1947  
  1948  		} else {
  1949  			// Issue a load which will fault if arg is nil.
  1950  			p := s.Prog(ppc64.AMOVBZ)
  1951  			p.From.Type = obj.TYPE_MEM
  1952  			p.From.Reg = v.Args[0].Reg()
  1953  			ssagen.AddAux(&p.From, v)
  1954  			p.To.Type = obj.TYPE_REG
  1955  			p.To.Reg = ppc64.REGTMP
  1956  		}
  1957  		if logopt.Enabled() {
  1958  			logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
  1959  		}
  1960  		if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
  1961  			base.WarnfAt(v.Pos, "generated nil check")
  1962  		}
  1963  
  1964  	// These should be resolved by rules and not make it here.
  1965  	case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
  1966  		ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
  1967  		ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
  1968  		v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
  1969  	case ssa.OpPPC64InvertFlags:
  1970  		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
  1971  	case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
  1972  		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
  1973  	case ssa.OpClobber, ssa.OpClobberReg:
  1974  		// TODO: implement for clobberdead experiment. Nop is ok for now.
  1975  	default:
  1976  		v.Fatalf("genValue not implemented: %s", v.LongString())
  1977  	}
  1978  }
  1979  
  1980  var blockJump = [...]struct {
  1981  	asm, invasm     obj.As
  1982  	asmeq, invasmun bool
  1983  }{
  1984  	ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
  1985  	ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
  1986  
  1987  	ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
  1988  	ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
  1989  	ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
  1990  	ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
  1991  
  1992  	// TODO: need to work FP comparisons into block jumps
  1993  	ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
  1994  	ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
  1995  	ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
  1996  	ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
  1997  }
  1998  
  1999  func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
  2000  	switch b.Kind {
  2001  	case ssa.BlockDefer:
  2002  		// defer returns in R3:
  2003  		// 0 if we should continue executing
  2004  		// 1 if we should jump to deferreturn call
  2005  		p := s.Prog(ppc64.ACMP)
  2006  		p.From.Type = obj.TYPE_REG
  2007  		p.From.Reg = ppc64.REG_R3
  2008  		p.To.Type = obj.TYPE_CONST
  2009  		p.To.Offset = 0
  2010  
  2011  		p = s.Prog(ppc64.ABNE)
  2012  		p.To.Type = obj.TYPE_BRANCH
  2013  		s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()})
  2014  		if b.Succs[0].Block() != next {
  2015  			p := s.Prog(obj.AJMP)
  2016  			p.To.Type = obj.TYPE_BRANCH
  2017  			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
  2018  		}
  2019  
  2020  	case ssa.BlockPlain:
  2021  		if b.Succs[0].Block() != next {
  2022  			p := s.Prog(obj.AJMP)
  2023  			p.To.Type = obj.TYPE_BRANCH
  2024  			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
  2025  		}
  2026  	case ssa.BlockExit, ssa.BlockRetJmp:
  2027  	case ssa.BlockRet:
  2028  		s.Prog(obj.ARET)
  2029  
  2030  	case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
  2031  		ssa.BlockPPC64LT, ssa.BlockPPC64GE,
  2032  		ssa.BlockPPC64LE, ssa.BlockPPC64GT,
  2033  		ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
  2034  		ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
  2035  		jmp := blockJump[b.Kind]
  2036  		switch next {
  2037  		case b.Succs[0].Block():
  2038  			s.Br(jmp.invasm, b.Succs[1].Block())
  2039  			if jmp.invasmun {
  2040  				// TODO: The second branch is probably predict-not-taken since it is for FP unordered
  2041  				s.Br(ppc64.ABVS, b.Succs[1].Block())
  2042  			}
  2043  		case b.Succs[1].Block():
  2044  			s.Br(jmp.asm, b.Succs[0].Block())
  2045  			if jmp.asmeq {
  2046  				s.Br(ppc64.ABEQ, b.Succs[0].Block())
  2047  			}
  2048  		default:
  2049  			if b.Likely != ssa.BranchUnlikely {
  2050  				s.Br(jmp.asm, b.Succs[0].Block())
  2051  				if jmp.asmeq {
  2052  					s.Br(ppc64.ABEQ, b.Succs[0].Block())
  2053  				}
  2054  				s.Br(obj.AJMP, b.Succs[1].Block())
  2055  			} else {
  2056  				s.Br(jmp.invasm, b.Succs[1].Block())
  2057  				if jmp.invasmun {
  2058  					// TODO: The second branch is probably predict-not-taken since it is for FP unordered
  2059  					s.Br(ppc64.ABVS, b.Succs[1].Block())
  2060  				}
  2061  				s.Br(obj.AJMP, b.Succs[0].Block())
  2062  			}
  2063  		}
  2064  	default:
  2065  		b.Fatalf("branch not implemented: %s", b.LongString())
  2066  	}
  2067  }
  2068  
  2069  func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
  2070  	p := s.Prog(loadByType(t))
  2071  	p.From.Type = obj.TYPE_MEM
  2072  	p.From.Name = obj.NAME_AUTO
  2073  	p.From.Sym = n.Linksym()
  2074  	p.From.Offset = n.FrameOffset() + off
  2075  	p.To.Type = obj.TYPE_REG
  2076  	p.To.Reg = reg
  2077  	return p
  2078  }
  2079  
  2080  func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
  2081  	p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
  2082  	p.To.Name = obj.NAME_PARAM
  2083  	p.To.Sym = n.Linksym()
  2084  	p.Pos = p.Pos.WithNotStmt()
  2085  	return p
  2086  }
  2087  

View as plain text