PPC64Ops.go

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package main
     6  
     7  import "strings"
     8  
     9  // Notes:
    10  //  - Less-than-64-bit integer types live in the low portion of registers.
    11  //    The upper portion is junk.
    12  //  - Boolean types are zero or 1; stored in a byte, with upper bytes of the register containing junk.
    13  //  - *const instructions may use a constant larger than the instruction can encode.
    14  //    In this case the assembler expands to multiple instructions and uses tmp
    15  //    register (R31).
    16  
    17  var regNamesPPC64 = []string{
    18  	"R0", // REGZERO, not used, but simplifies counting in regalloc
    19  	"SP", // REGSP
    20  	"SB", // REGSB
    21  	"R3",
    22  	"R4",
    23  	"R5",
    24  	"R6",
    25  	"R7",
    26  	"R8",
    27  	"R9",
    28  	"R10",
    29  	"R11", // REGCTXT for closures
    30  	"R12",
    31  	"R13", // REGTLS
    32  	"R14",
    33  	"R15",
    34  	"R16",
    35  	"R17",
    36  	"R18",
    37  	"R19",
    38  	"R20",
    39  	"R21",
    40  	"R22",
    41  	"R23",
    42  	"R24",
    43  	"R25",
    44  	"R26",
    45  	"R27",
    46  	"R28",
    47  	"R29",
    48  	"g",   // REGG.  Using name "g" and setting Config.hasGReg makes it "just happen".
    49  	"R31", // REGTMP
    50  
    51  	"F0",
    52  	"F1",
    53  	"F2",
    54  	"F3",
    55  	"F4",
    56  	"F5",
    57  	"F6",
    58  	"F7",
    59  	"F8",
    60  	"F9",
    61  	"F10",
    62  	"F11",
    63  	"F12",
    64  	"F13",
    65  	"F14",
    66  	"F15",
    67  	"F16",
    68  	"F17",
    69  	"F18",
    70  	"F19",
    71  	"F20",
    72  	"F21",
    73  	"F22",
    74  	"F23",
    75  	"F24",
    76  	"F25",
    77  	"F26",
    78  	"F27",
    79  	"F28",
    80  	"F29",
    81  	"F30",
    82  	// "F31", the allocator is limited to 64 entries. We sacrifice this FPR to support XER.
    83  
    84  	"XER",
    85  
    86  	// If you add registers, update asyncPreempt in runtime.
    87  
    88  	// "CR0",
    89  	// "CR1",
    90  	// "CR2",
    91  	// "CR3",
    92  	// "CR4",
    93  	// "CR5",
    94  	// "CR6",
    95  	// "CR7",
    96  
    97  	// "CR",
    98  	// "LR",
    99  	// "CTR",
   100  }
   101  
   102  func init() {
   103  	// Make map from reg names to reg integers.
   104  	if len(regNamesPPC64) > 64 {
   105  		panic("too many registers")
   106  	}
   107  	num := map[string]int{}
   108  	for i, name := range regNamesPPC64 {
   109  		num[name] = i
   110  	}
   111  	buildReg := func(s string) regMask {
   112  		m := regMask(0)
   113  		for _, r := range strings.Split(s, " ") {
   114  			if n, ok := num[r]; ok {
   115  				m |= regMask(1) << uint(n)
   116  				continue
   117  			}
   118  			panic("register " + r + " not found")
   119  		}
   120  		return m
   121  	}
   122  
   123  	var (
   124  		gp  = buildReg("R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29")
   125  		fp  = buildReg("F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30")
   126  		sp  = buildReg("SP")
   127  		sb  = buildReg("SB")
   128  		gr  = buildReg("g")
   129  		xer = buildReg("XER")
   130  		// cr  = buildReg("CR")
   131  		// ctr = buildReg("CTR")
   132  		// lr  = buildReg("LR")
   133  		tmp     = buildReg("R31")
   134  		ctxt    = buildReg("R11")
   135  		callptr = buildReg("R12")
   136  		// tls = buildReg("R13")
   137  		gp01        = regInfo{inputs: nil, outputs: []regMask{gp}}
   138  		gp11        = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}
   139  		xergp       = regInfo{inputs: []regMask{xer}, outputs: []regMask{gp}, clobbers: xer}
   140  		gp11cxer    = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}, clobbers: xer}
   141  		gp11xer     = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp, xer}}
   142  		gp1xer1xer  = regInfo{inputs: []regMask{gp | sp | sb, xer}, outputs: []regMask{gp, xer}, clobbers: xer}
   143  		gp21        = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}}
   144  		gp21a0      = regInfo{inputs: []regMask{gp, gp | sp | sb}, outputs: []regMask{gp}}
   145  		gp21cxer    = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}, clobbers: xer}
   146  		gp21xer     = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp, xer}, clobbers: xer}
   147  		gp2xer1xer  = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, xer}, outputs: []regMask{gp, xer}, clobbers: xer}
   148  		gp31        = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}}
   149  		gp1cr       = regInfo{inputs: []regMask{gp | sp | sb}}
   150  		gp2cr       = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}}
   151  		crgp        = regInfo{inputs: nil, outputs: []regMask{gp}}
   152  		crgp11      = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp}}
   153  		crgp21      = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}}
   154  		gpload      = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}
   155  		gploadidx   = regInfo{inputs: []regMask{gp | sp | sb, gp}, outputs: []regMask{gp}}
   156  		prefreg     = regInfo{inputs: []regMask{gp | sp | sb}}
   157  		gpstore     = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}}
   158  		gpstoreidx  = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, gp | sp | sb}}
   159  		gpstorezero = regInfo{inputs: []regMask{gp | sp | sb}} // ppc64.REGZERO is reserved zero value
   160  		gpxchg      = regInfo{inputs: []regMask{gp | sp | sb, gp}, outputs: []regMask{gp}}
   161  		gpcas       = regInfo{inputs: []regMask{gp | sp | sb, gp, gp}, outputs: []regMask{gp}}
   162  		fp01        = regInfo{inputs: nil, outputs: []regMask{fp}}
   163  		fp11        = regInfo{inputs: []regMask{fp}, outputs: []regMask{fp}}
   164  		fpgp        = regInfo{inputs: []regMask{fp}, outputs: []regMask{gp}}
   165  		gpfp        = regInfo{inputs: []regMask{gp}, outputs: []regMask{fp}}
   166  		fp21        = regInfo{inputs: []regMask{fp, fp}, outputs: []regMask{fp}}
   167  		fp31        = regInfo{inputs: []regMask{fp, fp, fp}, outputs: []regMask{fp}}
   168  		fp2cr       = regInfo{inputs: []regMask{fp, fp}}
   169  		fpload      = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{fp}}
   170  		fploadidx   = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{fp}}
   171  		fpstore     = regInfo{inputs: []regMask{gp | sp | sb, fp}}
   172  		fpstoreidx  = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, fp}}
   173  		callerSave  = regMask(gp | fp | gr | xer)
   174  		r3          = buildReg("R3")
   175  		r4          = buildReg("R4")
   176  		r5          = buildReg("R5")
   177  		r6          = buildReg("R6")
   178  	)
   179  	ops := []opData{
   180  		{name: "ADD", argLength: 2, reg: gp21, asm: "ADD", commutative: true},                              // arg0 + arg1
   181  		{name: "ADDCC", argLength: 2, reg: gp21, asm: "ADDCC", commutative: true, typ: "(Int,Flags)"},      // arg0 + arg1
   182  		{name: "ADDconst", argLength: 1, reg: gp11, asm: "ADD", aux: "Int64"},                              // arg0 + auxInt
   183  		{name: "ADDCCconst", argLength: 1, reg: gp11cxer, asm: "ADDCCC", aux: "Int64", typ: "(Int,Flags)"}, // arg0 + auxInt sets CC, clobbers XER
   184  		{name: "FADD", argLength: 2, reg: fp21, asm: "FADD", commutative: true},                            // arg0+arg1
   185  		{name: "FADDS", argLength: 2, reg: fp21, asm: "FADDS", commutative: true},                          // arg0+arg1
   186  		{name: "SUB", argLength: 2, reg: gp21, asm: "SUB"},                                                 // arg0-arg1
   187  		{name: "SUBCC", argLength: 2, reg: gp21, asm: "SUBCC", typ: "(Int,Flags)"},                         // arg0-arg1 sets CC
   188  		{name: "SUBFCconst", argLength: 1, reg: gp11cxer, asm: "SUBC", aux: "Int64"},                       // auxInt - arg0 (carry is ignored)
   189  		{name: "FSUB", argLength: 2, reg: fp21, asm: "FSUB"},                                               // arg0-arg1
   190  		{name: "FSUBS", argLength: 2, reg: fp21, asm: "FSUBS"},                                             // arg0-arg1
   191  
   192  		// Note, the FPU works with float64 in register.
   193  		{name: "XSMINJDP", argLength: 2, reg: fp21, asm: "XSMINJDP"}, // fmin(arg0,arg1)
   194  		{name: "XSMAXJDP", argLength: 2, reg: fp21, asm: "XSMAXJDP"}, // fmax(arg0,arg1)
   195  
   196  		{name: "MULLD", argLength: 2, reg: gp21, asm: "MULLD", typ: "Int64", commutative: true}, // arg0*arg1 (signed 64-bit)
   197  		{name: "MULLW", argLength: 2, reg: gp21, asm: "MULLW", typ: "Int32", commutative: true}, // arg0*arg1 (signed 32-bit)
   198  		{name: "MULLDconst", argLength: 1, reg: gp11, asm: "MULLD", aux: "Int32", typ: "Int64"}, // arg0*auxInt (signed 64-bit)
   199  		{name: "MULLWconst", argLength: 1, reg: gp11, asm: "MULLW", aux: "Int32", typ: "Int64"}, // arg0*auxInt (signed 64-bit)
   200  		{name: "MADDLD", argLength: 3, reg: gp31, asm: "MADDLD", typ: "Int64"},                  // (arg0*arg1)+arg2 (signed 64-bit)
   201  
   202  		{name: "MULHD", argLength: 2, reg: gp21, asm: "MULHD", commutative: true},                             // (arg0 * arg1) >> 64, signed
   203  		{name: "MULHW", argLength: 2, reg: gp21, asm: "MULHW", commutative: true},                             // (arg0 * arg1) >> 32, signed
   204  		{name: "MULHDU", argLength: 2, reg: gp21, asm: "MULHDU", commutative: true},                           // (arg0 * arg1) >> 64, unsigned
   205  		{name: "MULHDUCC", argLength: 2, reg: gp21, asm: "MULHDUCC", commutative: true, typ: "(Int64,Flags)"}, // (arg0 * arg1) >> 64, unsigned, sets CC
   206  		{name: "MULHWU", argLength: 2, reg: gp21, asm: "MULHWU", commutative: true},                           // (arg0 * arg1) >> 32, unsigned
   207  
   208  		{name: "FMUL", argLength: 2, reg: fp21, asm: "FMUL", commutative: true},   // arg0*arg1
   209  		{name: "FMULS", argLength: 2, reg: fp21, asm: "FMULS", commutative: true}, // arg0*arg1
   210  
   211  		{name: "FMADD", argLength: 3, reg: fp31, asm: "FMADD"},   // arg0*arg1 + arg2
   212  		{name: "FMADDS", argLength: 3, reg: fp31, asm: "FMADDS"}, // arg0*arg1 + arg2
   213  		{name: "FMSUB", argLength: 3, reg: fp31, asm: "FMSUB"},   // arg0*arg1 - arg2
   214  		{name: "FMSUBS", argLength: 3, reg: fp31, asm: "FMSUBS"}, // arg0*arg1 - arg2
   215  
   216  		{name: "SRAD", argLength: 2, reg: gp21cxer, asm: "SRAD"}, // signed arg0 >> (arg1&127), 64 bit width (note: 127, not 63!)
   217  		{name: "SRAW", argLength: 2, reg: gp21cxer, asm: "SRAW"}, // signed arg0 >> (arg1&63), 32 bit width
   218  		{name: "SRD", argLength: 2, reg: gp21, asm: "SRD"},       // unsigned arg0 >> (arg1&127), 64 bit width
   219  		{name: "SRW", argLength: 2, reg: gp21, asm: "SRW"},       // unsigned arg0 >> (arg1&63), 32 bit width
   220  		{name: "SLD", argLength: 2, reg: gp21, asm: "SLD"},       // arg0 << (arg1&127), 64 bit width
   221  		{name: "SLW", argLength: 2, reg: gp21, asm: "SLW"},       // arg0 << (arg1&63), 32 bit width
   222  
   223  		{name: "ROTL", argLength: 2, reg: gp21, asm: "ROTL"},   // arg0 rotate left by arg1 mod 64
   224  		{name: "ROTLW", argLength: 2, reg: gp21, asm: "ROTLW"}, // uint32(arg0) rotate left by arg1 mod 32
   225  		// The following are ops to implement the extended mnemonics for shifts as described in section C.8 of the ISA.
   226  		// The constant shift values are packed into the aux int32.
   227  		{name: "CLRLSLWI", argLength: 1, reg: gp11, asm: "CLRLSLWI", aux: "Int32"}, //
   228  		{name: "CLRLSLDI", argLength: 1, reg: gp11, asm: "CLRLSLDI", aux: "Int32"}, //
   229  
   230  		// Operations which consume or generate the CA (xer)
   231  		{name: "ADDC", argLength: 2, reg: gp21xer, asm: "ADDC", commutative: true, typ: "(UInt64, UInt64)"},    // arg0 + arg1 -> out, CA
   232  		{name: "SUBC", argLength: 2, reg: gp21xer, asm: "SUBC", typ: "(UInt64, UInt64)"},                       // arg0 - arg1 -> out, CA
   233  		{name: "ADDCconst", argLength: 1, reg: gp11xer, asm: "ADDC", typ: "(UInt64, UInt64)", aux: "Int64"},    // arg0 + imm16 -> out, CA
   234  		{name: "SUBCconst", argLength: 1, reg: gp11xer, asm: "SUBC", typ: "(UInt64, UInt64)", aux: "Int64"},    // imm16 - arg0 -> out, CA
   235  		{name: "ADDE", argLength: 3, reg: gp2xer1xer, asm: "ADDE", typ: "(UInt64, UInt64)", commutative: true}, // arg0 + arg1 + CA (arg2) -> out, CA
   236  		{name: "ADDZE", argLength: 2, reg: gp1xer1xer, asm: "ADDZE", typ: "(UInt64, UInt64)"},                  // arg0 + CA (arg1) -> out, CA
   237  		{name: "SUBE", argLength: 3, reg: gp2xer1xer, asm: "SUBE", typ: "(UInt64, UInt64)"},                    // arg0 - arg1 - CA (arg2) -> out, CA
   238  		{name: "ADDZEzero", argLength: 1, reg: xergp, asm: "ADDZE", typ: "UInt64"},                             // CA (arg0) + $0 -> out
   239  		{name: "SUBZEzero", argLength: 1, reg: xergp, asm: "SUBZE", typ: "UInt64"},                             // $0 - CA (arg0) -> out
   240  
   241  		{name: "SRADconst", argLength: 1, reg: gp11cxer, asm: "SRAD", aux: "Int64"}, // signed arg0 >> auxInt, 0 <= auxInt < 64, 64 bit width
   242  		{name: "SRAWconst", argLength: 1, reg: gp11cxer, asm: "SRAW", aux: "Int64"}, // signed arg0 >> auxInt, 0 <= auxInt < 32, 32 bit width
   243  		{name: "SRDconst", argLength: 1, reg: gp11, asm: "SRD", aux: "Int64"},       // unsigned arg0 >> auxInt, 0 <= auxInt < 64, 64 bit width
   244  		{name: "SRWconst", argLength: 1, reg: gp11, asm: "SRW", aux: "Int64"},       // unsigned arg0 >> auxInt, 0 <= auxInt < 32, 32 bit width
   245  		{name: "SLDconst", argLength: 1, reg: gp11, asm: "SLD", aux: "Int64"},       // arg0 << auxInt, 0 <= auxInt < 64, 64 bit width
   246  		{name: "SLWconst", argLength: 1, reg: gp11, asm: "SLW", aux: "Int64"},       // arg0 << auxInt, 0 <= auxInt < 32, 32 bit width
   247  
   248  		{name: "ROTLconst", argLength: 1, reg: gp11, asm: "ROTL", aux: "Int64"},   // arg0 rotate left by auxInt bits
   249  		{name: "ROTLWconst", argLength: 1, reg: gp11, asm: "ROTLW", aux: "Int64"}, // uint32(arg0) rotate left by auxInt bits
   250  		{name: "EXTSWSLconst", argLength: 1, reg: gp11, asm: "EXTSWSLI", aux: "Int64"},
   251  
   252  		{name: "RLWINM", argLength: 1, reg: gp11, asm: "RLWNM", aux: "Int64"},                           // Rotate and mask by immediate "rlwinm". encodePPC64RotateMask describes aux
   253  		{name: "RLWNM", argLength: 2, reg: gp21, asm: "RLWNM", aux: "Int64"},                            // Rotate and mask by "rlwnm". encodePPC64RotateMask describes aux
   254  		{name: "RLWMI", argLength: 2, reg: gp21a0, asm: "RLWMI", aux: "Int64", resultInArg0: true},      // "rlwimi" similar aux encoding as above
   255  		{name: "RLDICL", argLength: 1, reg: gp11, asm: "RLDICL", aux: "Int64"},                          // Auxint is encoded similarly to RLWINM, but only MB and SH are valid. ME is always 63.
   256  		{name: "RLDICLCC", argLength: 1, reg: gp11, asm: "RLDICLCC", aux: "Int64", typ: "(Int, Flags)"}, // Auxint is encoded similarly to RLWINM, but only MB and SH are valid. ME is always 63. Sets CC.
   257  		{name: "RLDICR", argLength: 1, reg: gp11, asm: "RLDICR", aux: "Int64"},                          // Likewise, but only ME and SH are valid. MB is always 0.
   258  
   259  		{name: "CNTLZD", argLength: 1, reg: gp11, asm: "CNTLZD"},                          // count leading zeros
   260  		{name: "CNTLZDCC", argLength: 1, reg: gp11, asm: "CNTLZDCC", typ: "(Int, Flags)"}, // count leading zeros, sets CC
   261  		{name: "CNTLZW", argLength: 1, reg: gp11, asm: "CNTLZW"},                          // count leading zeros (32 bit)
   262  
   263  		{name: "CNTTZD", argLength: 1, reg: gp11, asm: "CNTTZD"}, // count trailing zeros
   264  		{name: "CNTTZW", argLength: 1, reg: gp11, asm: "CNTTZW"}, // count trailing zeros (32 bit)
   265  
   266  		{name: "POPCNTD", argLength: 1, reg: gp11, asm: "POPCNTD"}, // number of set bits in arg0
   267  		{name: "POPCNTW", argLength: 1, reg: gp11, asm: "POPCNTW"}, // number of set bits in each word of arg0 placed in corresponding word
   268  		{name: "POPCNTB", argLength: 1, reg: gp11, asm: "POPCNTB"}, // number of set bits in each byte of arg0 placed in corresponding byte
   269  
   270  		{name: "FDIV", argLength: 2, reg: fp21, asm: "FDIV"},   // arg0/arg1
   271  		{name: "FDIVS", argLength: 2, reg: fp21, asm: "FDIVS"}, // arg0/arg1
   272  
   273  		{name: "DIVD", argLength: 2, reg: gp21, asm: "DIVD", typ: "Int64"},   // arg0/arg1 (signed 64-bit)
   274  		{name: "DIVW", argLength: 2, reg: gp21, asm: "DIVW", typ: "Int32"},   // arg0/arg1 (signed 32-bit)
   275  		{name: "DIVDU", argLength: 2, reg: gp21, asm: "DIVDU", typ: "Int64"}, // arg0/arg1 (unsigned 64-bit)
   276  		{name: "DIVWU", argLength: 2, reg: gp21, asm: "DIVWU", typ: "Int32"}, // arg0/arg1 (unsigned 32-bit)
   277  
   278  		{name: "MODUD", argLength: 2, reg: gp21, asm: "MODUD", typ: "UInt64"}, // arg0 % arg1 (unsigned 64-bit)
   279  		{name: "MODSD", argLength: 2, reg: gp21, asm: "MODSD", typ: "Int64"},  // arg0 % arg1 (signed 64-bit)
   280  		{name: "MODUW", argLength: 2, reg: gp21, asm: "MODUW", typ: "UInt32"}, // arg0 % arg1 (unsigned 32-bit)
   281  		{name: "MODSW", argLength: 2, reg: gp21, asm: "MODSW", typ: "Int32"},  // arg0 % arg1 (signed 32-bit)
   282  		// MOD is implemented as rem := arg0 - (arg0/arg1) * arg1
   283  
   284  		// Conversions are all float-to-float register operations.  "Integer" refers to encoding in the FP register.
   285  		{name: "FCTIDZ", argLength: 1, reg: fp11, asm: "FCTIDZ", typ: "Float64"}, // convert float to 64-bit int round towards zero
   286  		{name: "FCTIWZ", argLength: 1, reg: fp11, asm: "FCTIWZ", typ: "Float64"}, // convert float to 32-bit int round towards zero
   287  		{name: "FCFID", argLength: 1, reg: fp11, asm: "FCFID", typ: "Float64"},   // convert 64-bit integer to float
   288  		{name: "FCFIDS", argLength: 1, reg: fp11, asm: "FCFIDS", typ: "Float32"}, // convert 32-bit integer to float
   289  		{name: "FRSP", argLength: 1, reg: fp11, asm: "FRSP", typ: "Float64"},     // round float to 32-bit value
   290  
   291  		// Movement between float and integer registers with no change in bits; accomplished with stores+loads on PPC.
   292  		// Because the 32-bit load-literal-bits instructions have impoverished addressability, always widen the
   293  		// data instead and use FMOVDload and FMOVDstore instead (this will also dodge endianess issues).
   294  		// There are optimizations that should apply -- (Xi2f64 (MOVWload (not-ADD-ptr+offset) ) ) could use
   295  		// the word-load instructions.  (Xi2f64 (MOVDload ptr )) can be (FMOVDload ptr)
   296  
   297  		{name: "MFVSRD", argLength: 1, reg: fpgp, asm: "MFVSRD", typ: "Int64"},   // move 64 bits of F register into G register
   298  		{name: "MTVSRD", argLength: 1, reg: gpfp, asm: "MTVSRD", typ: "Float64"}, // move 64 bits of G register into F register
   299  
   300  		{name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true},                           // arg0&arg1
   301  		{name: "ANDN", argLength: 2, reg: gp21, asm: "ANDN"},                                            // arg0&^arg1
   302  		{name: "ANDNCC", argLength: 2, reg: gp21, asm: "ANDNCC", typ: "(Int64,Flags)"},                  // arg0&^arg1 sets CC
   303  		{name: "ANDCC", argLength: 2, reg: gp21, asm: "ANDCC", commutative: true, typ: "(Int64,Flags)"}, // arg0&arg1 sets CC
   304  		{name: "OR", argLength: 2, reg: gp21, asm: "OR", commutative: true},                             // arg0|arg1
   305  		{name: "ORN", argLength: 2, reg: gp21, asm: "ORN"},                                              // arg0|^arg1
   306  		{name: "ORCC", argLength: 2, reg: gp21, asm: "ORCC", commutative: true, typ: "(Int,Flags)"},     // arg0|arg1 sets CC
   307  		{name: "NOR", argLength: 2, reg: gp21, asm: "NOR", commutative: true},                           // ^(arg0|arg1)
   308  		{name: "NORCC", argLength: 2, reg: gp21, asm: "NORCC", commutative: true, typ: "(Int,Flags)"},   // ^(arg0|arg1) sets CC
   309  		{name: "XOR", argLength: 2, reg: gp21, asm: "XOR", typ: "Int64", commutative: true},             // arg0^arg1
   310  		{name: "XORCC", argLength: 2, reg: gp21, asm: "XORCC", commutative: true, typ: "(Int,Flags)"},   // arg0^arg1 sets CC
   311  		{name: "EQV", argLength: 2, reg: gp21, asm: "EQV", typ: "Int64", commutative: true},             // arg0^^arg1
   312  		{name: "NEG", argLength: 1, reg: gp11, asm: "NEG"},                                              // -arg0 (integer)
   313  		{name: "NEGCC", argLength: 1, reg: gp11, asm: "NEGCC", typ: "(Int,Flags)"},                      // -arg0 (integer) sets CC
   314  		{name: "BRD", argLength: 1, reg: gp11, asm: "BRD"},                                              // reversebytes64(arg0)
   315  		{name: "BRW", argLength: 1, reg: gp11, asm: "BRW"},                                              // reversebytes32(arg0)
   316  		{name: "BRH", argLength: 1, reg: gp11, asm: "BRH"},                                              // reversebytes16(arg0)
   317  		{name: "FNEG", argLength: 1, reg: fp11, asm: "FNEG"},                                            // -arg0 (floating point)
   318  		{name: "FSQRT", argLength: 1, reg: fp11, asm: "FSQRT"},                                          // sqrt(arg0) (floating point)
   319  		{name: "FSQRTS", argLength: 1, reg: fp11, asm: "FSQRTS"},                                        // sqrt(arg0) (floating point, single precision)
   320  		{name: "FFLOOR", argLength: 1, reg: fp11, asm: "FRIM"},                                          // floor(arg0), float64
   321  		{name: "FCEIL", argLength: 1, reg: fp11, asm: "FRIP"},                                           // ceil(arg0), float64
   322  		{name: "FTRUNC", argLength: 1, reg: fp11, asm: "FRIZ"},                                          // trunc(arg0), float64
   323  		{name: "FROUND", argLength: 1, reg: fp11, asm: "FRIN"},                                          // round(arg0), float64
   324  		{name: "FABS", argLength: 1, reg: fp11, asm: "FABS"},                                            // abs(arg0), float64
   325  		{name: "FNABS", argLength: 1, reg: fp11, asm: "FNABS"},                                          // -abs(arg0), float64
   326  		{name: "FCPSGN", argLength: 2, reg: fp21, asm: "FCPSGN"},                                        // copysign arg0 -> arg1, float64
   327  
   328  		{name: "ORconst", argLength: 1, reg: gp11, asm: "OR", aux: "Int64"},                                                                                                 // arg0|aux
   329  		{name: "XORconst", argLength: 1, reg: gp11, asm: "XOR", aux: "Int64"},                                                                                               // arg0^aux
   330  		{name: "ANDCCconst", argLength: 1, reg: regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}, asm: "ANDCC", aux: "Int64", typ: "(Int,Flags)"},           // arg0&aux == 0 // and-immediate sets CC on PPC, always.
   331  		{name: "ANDconst", argLength: 1, reg: regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}, clobberFlags: true, asm: "ANDCC", aux: "Int64", typ: "Int"}, // arg0&aux == 0 // and-immediate sets CC on PPC, always.
   332  
   333  		{name: "MOVBreg", argLength: 1, reg: gp11, asm: "MOVB", typ: "Int64"},   // sign extend int8 to int64
   334  		{name: "MOVBZreg", argLength: 1, reg: gp11, asm: "MOVBZ", typ: "Int64"}, // zero extend uint8 to uint64
   335  		{name: "MOVHreg", argLength: 1, reg: gp11, asm: "MOVH", typ: "Int64"},   // sign extend int16 to int64
   336  		{name: "MOVHZreg", argLength: 1, reg: gp11, asm: "MOVHZ", typ: "Int64"}, // zero extend uint16 to uint64
   337  		{name: "MOVWreg", argLength: 1, reg: gp11, asm: "MOVW", typ: "Int64"},   // sign extend int32 to int64
   338  		{name: "MOVWZreg", argLength: 1, reg: gp11, asm: "MOVWZ", typ: "Int64"}, // zero extend uint32 to uint64
   339  
   340  		// Load bytes in the endian order of the arch from arg0+aux+auxint into a 64 bit register.
   341  		{name: "MOVBZload", argLength: 2, reg: gpload, asm: "MOVBZ", aux: "SymOff", typ: "UInt8", faultOnNilArg0: true, symEffect: "Read"},  // load byte zero extend
   342  		{name: "MOVHload", argLength: 2, reg: gpload, asm: "MOVH", aux: "SymOff", typ: "Int16", faultOnNilArg0: true, symEffect: "Read"},    // load 2 bytes sign extend
   343  		{name: "MOVHZload", argLength: 2, reg: gpload, asm: "MOVHZ", aux: "SymOff", typ: "UInt16", faultOnNilArg0: true, symEffect: "Read"}, // load 2 bytes zero extend
   344  		{name: "MOVWload", argLength: 2, reg: gpload, asm: "MOVW", aux: "SymOff", typ: "Int32", faultOnNilArg0: true, symEffect: "Read"},    // load 4 bytes sign extend
   345  		{name: "MOVWZload", argLength: 2, reg: gpload, asm: "MOVWZ", aux: "SymOff", typ: "UInt32", faultOnNilArg0: true, symEffect: "Read"}, // load 4 bytes zero extend
   346  		{name: "MOVDload", argLength: 2, reg: gpload, asm: "MOVD", aux: "SymOff", typ: "Int64", faultOnNilArg0: true, symEffect: "Read"},    // load 8 bytes
   347  
   348  		// Load bytes in reverse endian order of the arch from arg0 into a 64 bit register, all zero extend.
   349  		// The generated instructions are indexed loads with no offset field in the instruction so the aux fields are not used.
   350  		// In these cases the index register field is set to 0 and the full address is in the base register.
   351  		{name: "MOVDBRload", argLength: 2, reg: gpload, asm: "MOVDBR", typ: "UInt64", faultOnNilArg0: true}, // load 8 bytes reverse order
   352  		{name: "MOVWBRload", argLength: 2, reg: gpload, asm: "MOVWBR", typ: "UInt32", faultOnNilArg0: true}, // load 4 bytes zero extend reverse order
   353  		{name: "MOVHBRload", argLength: 2, reg: gpload, asm: "MOVHBR", typ: "UInt16", faultOnNilArg0: true}, // load 2 bytes zero extend reverse order
   354  
   355  		// In these cases an index register is used in addition to a base register
   356  		// Loads from memory location arg[0] + arg[1].
   357  		{name: "MOVBZloadidx", argLength: 3, reg: gploadidx, asm: "MOVBZ", typ: "UInt8"},  // zero extend uint8 to uint64
   358  		{name: "MOVHloadidx", argLength: 3, reg: gploadidx, asm: "MOVH", typ: "Int16"},    // sign extend int16 to int64
   359  		{name: "MOVHZloadidx", argLength: 3, reg: gploadidx, asm: "MOVHZ", typ: "UInt16"}, // zero extend uint16 to uint64
   360  		{name: "MOVWloadidx", argLength: 3, reg: gploadidx, asm: "MOVW", typ: "Int32"},    // sign extend int32 to int64
   361  		{name: "MOVWZloadidx", argLength: 3, reg: gploadidx, asm: "MOVWZ", typ: "UInt32"}, // zero extend uint32 to uint64
   362  		{name: "MOVDloadidx", argLength: 3, reg: gploadidx, asm: "MOVD", typ: "Int64"},
   363  		{name: "MOVHBRloadidx", argLength: 3, reg: gploadidx, asm: "MOVHBR", typ: "Int16"}, // sign extend int16 to int64
   364  		{name: "MOVWBRloadidx", argLength: 3, reg: gploadidx, asm: "MOVWBR", typ: "Int32"}, // sign extend int32 to int64
   365  		{name: "MOVDBRloadidx", argLength: 3, reg: gploadidx, asm: "MOVDBR", typ: "Int64"},
   366  		{name: "FMOVDloadidx", argLength: 3, reg: fploadidx, asm: "FMOVD", typ: "Float64"},
   367  		{name: "FMOVSloadidx", argLength: 3, reg: fploadidx, asm: "FMOVS", typ: "Float32"},
   368  
   369  		// Prefetch instruction
   370  		// Do prefetch of address generated with arg0 and arg1 with option aux. arg0=addr,arg1=memory, aux=option.
   371  		{name: "DCBT", argLength: 2, aux: "Int64", reg: prefreg, asm: "DCBT", hasSideEffects: true},
   372  
   373  		// Store bytes in the reverse endian order of the arch into arg0.
   374  		// These are indexed stores with no offset field in the instruction so the auxint fields are not used.
   375  		{name: "MOVDBRstore", argLength: 3, reg: gpstore, asm: "MOVDBR", typ: "Mem", faultOnNilArg0: true}, // store 8 bytes reverse order
   376  		{name: "MOVWBRstore", argLength: 3, reg: gpstore, asm: "MOVWBR", typ: "Mem", faultOnNilArg0: true}, // store 4 bytes reverse order
   377  		{name: "MOVHBRstore", argLength: 3, reg: gpstore, asm: "MOVHBR", typ: "Mem", faultOnNilArg0: true}, // store 2 bytes reverse order
   378  
   379  		// Floating point loads from arg0+aux+auxint
   380  		{name: "FMOVDload", argLength: 2, reg: fpload, asm: "FMOVD", aux: "SymOff", typ: "Float64", faultOnNilArg0: true, symEffect: "Read"}, // load double float
   381  		{name: "FMOVSload", argLength: 2, reg: fpload, asm: "FMOVS", aux: "SymOff", typ: "Float32", faultOnNilArg0: true, symEffect: "Read"}, // load single float
   382  
   383  		// Store bytes in the endian order of the arch into arg0+aux+auxint
   384  		{name: "MOVBstore", argLength: 3, reg: gpstore, asm: "MOVB", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store byte
   385  		{name: "MOVHstore", argLength: 3, reg: gpstore, asm: "MOVH", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes
   386  		{name: "MOVWstore", argLength: 3, reg: gpstore, asm: "MOVW", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes
   387  		{name: "MOVDstore", argLength: 3, reg: gpstore, asm: "MOVD", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes
   388  
   389  		// Store floating point value into arg0+aux+auxint
   390  		{name: "FMOVDstore", argLength: 3, reg: fpstore, asm: "FMOVD", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store double flot
   391  		{name: "FMOVSstore", argLength: 3, reg: fpstore, asm: "FMOVS", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store single float
   392  
   393  		// Stores using index and base registers
   394  		// Stores to arg[0] + arg[1]
   395  		{name: "MOVBstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVB", typ: "Mem"},     // store bye
   396  		{name: "MOVHstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVH", typ: "Mem"},     // store half word
   397  		{name: "MOVWstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVW", typ: "Mem"},     // store word
   398  		{name: "MOVDstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVD", typ: "Mem"},     // store double word
   399  		{name: "FMOVDstoreidx", argLength: 4, reg: fpstoreidx, asm: "FMOVD", typ: "Mem"},   // store double float
   400  		{name: "FMOVSstoreidx", argLength: 4, reg: fpstoreidx, asm: "FMOVS", typ: "Mem"},   // store single float
   401  		{name: "MOVHBRstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVHBR", typ: "Mem"}, // store half word reversed byte using index reg
   402  		{name: "MOVWBRstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVWBR", typ: "Mem"}, // store word reversed byte using index reg
   403  		{name: "MOVDBRstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVDBR", typ: "Mem"}, // store double word reversed byte using index reg
   404  
   405  		// The following ops store 0 into arg0+aux+auxint arg1=mem
   406  		{name: "MOVBstorezero", argLength: 2, reg: gpstorezero, asm: "MOVB", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store zero 1 byte
   407  		{name: "MOVHstorezero", argLength: 2, reg: gpstorezero, asm: "MOVH", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store zero 2 bytes
   408  		{name: "MOVWstorezero", argLength: 2, reg: gpstorezero, asm: "MOVW", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store zero 4 bytes
   409  		{name: "MOVDstorezero", argLength: 2, reg: gpstorezero, asm: "MOVD", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store zero 8 bytes
   410  
   411  		{name: "MOVDaddr", argLength: 1, reg: regInfo{inputs: []regMask{sp | sb | gp}, outputs: []regMask{gp}}, aux: "SymOff", asm: "MOVD", rematerializeable: true, symEffect: "Addr"}, // arg0 + auxInt + aux.(*gc.Sym), arg0=SP/SB/GP
   412  
   413  		{name: "MOVDconst", argLength: 0, reg: gp01, aux: "Int64", asm: "MOVD", typ: "Int64", rematerializeable: true}, //
   414  		{name: "FMOVDconst", argLength: 0, reg: fp01, aux: "Float64", asm: "FMOVD", rematerializeable: true},           //
   415  		{name: "FMOVSconst", argLength: 0, reg: fp01, aux: "Float32", asm: "FMOVS", rematerializeable: true},           //
   416  		{name: "FCMPU", argLength: 2, reg: fp2cr, asm: "FCMPU", typ: "Flags"},
   417  
   418  		{name: "CMP", argLength: 2, reg: gp2cr, asm: "CMP", typ: "Flags"},     // arg0 compare to arg1
   419  		{name: "CMPU", argLength: 2, reg: gp2cr, asm: "CMPU", typ: "Flags"},   // arg0 compare to arg1
   420  		{name: "CMPW", argLength: 2, reg: gp2cr, asm: "CMPW", typ: "Flags"},   // arg0 compare to arg1
   421  		{name: "CMPWU", argLength: 2, reg: gp2cr, asm: "CMPWU", typ: "Flags"}, // arg0 compare to arg1
   422  		{name: "CMPconst", argLength: 1, reg: gp1cr, asm: "CMP", aux: "Int64", typ: "Flags"},
   423  		{name: "CMPUconst", argLength: 1, reg: gp1cr, asm: "CMPU", aux: "Int64", typ: "Flags"},
   424  		{name: "CMPWconst", argLength: 1, reg: gp1cr, asm: "CMPW", aux: "Int32", typ: "Flags"},
   425  		{name: "CMPWUconst", argLength: 1, reg: gp1cr, asm: "CMPWU", aux: "Int32", typ: "Flags"},
   426  
   427  		// ISEL  arg2 ? arg0 : arg1
   428  		// ISELZ arg1 ? arg0 : $0
   429  		// auxInt values 0=LT 1=GT 2=EQ 3=SO (summary overflow/unordered) 4=GE 5=LE 6=NE 7=NSO (not summary overflow/not unordered)
   430  		// Note, auxInt^4 inverts the comparison condition. For example, LT^4 becomes GE, and "ISEL [a] x y z" is equivalent to ISEL [a^4] y x z".
   431  		{name: "ISEL", argLength: 3, reg: crgp21, asm: "ISEL", aux: "Int32", typ: "Int32"},
   432  		{name: "ISELZ", argLength: 2, reg: crgp11, asm: "ISEL", aux: "Int32"},
   433  
   434  		// SETBC auxInt values 0=LT 1=GT 2=EQ     (CRbit=1)? 1 : 0
   435  		{name: "SETBC", argLength: 1, reg: crgp, asm: "SETBC", aux: "Int32", typ: "Int32"},
   436  		// SETBCR auxInt values 0=LT 1=GT 2=EQ     (CRbit=1)? 0 : 1
   437  		{name: "SETBCR", argLength: 1, reg: crgp, asm: "SETBCR", aux: "Int32", typ: "Int32"},
   438  
   439  		// pseudo-ops
   440  		{name: "Equal", argLength: 1, reg: crgp},         // bool, true flags encode x==y false otherwise.
   441  		{name: "NotEqual", argLength: 1, reg: crgp},      // bool, true flags encode x!=y false otherwise.
   442  		{name: "LessThan", argLength: 1, reg: crgp},      // bool, true flags encode  x<y false otherwise.
   443  		{name: "FLessThan", argLength: 1, reg: crgp},     // bool, true flags encode  x<y false otherwise.
   444  		{name: "LessEqual", argLength: 1, reg: crgp},     // bool, true flags encode  x<=y false otherwise.
   445  		{name: "FLessEqual", argLength: 1, reg: crgp},    // bool, true flags encode  x<=y false otherwise; PPC <= === !> which is wrong for NaN
   446  		{name: "GreaterThan", argLength: 1, reg: crgp},   // bool, true flags encode  x>y false otherwise.
   447  		{name: "FGreaterThan", argLength: 1, reg: crgp},  // bool, true flags encode  x>y false otherwise.
   448  		{name: "GreaterEqual", argLength: 1, reg: crgp},  // bool, true flags encode  x>=y false otherwise.
   449  		{name: "FGreaterEqual", argLength: 1, reg: crgp}, // bool, true flags encode  x>=y false otherwise.; PPC >= === !< which is wrong for NaN
   450  
   451  		// Scheduler ensures LoweredGetClosurePtr occurs only in entry block,
   452  		// and sorts it to the very beginning of the block to prevent other
   453  		// use of the closure pointer.
   454  		{name: "LoweredGetClosurePtr", reg: regInfo{outputs: []regMask{ctxt}}, zeroWidth: true},
   455  
   456  		// LoweredGetCallerSP returns the SP of the caller of the current function. arg0=mem.
   457  		{name: "LoweredGetCallerSP", argLength: 1, reg: gp01, rematerializeable: true},
   458  
   459  		// LoweredGetCallerPC evaluates to the PC to which its "caller" will return.
   460  		// I.e., if f calls g "calls" sys.GetCallerPC,
   461  		// the result should be the PC within f that g will return to.
   462  		// See runtime/stubs.go for a more detailed discussion.
   463  		{name: "LoweredGetCallerPC", reg: gp01, rematerializeable: true},
   464  
   465  		//arg0=ptr,arg1=mem, returns void.  Faults if ptr is nil.
   466  		{name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gp | sp | sb}, clobbers: tmp}, clobberFlags: true, nilCheck: true, faultOnNilArg0: true},
   467  		// Round ops to block fused-multiply-add extraction.
   468  		{name: "LoweredRound32F", argLength: 1, reg: fp11, resultInArg0: true, zeroWidth: true},
   469  		{name: "LoweredRound64F", argLength: 1, reg: fp11, resultInArg0: true, zeroWidth: true},
   470  
   471  		{name: "CALLstatic", argLength: -1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true},                                       // call static function aux.(*obj.LSym).  arg0=mem, auxint=argsize, returns mem
   472  		{name: "CALLtail", argLength: -1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true, tailCall: true},                         // tail call static function aux.(*obj.LSym).  arg0=mem, auxint=argsize, returns mem
   473  		{name: "CALLclosure", argLength: -1, reg: regInfo{inputs: []regMask{callptr, ctxt, 0}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call function via closure.  arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem
   474  		{name: "CALLinter", argLength: -1, reg: regInfo{inputs: []regMask{callptr}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true},            // call fn by pointer.  arg0=codeptr, arg1=mem, auxint=argsize, returns mem
   475  
   476  		// large or unaligned zeroing
   477  		// arg0 = address of memory to zero (in R3, changed as side effect)
   478  		// returns mem
   479  		//
   480  		// a loop is generated when there is more than one iteration
   481  		// needed to clear 4 doublewords
   482  		//
   483  		//	XXLXOR	VS32,VS32,VS32
   484  		// 	MOVD	$len/32,R31
   485  		//	MOVD	R31,CTR
   486  		//	MOVD	$16,R31
   487  		//	loop:
   488  		//	STXVD2X VS32,(R0)(R3)
   489  		//	STXVD2X	VS32,(R31)(R3)
   490  		//	ADD	R3,32
   491  		//	BC	loop
   492  
   493  		// remaining doubleword clears generated as needed
   494  		//	MOVD	R0,(R3)
   495  		//	MOVD	R0,8(R3)
   496  		//	MOVD	R0,16(R3)
   497  		//	MOVD	R0,24(R3)
   498  
   499  		// one or more of these to clear remainder < 8 bytes
   500  		//	MOVW	R0,n1(R3)
   501  		//	MOVH	R0,n2(R3)
   502  		//	MOVB	R0,n3(R3)
   503  		{
   504  			name:      "LoweredZero",
   505  			aux:       "Int64",
   506  			argLength: 2,
   507  			reg: regInfo{
   508  				inputs:   []regMask{buildReg("R20")},
   509  				clobbers: buildReg("R20"),
   510  			},
   511  			clobberFlags:   true,
   512  			typ:            "Mem",
   513  			faultOnNilArg0: true,
   514  			unsafePoint:    true,
   515  		},
   516  		{
   517  			name:      "LoweredZeroShort",
   518  			aux:       "Int64",
   519  			argLength: 2,
   520  			reg: regInfo{
   521  				inputs: []regMask{gp}},
   522  			typ:            "Mem",
   523  			faultOnNilArg0: true,
   524  			unsafePoint:    true,
   525  		},
   526  		{
   527  			name:      "LoweredQuadZeroShort",
   528  			aux:       "Int64",
   529  			argLength: 2,
   530  			reg: regInfo{
   531  				inputs: []regMask{gp},
   532  			},
   533  			typ:            "Mem",
   534  			faultOnNilArg0: true,
   535  			unsafePoint:    true,
   536  		},
   537  		{
   538  			name:      "LoweredQuadZero",
   539  			aux:       "Int64",
   540  			argLength: 2,
   541  			reg: regInfo{
   542  				inputs:   []regMask{buildReg("R20")},
   543  				clobbers: buildReg("R20"),
   544  			},
   545  			clobberFlags:   true,
   546  			typ:            "Mem",
   547  			faultOnNilArg0: true,
   548  			unsafePoint:    true,
   549  		},
   550  
   551  		// R31 is temp register
   552  		// Loop code:
   553  		//	MOVD len/32,R31		set up loop ctr
   554  		//	MOVD R31,CTR
   555  		//	MOVD $16,R31		index register
   556  		// loop:
   557  		//	LXVD2X (R0)(R4),VS32
   558  		//	LXVD2X (R31)(R4),VS33
   559  		//	ADD  R4,$32          increment src
   560  		//	STXVD2X VS32,(R0)(R3)
   561  		//	STXVD2X VS33,(R31)(R3)
   562  		//	ADD  R3,$32          increment dst
   563  		//	BC 16,0,loop         branch ctr
   564  		// For this purpose, VS32 and VS33 are treated as
   565  		// scratch registers. Since regalloc does not
   566  		// track vector registers, even if it could be marked
   567  		// as clobbered it would have no effect.
   568  		// TODO: If vector registers are managed by regalloc
   569  		// mark these as clobbered.
   570  		//
   571  		// Bytes not moved by this loop are moved
   572  		// with a combination of the following instructions,
   573  		// starting with the largest sizes and generating as
   574  		// many as needed, using the appropriate offset value.
   575  		//	MOVD  n(R4),R14
   576  		//	MOVD  R14,n(R3)
   577  		//	MOVW  n1(R4),R14
   578  		//	MOVW  R14,n1(R3)
   579  		//	MOVH  n2(R4),R14
   580  		//	MOVH  R14,n2(R3)
   581  		//	MOVB  n3(R4),R14
   582  		//	MOVB  R14,n3(R3)
   583  
   584  		{
   585  			name:      "LoweredMove",
   586  			aux:       "Int64",
   587  			argLength: 3,
   588  			reg: regInfo{
   589  				inputs:   []regMask{buildReg("R20"), buildReg("R21")},
   590  				clobbers: buildReg("R20 R21"),
   591  			},
   592  			clobberFlags:   true,
   593  			typ:            "Mem",
   594  			faultOnNilArg0: true,
   595  			faultOnNilArg1: true,
   596  			unsafePoint:    true,
   597  		},
   598  		{
   599  			name:      "LoweredMoveShort",
   600  			aux:       "Int64",
   601  			argLength: 3,
   602  			reg: regInfo{
   603  				inputs: []regMask{gp, gp},
   604  			},
   605  			typ:            "Mem",
   606  			faultOnNilArg0: true,
   607  			faultOnNilArg1: true,
   608  			unsafePoint:    true,
   609  		},
   610  
   611  		// The following is similar to the LoweredMove, but uses
   612  		// LXV instead of LXVD2X, which does not require an index
   613  		// register and will do 4 in a loop instead of only.
   614  		{
   615  			name:      "LoweredQuadMove",
   616  			aux:       "Int64",
   617  			argLength: 3,
   618  			reg: regInfo{
   619  				inputs:   []regMask{buildReg("R20"), buildReg("R21")},
   620  				clobbers: buildReg("R20 R21"),
   621  			},
   622  			clobberFlags:   true,
   623  			typ:            "Mem",
   624  			faultOnNilArg0: true,
   625  			faultOnNilArg1: true,
   626  			unsafePoint:    true,
   627  		},
   628  
   629  		{
   630  			name:      "LoweredQuadMoveShort",
   631  			aux:       "Int64",
   632  			argLength: 3,
   633  			reg: regInfo{
   634  				inputs: []regMask{gp, gp},
   635  			},
   636  			typ:            "Mem",
   637  			faultOnNilArg0: true,
   638  			faultOnNilArg1: true,
   639  			unsafePoint:    true,
   640  		},
   641  
   642  		{name: "LoweredAtomicStore8", argLength: 3, reg: gpstore, typ: "Mem", aux: "Int64", faultOnNilArg0: true, hasSideEffects: true},
   643  		{name: "LoweredAtomicStore32", argLength: 3, reg: gpstore, typ: "Mem", aux: "Int64", faultOnNilArg0: true, hasSideEffects: true},
   644  		{name: "LoweredAtomicStore64", argLength: 3, reg: gpstore, typ: "Mem", aux: "Int64", faultOnNilArg0: true, hasSideEffects: true},
   645  
   646  		{name: "LoweredAtomicLoad8", argLength: 2, reg: gpload, typ: "UInt8", aux: "Int64", clobberFlags: true, faultOnNilArg0: true},
   647  		{name: "LoweredAtomicLoad32", argLength: 2, reg: gpload, typ: "UInt32", aux: "Int64", clobberFlags: true, faultOnNilArg0: true},
   648  		{name: "LoweredAtomicLoad64", argLength: 2, reg: gpload, typ: "Int64", aux: "Int64", clobberFlags: true, faultOnNilArg0: true},
   649  		{name: "LoweredAtomicLoadPtr", argLength: 2, reg: gpload, typ: "Int64", aux: "Int64", clobberFlags: true, faultOnNilArg0: true},
   650  
   651  		// atomic add32, 64
   652  		// LWSYNC
   653  		// LDAR         (Rarg0), Rout
   654  		// ADD		Rarg1, Rout
   655  		// STDCCC       Rout, (Rarg0)
   656  		// BNE          -3(PC)
   657  		// return new sum
   658  		{name: "LoweredAtomicAdd32", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   659  		{name: "LoweredAtomicAdd64", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   660  
   661  		// atomic exchange8, 32, 64
   662  		// LWSYNC
   663  		// LDAR         (Rarg0), Rout
   664  		// STDCCC       Rarg1, (Rarg0)
   665  		// BNE          -2(PC)
   666  		// ISYNC
   667  		// return old val
   668  		{name: "LoweredAtomicExchange8", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   669  		{name: "LoweredAtomicExchange32", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   670  		{name: "LoweredAtomicExchange64", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   671  
   672  		// atomic compare and swap.
   673  		// arg0 = pointer, arg1 = old value, arg2 = new value, arg3 = memory. auxint must be zero.
   674  		// if *arg0 == arg1 {
   675  		//   *arg0 = arg2
   676  		//   return (true, memory)
   677  		// } else {
   678  		//   return (false, memory)
   679  		// }
   680  		// SYNC
   681  		// LDAR		(Rarg0), Rtmp
   682  		// CMP		Rarg1, Rtmp
   683  		// BNE		3(PC)
   684  		// STDCCC	Rarg2, (Rarg0)
   685  		// BNE		-4(PC)
   686  		// CBNZ         Rtmp, -4(PC)
   687  		// CSET         EQ, Rout
   688  		{name: "LoweredAtomicCas64", argLength: 4, reg: gpcas, resultNotInArgs: true, aux: "Int64", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   689  		{name: "LoweredAtomicCas32", argLength: 4, reg: gpcas, resultNotInArgs: true, aux: "Int64", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   690  
   691  		// atomic 8/32 and/or.
   692  		// *arg0 &= (|=) arg1. arg2=mem. returns memory. auxint must be zero.
   693  		// LBAR/LWAT	(Rarg0), Rtmp
   694  		// AND/OR	Rarg1, Rtmp
   695  		// STBCCC/STWCCC Rtmp, (Rarg0), Rtmp
   696  		// BNE		Rtmp, -3(PC)
   697  		{name: "LoweredAtomicAnd8", argLength: 3, reg: gpstore, asm: "AND", faultOnNilArg0: true, hasSideEffects: true},
   698  		{name: "LoweredAtomicAnd32", argLength: 3, reg: gpstore, asm: "AND", faultOnNilArg0: true, hasSideEffects: true},
   699  		{name: "LoweredAtomicOr8", argLength: 3, reg: gpstore, asm: "OR", faultOnNilArg0: true, hasSideEffects: true},
   700  		{name: "LoweredAtomicOr32", argLength: 3, reg: gpstore, asm: "OR", faultOnNilArg0: true, hasSideEffects: true},
   701  
   702  		// LoweredWB invokes runtime.gcWriteBarrier. arg0=mem, auxint=# of buffer entries needed
   703  		// It preserves R0 through R17 (except special registers R1, R2, R11, R12, R13), g, and R20 and R21,
   704  		// but may clobber anything else, including R31 (REGTMP).
   705  		// Returns a pointer to a write barrier buffer in R29.
   706  		{name: "LoweredWB", argLength: 1, reg: regInfo{clobbers: (callerSave &^ buildReg("R0 R3 R4 R5 R6 R7 R8 R9 R10 R14 R15 R16 R17 R20 R21 g")) | buildReg("R31"), outputs: []regMask{buildReg("R29")}}, clobberFlags: true, aux: "Int64"},
   707  
   708  		{name: "LoweredPubBarrier", argLength: 1, asm: "LWSYNC", hasSideEffects: true}, // Do data barrier. arg0=memory
   709  		// There are three of these functions so that they can have three different register inputs.
   710  		// When we check 0 <= c <= cap (A), then 0 <= b <= c (B), then 0 <= a <= b (C), we want the
   711  		// default registers to match so we don't need to copy registers around unnecessarily.
   712  		{name: "LoweredPanicBoundsA", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r5, r6}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
   713  		{name: "LoweredPanicBoundsB", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r4, r5}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
   714  		{name: "LoweredPanicBoundsC", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r3, r4}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
   715  
   716  		// (InvertFlags (CMP a b)) == (CMP b a)
   717  		// So if we want (LessThan (CMP a b)) but we can't do that because a is a constant,
   718  		// then we do (LessThan (InvertFlags (CMP b a))) instead.
   719  		// Rewrites will convert this to (GreaterThan (CMP b a)).
   720  		// InvertFlags is a pseudo-op which can't appear in assembly output.
   721  		{name: "InvertFlags", argLength: 1}, // reverse direction of arg0
   722  
   723  		// Constant flag values. For any comparison, there are 3 possible
   724  		// outcomes: either the three from the signed total order (<,==,>)
   725  		// or the three from the unsigned total order, depending on which
   726  		// comparison operation was used (CMP or CMPU -- PPC is different from
   727  		// the other architectures, which have a single comparison producing
   728  		// both signed and unsigned comparison results.)
   729  
   730  		// These ops are for temporary use by rewrite rules. They
   731  		// cannot appear in the generated assembly.
   732  		{name: "FlagEQ"}, // equal
   733  		{name: "FlagLT"}, // signed < or unsigned <
   734  		{name: "FlagGT"}, // signed > or unsigned >
   735  	}
   736  
   737  	blocks := []blockData{
   738  		{name: "EQ", controls: 1},
   739  		{name: "NE", controls: 1},
   740  		{name: "LT", controls: 1},
   741  		{name: "LE", controls: 1},
   742  		{name: "GT", controls: 1},
   743  		{name: "GE", controls: 1},
   744  		{name: "FLT", controls: 1},
   745  		{name: "FLE", controls: 1},
   746  		{name: "FGT", controls: 1},
   747  		{name: "FGE", controls: 1},
   748  	}
   749  
   750  	archs = append(archs, arch{
   751  		name:               "PPC64",
   752  		pkg:                "cmd/internal/obj/ppc64",
   753  		genfile:            "../../ppc64/ssa.go",
   754  		ops:                ops,
   755  		blocks:             blocks,
   756  		regnames:           regNamesPPC64,
   757  		ParamIntRegNames:   "R3 R4 R5 R6 R7 R8 R9 R10 R14 R15 R16 R17",
   758  		ParamFloatRegNames: "F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12",
   759  		gpregmask:          gp,
   760  		fpregmask:          fp,
   761  		specialregmask:     xer,
   762  		framepointerreg:    -1,
   763  		linkreg:            -1, // not used
   764  	})
   765  }
   766
View as plain text