Source file src/cmd/compile/internal/loong64/ssa.go

     1  // Copyright 2022 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package loong64
     6  
     7  import (
     8  	"math"
     9  
    10  	"cmd/compile/internal/base"
    11  	"cmd/compile/internal/ir"
    12  	"cmd/compile/internal/logopt"
    13  	"cmd/compile/internal/objw"
    14  	"cmd/compile/internal/ssa"
    15  	"cmd/compile/internal/ssagen"
    16  	"cmd/compile/internal/types"
    17  	"cmd/internal/obj"
    18  	"cmd/internal/obj/loong64"
    19  	"internal/abi"
    20  )
    21  
    22  // isFPreg reports whether r is an FP register.
    23  func isFPreg(r int16) bool {
    24  	return loong64.REG_F0 <= r && r <= loong64.REG_F31
    25  }
    26  
    27  // loadByType returns the load instruction of the given type.
    28  func loadByType(t *types.Type, r int16) obj.As {
    29  	if isFPreg(r) {
    30  		if t.Size() == 4 {
    31  			return loong64.AMOVF
    32  		} else {
    33  			return loong64.AMOVD
    34  		}
    35  	} else {
    36  		switch t.Size() {
    37  		case 1:
    38  			if t.IsSigned() {
    39  				return loong64.AMOVB
    40  			} else {
    41  				return loong64.AMOVBU
    42  			}
    43  		case 2:
    44  			if t.IsSigned() {
    45  				return loong64.AMOVH
    46  			} else {
    47  				return loong64.AMOVHU
    48  			}
    49  		case 4:
    50  			if t.IsSigned() {
    51  				return loong64.AMOVW
    52  			} else {
    53  				return loong64.AMOVWU
    54  			}
    55  		case 8:
    56  			return loong64.AMOVV
    57  		}
    58  	}
    59  	panic("bad load type")
    60  }
    61  
    62  // storeByType returns the store instruction of the given type.
    63  func storeByType(t *types.Type, r int16) obj.As {
    64  	if isFPreg(r) {
    65  		if t.Size() == 4 {
    66  			return loong64.AMOVF
    67  		} else {
    68  			return loong64.AMOVD
    69  		}
    70  	} else {
    71  		switch t.Size() {
    72  		case 1:
    73  			return loong64.AMOVB
    74  		case 2:
    75  			return loong64.AMOVH
    76  		case 4:
    77  			return loong64.AMOVW
    78  		case 8:
    79  			return loong64.AMOVV
    80  		}
    81  	}
    82  	panic("bad store type")
    83  }
    84  
    85  // largestMove returns the largest move instruction possible and its size,
    86  // given the alignment of the total size of the move.
    87  //
    88  // e.g., a 16-byte move may use MOVV, but an 11-byte move must use MOVB.
    89  //
    90  // Note that the moves may not be on naturally aligned addresses depending on
    91  // the source and destination.
    92  //
    93  // This matches the calculation in ssa.moveSize.
    94  func largestMove(alignment int64) (obj.As, int64) {
    95  	switch {
    96  	case alignment%8 == 0:
    97  		return loong64.AMOVV, 8
    98  	case alignment%4 == 0:
    99  		return loong64.AMOVW, 4
   100  	case alignment%2 == 0:
   101  		return loong64.AMOVH, 2
   102  	default:
   103  		return loong64.AMOVB, 1
   104  	}
   105  }
   106  
   107  func ssaGenValue(s *ssagen.State, v *ssa.Value) {
   108  	switch v.Op {
   109  	case ssa.OpCopy, ssa.OpLOONG64MOVVreg:
   110  		if v.Type.IsMemory() {
   111  			return
   112  		}
   113  		x := v.Args[0].Reg()
   114  		y := v.Reg()
   115  		if x == y {
   116  			return
   117  		}
   118  		as := loong64.AMOVV
   119  		if isFPreg(x) && isFPreg(y) {
   120  			as = loong64.AMOVD
   121  		}
   122  		p := s.Prog(as)
   123  		p.From.Type = obj.TYPE_REG
   124  		p.From.Reg = x
   125  		p.To.Type = obj.TYPE_REG
   126  		p.To.Reg = y
   127  	case ssa.OpLOONG64MOVVnop,
   128  		ssa.OpLOONG64ZERO,
   129  		ssa.OpLOONG64LoweredRound32F,
   130  		ssa.OpLOONG64LoweredRound64F:
   131  		// nothing to do
   132  	case ssa.OpLoadReg:
   133  		if v.Type.IsFlags() {
   134  			v.Fatalf("load flags not implemented: %v", v.LongString())
   135  			return
   136  		}
   137  		r := v.Reg()
   138  		p := s.Prog(loadByType(v.Type, r))
   139  		ssagen.AddrAuto(&p.From, v.Args[0])
   140  		p.To.Type = obj.TYPE_REG
   141  		p.To.Reg = r
   142  	case ssa.OpStoreReg:
   143  		if v.Type.IsFlags() {
   144  			v.Fatalf("store flags not implemented: %v", v.LongString())
   145  			return
   146  		}
   147  		r := v.Args[0].Reg()
   148  		p := s.Prog(storeByType(v.Type, r))
   149  		p.From.Type = obj.TYPE_REG
   150  		p.From.Reg = r
   151  		ssagen.AddrAuto(&p.To, v)
   152  	case ssa.OpArgIntReg, ssa.OpArgFloatReg:
   153  		// The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill
   154  		// The loop only runs once.
   155  		for _, a := range v.Block.Func.RegArgs {
   156  			// Pass the spill/unspill information along to the assembler, offset by size of
   157  			// the saved LR slot.
   158  			addr := ssagen.SpillSlotAddr(a, loong64.REGSP, base.Ctxt.Arch.FixedFrameSize)
   159  			s.FuncInfo().AddSpill(
   160  				obj.RegSpill{Reg: a.Reg, Addr: addr, Unspill: loadByType(a.Type, a.Reg), Spill: storeByType(a.Type, a.Reg)})
   161  		}
   162  		v.Block.Func.RegArgs = nil
   163  		ssagen.CheckArgReg(v)
   164  	case ssa.OpLOONG64ADDV,
   165  		ssa.OpLOONG64SUBV,
   166  		ssa.OpLOONG64AND,
   167  		ssa.OpLOONG64OR,
   168  		ssa.OpLOONG64XOR,
   169  		ssa.OpLOONG64NOR,
   170  		ssa.OpLOONG64ANDN,
   171  		ssa.OpLOONG64ORN,
   172  		ssa.OpLOONG64SLL,
   173  		ssa.OpLOONG64SLLV,
   174  		ssa.OpLOONG64SRL,
   175  		ssa.OpLOONG64SRLV,
   176  		ssa.OpLOONG64SRA,
   177  		ssa.OpLOONG64SRAV,
   178  		ssa.OpLOONG64ROTR,
   179  		ssa.OpLOONG64ROTRV,
   180  		ssa.OpLOONG64ADDF,
   181  		ssa.OpLOONG64ADDD,
   182  		ssa.OpLOONG64SUBF,
   183  		ssa.OpLOONG64SUBD,
   184  		ssa.OpLOONG64MULF,
   185  		ssa.OpLOONG64MULD,
   186  		ssa.OpLOONG64DIVF,
   187  		ssa.OpLOONG64DIVD,
   188  		ssa.OpLOONG64MULV, ssa.OpLOONG64MULHV, ssa.OpLOONG64MULHVU, ssa.OpLOONG64MULH, ssa.OpLOONG64MULHU,
   189  		ssa.OpLOONG64DIVV, ssa.OpLOONG64REMV, ssa.OpLOONG64DIVVU, ssa.OpLOONG64REMVU,
   190  		ssa.OpLOONG64MULWVW, ssa.OpLOONG64MULWVWU,
   191  		ssa.OpLOONG64FCOPYSGD:
   192  		p := s.Prog(v.Op.Asm())
   193  		p.From.Type = obj.TYPE_REG
   194  		p.From.Reg = v.Args[1].Reg()
   195  		p.Reg = v.Args[0].Reg()
   196  		p.To.Type = obj.TYPE_REG
   197  		p.To.Reg = v.Reg()
   198  
   199  	case ssa.OpLOONG64BSTRPICKV,
   200  		ssa.OpLOONG64BSTRPICKW:
   201  		p := s.Prog(v.Op.Asm())
   202  		p.From.Type = obj.TYPE_CONST
   203  		if v.Op == ssa.OpLOONG64BSTRPICKW {
   204  			p.From.Offset = v.AuxInt >> 5
   205  			p.AddRestSourceConst(v.AuxInt & 0x1f)
   206  		} else {
   207  			p.From.Offset = v.AuxInt >> 6
   208  			p.AddRestSourceConst(v.AuxInt & 0x3f)
   209  		}
   210  		p.Reg = v.Args[0].Reg()
   211  		p.To.Type = obj.TYPE_REG
   212  		p.To.Reg = v.Reg()
   213  
   214  	case ssa.OpLOONG64FMINF,
   215  		ssa.OpLOONG64FMIND,
   216  		ssa.OpLOONG64FMAXF,
   217  		ssa.OpLOONG64FMAXD:
   218  		// ADDD Rarg0, Rarg1, Rout
   219  		// CMPEQD Rarg0, Rarg0, FCC0
   220  		// bceqz FCC0, end
   221  		// CMPEQD Rarg1, Rarg1, FCC0
   222  		// bceqz FCC0, end
   223  		// F(MIN|MAX)(F|D)
   224  
   225  		r0 := v.Args[0].Reg()
   226  		r1 := v.Args[1].Reg()
   227  		out := v.Reg()
   228  		add, fcmp := loong64.AADDD, loong64.ACMPEQD
   229  		if v.Op == ssa.OpLOONG64FMINF || v.Op == ssa.OpLOONG64FMAXF {
   230  			add = loong64.AADDF
   231  			fcmp = loong64.ACMPEQF
   232  		}
   233  		p1 := s.Prog(add)
   234  		p1.From.Type = obj.TYPE_REG
   235  		p1.From.Reg = r0
   236  		p1.Reg = r1
   237  		p1.To.Type = obj.TYPE_REG
   238  		p1.To.Reg = out
   239  
   240  		p2 := s.Prog(fcmp)
   241  		p2.From.Type = obj.TYPE_REG
   242  		p2.From.Reg = r0
   243  		p2.Reg = r0
   244  		p2.To.Type = obj.TYPE_REG
   245  		p2.To.Reg = loong64.REG_FCC0
   246  
   247  		p3 := s.Prog(loong64.ABFPF)
   248  		p3.To.Type = obj.TYPE_BRANCH
   249  
   250  		p4 := s.Prog(fcmp)
   251  		p4.From.Type = obj.TYPE_REG
   252  		p4.From.Reg = r1
   253  		p4.Reg = r1
   254  		p4.To.Type = obj.TYPE_REG
   255  		p4.To.Reg = loong64.REG_FCC0
   256  
   257  		p5 := s.Prog(loong64.ABFPF)
   258  		p5.To.Type = obj.TYPE_BRANCH
   259  
   260  		p6 := s.Prog(v.Op.Asm())
   261  		p6.From.Type = obj.TYPE_REG
   262  		p6.From.Reg = r1
   263  		p6.Reg = r0
   264  		p6.To.Type = obj.TYPE_REG
   265  		p6.To.Reg = out
   266  
   267  		nop := s.Prog(obj.ANOP)
   268  		p3.To.SetTarget(nop)
   269  		p5.To.SetTarget(nop)
   270  
   271  	case ssa.OpLOONG64SGT,
   272  		ssa.OpLOONG64SGTU:
   273  		p := s.Prog(v.Op.Asm())
   274  		p.From.Type = obj.TYPE_REG
   275  		p.From.Reg = v.Args[0].Reg()
   276  		p.Reg = v.Args[1].Reg()
   277  		p.To.Type = obj.TYPE_REG
   278  		p.To.Reg = v.Reg()
   279  	case ssa.OpLOONG64ADDVconst,
   280  		ssa.OpLOONG64ADDV16const,
   281  		ssa.OpLOONG64SUBVconst,
   282  		ssa.OpLOONG64ANDconst,
   283  		ssa.OpLOONG64ORconst,
   284  		ssa.OpLOONG64XORconst,
   285  		ssa.OpLOONG64SLLconst,
   286  		ssa.OpLOONG64SLLVconst,
   287  		ssa.OpLOONG64SRLconst,
   288  		ssa.OpLOONG64SRLVconst,
   289  		ssa.OpLOONG64SRAconst,
   290  		ssa.OpLOONG64SRAVconst,
   291  		ssa.OpLOONG64ROTRconst,
   292  		ssa.OpLOONG64ROTRVconst,
   293  		ssa.OpLOONG64SGTconst,
   294  		ssa.OpLOONG64SGTUconst:
   295  		p := s.Prog(v.Op.Asm())
   296  		p.From.Type = obj.TYPE_CONST
   297  		p.From.Offset = v.AuxInt
   298  		p.Reg = v.Args[0].Reg()
   299  		p.To.Type = obj.TYPE_REG
   300  		p.To.Reg = v.Reg()
   301  
   302  	case ssa.OpLOONG64NORconst:
   303  		// MOVV $const, Rtmp
   304  		// NOR  Rtmp, Rarg0, Rout
   305  		p := s.Prog(loong64.AMOVV)
   306  		p.From.Type = obj.TYPE_CONST
   307  		p.From.Offset = v.AuxInt
   308  		p.To.Type = obj.TYPE_REG
   309  		p.To.Reg = loong64.REGTMP
   310  
   311  		p2 := s.Prog(v.Op.Asm())
   312  		p2.From.Type = obj.TYPE_REG
   313  		p2.From.Reg = loong64.REGTMP
   314  		p2.Reg = v.Args[0].Reg()
   315  		p2.To.Type = obj.TYPE_REG
   316  		p2.To.Reg = v.Reg()
   317  
   318  	case ssa.OpLOONG64MOVVconst:
   319  		r := v.Reg()
   320  		p := s.Prog(v.Op.Asm())
   321  		p.From.Type = obj.TYPE_CONST
   322  		p.From.Offset = v.AuxInt
   323  		p.To.Type = obj.TYPE_REG
   324  		p.To.Reg = r
   325  		if isFPreg(r) {
   326  			// cannot move into FP or special registers, use TMP as intermediate
   327  			p.To.Reg = loong64.REGTMP
   328  			p = s.Prog(loong64.AMOVV)
   329  			p.From.Type = obj.TYPE_REG
   330  			p.From.Reg = loong64.REGTMP
   331  			p.To.Type = obj.TYPE_REG
   332  			p.To.Reg = r
   333  		}
   334  	case ssa.OpLOONG64MOVFconst,
   335  		ssa.OpLOONG64MOVDconst:
   336  		p := s.Prog(v.Op.Asm())
   337  		p.From.Type = obj.TYPE_FCONST
   338  		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
   339  		p.To.Type = obj.TYPE_REG
   340  		p.To.Reg = v.Reg()
   341  	case ssa.OpLOONG64CMPEQF,
   342  		ssa.OpLOONG64CMPEQD,
   343  		ssa.OpLOONG64CMPGEF,
   344  		ssa.OpLOONG64CMPGED,
   345  		ssa.OpLOONG64CMPGTF,
   346  		ssa.OpLOONG64CMPGTD:
   347  		p := s.Prog(v.Op.Asm())
   348  		p.From.Type = obj.TYPE_REG
   349  		p.From.Reg = v.Args[0].Reg()
   350  		p.Reg = v.Args[1].Reg()
   351  		p.To.Type = obj.TYPE_REG
   352  		p.To.Reg = loong64.REG_FCC0
   353  
   354  	case ssa.OpLOONG64FMADDF,
   355  		ssa.OpLOONG64FMADDD,
   356  		ssa.OpLOONG64FMSUBF,
   357  		ssa.OpLOONG64FMSUBD,
   358  		ssa.OpLOONG64FNMADDF,
   359  		ssa.OpLOONG64FNMADDD,
   360  		ssa.OpLOONG64FNMSUBF,
   361  		ssa.OpLOONG64FNMSUBD:
   362  		p := s.Prog(v.Op.Asm())
   363  		// r=(FMA x y z) -> FMADDD z, y, x, r
   364  		// the SSA operand order is for taking advantage of
   365  		// commutativity (that only applies for the first two operands)
   366  		r := v.Reg()
   367  		x := v.Args[0].Reg()
   368  		y := v.Args[1].Reg()
   369  		z := v.Args[2].Reg()
   370  		p.From.Type = obj.TYPE_REG
   371  		p.From.Reg = z
   372  		p.Reg = y
   373  		p.AddRestSourceReg(x)
   374  		p.To.Type = obj.TYPE_REG
   375  		p.To.Reg = r
   376  
   377  	case ssa.OpLOONG64MOVVaddr:
   378  		p := s.Prog(loong64.AMOVV)
   379  		p.From.Type = obj.TYPE_ADDR
   380  		p.From.Reg = v.Args[0].Reg()
   381  		var wantreg string
   382  		// MOVV $sym+off(base), R
   383  		// the assembler expands it as the following:
   384  		// - base is SP: add constant offset to SP (R3)
   385  		// when constant is large, tmp register (R30) may be used
   386  		// - base is SB: load external address with relocation
   387  		switch v.Aux.(type) {
   388  		default:
   389  			v.Fatalf("aux is of unknown type %T", v.Aux)
   390  		case *obj.LSym:
   391  			wantreg = "SB"
   392  			ssagen.AddAux(&p.From, v)
   393  		case *ir.Name:
   394  			wantreg = "SP"
   395  			ssagen.AddAux(&p.From, v)
   396  		case nil:
   397  			// No sym, just MOVV $off(SP), R
   398  			wantreg = "SP"
   399  			p.From.Offset = v.AuxInt
   400  		}
   401  		if reg := v.Args[0].RegName(); reg != wantreg {
   402  			v.Fatalf("bad reg %s for symbol type %T, want %s", reg, v.Aux, wantreg)
   403  		}
   404  		p.To.Type = obj.TYPE_REG
   405  		p.To.Reg = v.Reg()
   406  
   407  	case ssa.OpLOONG64MOVBloadidx,
   408  		ssa.OpLOONG64MOVBUloadidx,
   409  		ssa.OpLOONG64MOVHloadidx,
   410  		ssa.OpLOONG64MOVHUloadidx,
   411  		ssa.OpLOONG64MOVWloadidx,
   412  		ssa.OpLOONG64MOVWUloadidx,
   413  		ssa.OpLOONG64MOVVloadidx,
   414  		ssa.OpLOONG64MOVFloadidx,
   415  		ssa.OpLOONG64MOVDloadidx:
   416  		p := s.Prog(v.Op.Asm())
   417  		p.From.Type = obj.TYPE_MEM
   418  		p.From.Name = obj.NAME_NONE
   419  		p.From.Reg = v.Args[0].Reg()
   420  		p.From.Index = v.Args[1].Reg()
   421  		p.To.Type = obj.TYPE_REG
   422  		p.To.Reg = v.Reg()
   423  
   424  	case ssa.OpLOONG64MOVBstoreidx,
   425  		ssa.OpLOONG64MOVHstoreidx,
   426  		ssa.OpLOONG64MOVWstoreidx,
   427  		ssa.OpLOONG64MOVVstoreidx,
   428  		ssa.OpLOONG64MOVFstoreidx,
   429  		ssa.OpLOONG64MOVDstoreidx:
   430  		p := s.Prog(v.Op.Asm())
   431  		p.From.Type = obj.TYPE_REG
   432  		p.From.Reg = v.Args[2].Reg()
   433  		p.To.Type = obj.TYPE_MEM
   434  		p.To.Name = obj.NAME_NONE
   435  		p.To.Reg = v.Args[0].Reg()
   436  		p.To.Index = v.Args[1].Reg()
   437  
   438  	case ssa.OpLOONG64MOVBload,
   439  		ssa.OpLOONG64MOVBUload,
   440  		ssa.OpLOONG64MOVHload,
   441  		ssa.OpLOONG64MOVHUload,
   442  		ssa.OpLOONG64MOVWload,
   443  		ssa.OpLOONG64MOVWUload,
   444  		ssa.OpLOONG64MOVVload,
   445  		ssa.OpLOONG64MOVFload,
   446  		ssa.OpLOONG64MOVDload:
   447  		p := s.Prog(v.Op.Asm())
   448  		p.From.Type = obj.TYPE_MEM
   449  		p.From.Reg = v.Args[0].Reg()
   450  		ssagen.AddAux(&p.From, v)
   451  		p.To.Type = obj.TYPE_REG
   452  		p.To.Reg = v.Reg()
   453  	case ssa.OpLOONG64MOVBstore,
   454  		ssa.OpLOONG64MOVHstore,
   455  		ssa.OpLOONG64MOVWstore,
   456  		ssa.OpLOONG64MOVVstore,
   457  		ssa.OpLOONG64MOVFstore,
   458  		ssa.OpLOONG64MOVDstore:
   459  		p := s.Prog(v.Op.Asm())
   460  		p.From.Type = obj.TYPE_REG
   461  		p.From.Reg = v.Args[1].Reg()
   462  		p.To.Type = obj.TYPE_MEM
   463  		p.To.Reg = v.Args[0].Reg()
   464  		ssagen.AddAux(&p.To, v)
   465  	case ssa.OpLOONG64MOVBreg,
   466  		ssa.OpLOONG64MOVBUreg,
   467  		ssa.OpLOONG64MOVHreg,
   468  		ssa.OpLOONG64MOVHUreg,
   469  		ssa.OpLOONG64MOVWreg,
   470  		ssa.OpLOONG64MOVWUreg:
   471  		a := v.Args[0]
   472  		for a.Op == ssa.OpCopy || a.Op == ssa.OpLOONG64MOVVreg {
   473  			a = a.Args[0]
   474  		}
   475  		if a.Op == ssa.OpLoadReg && loong64.REG_R0 <= a.Reg() && a.Reg() <= loong64.REG_R31 {
   476  			// LoadReg from a narrower type does an extension, except loading
   477  			// to a floating point register. So only eliminate the extension
   478  			// if it is loaded to an integer register.
   479  
   480  			t := a.Type
   481  			switch {
   482  			case v.Op == ssa.OpLOONG64MOVBreg && t.Size() == 1 && t.IsSigned(),
   483  				v.Op == ssa.OpLOONG64MOVBUreg && t.Size() == 1 && !t.IsSigned(),
   484  				v.Op == ssa.OpLOONG64MOVHreg && t.Size() == 2 && t.IsSigned(),
   485  				v.Op == ssa.OpLOONG64MOVHUreg && t.Size() == 2 && !t.IsSigned(),
   486  				v.Op == ssa.OpLOONG64MOVWreg && t.Size() == 4 && t.IsSigned(),
   487  				v.Op == ssa.OpLOONG64MOVWUreg && t.Size() == 4 && !t.IsSigned():
   488  				// arg is a proper-typed load, already zero/sign-extended, don't extend again
   489  				if v.Reg() == v.Args[0].Reg() {
   490  					return
   491  				}
   492  				p := s.Prog(loong64.AMOVV)
   493  				p.From.Type = obj.TYPE_REG
   494  				p.From.Reg = v.Args[0].Reg()
   495  				p.To.Type = obj.TYPE_REG
   496  				p.To.Reg = v.Reg()
   497  				return
   498  			default:
   499  			}
   500  		}
   501  		fallthrough
   502  
   503  	case ssa.OpLOONG64MOVWF,
   504  		ssa.OpLOONG64MOVWD,
   505  		ssa.OpLOONG64TRUNCFW,
   506  		ssa.OpLOONG64TRUNCDW,
   507  		ssa.OpLOONG64MOVVF,
   508  		ssa.OpLOONG64MOVVD,
   509  		ssa.OpLOONG64TRUNCFV,
   510  		ssa.OpLOONG64TRUNCDV,
   511  		ssa.OpLOONG64MOVFD,
   512  		ssa.OpLOONG64MOVDF,
   513  		ssa.OpLOONG64MOVWfpgp,
   514  		ssa.OpLOONG64MOVWgpfp,
   515  		ssa.OpLOONG64MOVVfpgp,
   516  		ssa.OpLOONG64MOVVgpfp,
   517  		ssa.OpLOONG64NEGF,
   518  		ssa.OpLOONG64NEGD,
   519  		ssa.OpLOONG64CLZW,
   520  		ssa.OpLOONG64CLZV,
   521  		ssa.OpLOONG64CTZW,
   522  		ssa.OpLOONG64CTZV,
   523  		ssa.OpLOONG64SQRTD,
   524  		ssa.OpLOONG64SQRTF,
   525  		ssa.OpLOONG64REVB2H,
   526  		ssa.OpLOONG64REVB2W,
   527  		ssa.OpLOONG64REVB4H,
   528  		ssa.OpLOONG64REVBV,
   529  		ssa.OpLOONG64BITREV4B,
   530  		ssa.OpLOONG64BITREVW,
   531  		ssa.OpLOONG64BITREVV,
   532  		ssa.OpLOONG64ABSF,
   533  		ssa.OpLOONG64ABSD:
   534  		p := s.Prog(v.Op.Asm())
   535  		p.From.Type = obj.TYPE_REG
   536  		p.From.Reg = v.Args[0].Reg()
   537  		p.To.Type = obj.TYPE_REG
   538  		p.To.Reg = v.Reg()
   539  
   540  	case ssa.OpLOONG64VPCNT64,
   541  		ssa.OpLOONG64VPCNT32,
   542  		ssa.OpLOONG64VPCNT16:
   543  		p := s.Prog(v.Op.Asm())
   544  		p.From.Type = obj.TYPE_REG
   545  		p.From.Reg = ((v.Args[0].Reg() - loong64.REG_F0) & 31) + loong64.REG_V0
   546  		p.To.Type = obj.TYPE_REG
   547  		p.To.Reg = ((v.Reg() - loong64.REG_F0) & 31) + loong64.REG_V0
   548  
   549  	case ssa.OpLOONG64NEGV:
   550  		// SUB from REGZERO
   551  		p := s.Prog(loong64.ASUBVU)
   552  		p.From.Type = obj.TYPE_REG
   553  		p.From.Reg = v.Args[0].Reg()
   554  		p.Reg = loong64.REGZERO
   555  		p.To.Type = obj.TYPE_REG
   556  		p.To.Reg = v.Reg()
   557  
   558  	case ssa.OpLOONG64LoweredZero:
   559  		ptrReg := v.Args[0].Reg()
   560  		n := v.AuxInt
   561  		if n < 16 {
   562  			v.Fatalf("Zero too small %d", n)
   563  		}
   564  
   565  		// Generate Zeroing instructions.
   566  		var off int64
   567  		for n >= 8 {
   568  			// MOVV     ZR, off(ptrReg)
   569  			zero8(s, ptrReg, off)
   570  			off += 8
   571  			n -= 8
   572  		}
   573  		if n != 0 {
   574  			// MOVV     ZR, off+n-8(ptrReg)
   575  			zero8(s, ptrReg, off+n-8)
   576  		}
   577  	case ssa.OpLOONG64LoweredZeroLoop:
   578  		ptrReg := v.Args[0].Reg()
   579  		countReg := v.RegTmp()
   580  		flagReg := int16(loong64.REGTMP)
   581  		var off int64
   582  		n := v.AuxInt
   583  		loopSize := int64(64)
   584  		if n < 3*loopSize {
   585  			// - a loop count of 0 won't work.
   586  			// - a loop count of 1 is useless.
   587  			// - a loop count of 2 is a code size ~tie
   588  			//     4 instructions to implement the loop
   589  			//     8 instructions in the loop body
   590  			//   vs
    591  		//     16 instructions in the straightline code
   592  			//   Might as well use straightline code.
   593  			v.Fatalf("ZeroLoop size too small %d", n)
   594  		}
   595  
   596  		//    MOVV    $n/loopSize, countReg
   597  		//    MOVBU   ir.Syms.Loong64HasLSX, flagReg
   598  		//    BNE     flagReg, lsxInit
   599  		// genericInit:
   600  		//    for off = 0; off < loopSize; off += 8 {
   601  		//            zero8(s, ptrReg, off)
   602  		//    }
   603  		//    ADDV    $loopSize, ptrReg
   604  		//    SUBV    $1, countReg
   605  		//    BNE     countReg, genericInit
   606  		//    JMP     tail
   607  		// lsxInit:
   608  		//    VXORV   V31, V31, V31, v31 = 0
   609  		//    for off = 0; off < loopSize; off += 16 {
   610  		//            zero16(s, V31, ptrReg, off)
   611  		//    }
   612  		//    ADDV    $loopSize, ptrReg
   613  		//    SUBV    $1, countReg
   614  		//    BNE     countReg, lsxInit
   615  		// tail:
   616  		//    n %= loopSize
   617  		//    for off = 0; n >= 8; off += 8, n -= 8 {
   618  		//            zero8(s, ptrReg, off)
   619  		//    }
   620  		//
   621  		//    if n != 0 {
   622  		//           zero8(s, ptrReg, off+n-8)
   623  		//    }
   624  
   625  		p1 := s.Prog(loong64.AMOVV)
   626  		p1.From.Type = obj.TYPE_CONST
   627  		p1.From.Offset = n / loopSize
   628  		p1.To.Type = obj.TYPE_REG
   629  		p1.To.Reg = countReg
   630  
   631  		p2 := s.Prog(loong64.AMOVBU)
   632  		p2.From.Type = obj.TYPE_MEM
   633  		p2.From.Name = obj.NAME_EXTERN
   634  		p2.From.Sym = ir.Syms.Loong64HasLSX
   635  		p2.To.Type = obj.TYPE_REG
   636  		p2.To.Reg = flagReg
   637  
   638  		p3 := s.Prog(loong64.ABNE)
   639  		p3.From.Type = obj.TYPE_REG
   640  		p3.From.Reg = flagReg
   641  		p3.To.Type = obj.TYPE_BRANCH
   642  
   643  		for off = 0; off < loopSize; off += 8 {
   644  			zero8(s, ptrReg, off)
   645  		}
   646  
   647  		p4 := s.Prog(loong64.AADDV)
   648  		p4.From.Type = obj.TYPE_CONST
   649  		p4.From.Offset = loopSize
   650  		p4.To.Type = obj.TYPE_REG
   651  		p4.To.Reg = ptrReg
   652  
   653  		p5 := s.Prog(loong64.ASUBV)
   654  		p5.From.Type = obj.TYPE_CONST
   655  		p5.From.Offset = 1
   656  		p5.To.Type = obj.TYPE_REG
   657  		p5.To.Reg = countReg
   658  
   659  		p6 := s.Prog(loong64.ABNE)
   660  		p6.From.Type = obj.TYPE_REG
   661  		p6.From.Reg = countReg
   662  		p6.To.Type = obj.TYPE_BRANCH
   663  		p6.To.SetTarget(p3.Link)
   664  
   665  		p7 := s.Prog(obj.AJMP)
   666  		p7.To.Type = obj.TYPE_BRANCH
   667  
   668  		p8 := s.Prog(loong64.AVXORV)
   669  		p8.From.Type = obj.TYPE_REG
   670  		p8.From.Reg = loong64.REG_V31
   671  		p8.To.Type = obj.TYPE_REG
   672  		p8.To.Reg = loong64.REG_V31
   673  		p3.To.SetTarget(p8)
   674  
   675  		for off = 0; off < loopSize; off += 16 {
   676  			zero16(s, loong64.REG_V31, ptrReg, off)
   677  		}
   678  
   679  		p9 := s.Prog(loong64.AADDV)
   680  		p9.From.Type = obj.TYPE_CONST
   681  		p9.From.Offset = loopSize
   682  		p9.To.Type = obj.TYPE_REG
   683  		p9.To.Reg = ptrReg
   684  
   685  		p10 := s.Prog(loong64.ASUBV)
   686  		p10.From.Type = obj.TYPE_CONST
   687  		p10.From.Offset = 1
   688  		p10.To.Type = obj.TYPE_REG
   689  		p10.To.Reg = countReg
   690  
   691  		p11 := s.Prog(loong64.ABNE)
   692  		p11.From.Type = obj.TYPE_REG
   693  		p11.From.Reg = countReg
   694  		p11.To.Type = obj.TYPE_BRANCH
   695  		p11.To.SetTarget(p8.Link)
   696  
   697  		p12 := s.Prog(obj.ANOP)
   698  		p7.To.SetTarget(p12)
   699  
   700  		// Multiples of the loop size are now done.
   701  		n %= loopSize
   702  		// Write any fractional portion.
   703  		for off = 0; n >= 8; off += 8 {
   704  			// MOVV   ZR, off(ptrReg)
   705  			zero8(s, ptrReg, off)
   706  			n -= 8
   707  		}
   708  
   709  		if n != 0 {
   710  			zero8(s, ptrReg, off+n-8)
   711  		}
   712  
   713  	case ssa.OpLOONG64LoweredMove:
   714  		dstReg := v.Args[0].Reg()
   715  		srcReg := v.Args[1].Reg()
   716  		if dstReg == srcReg {
   717  			break
   718  		}
   719  		tmpReg := int16(loong64.REG_R20)
   720  		n := v.AuxInt
   721  		if n < 16 {
   722  			v.Fatalf("Move too small %d", n)
   723  		}
   724  
   725  		var off int64
   726  		for n >= 8 {
   727  			// MOVV     off(srcReg), tmpReg
   728  			// MOVV     tmpReg, off(dstReg)
   729  			move8(s, srcReg, dstReg, tmpReg, off)
   730  			off += 8
   731  			n -= 8
   732  		}
   733  
   734  		if n != 0 {
   735  			// MOVV     off+n-8(srcReg), tmpReg
    736  			// MOVV     tmpReg, off+n-8(dstReg)
   737  			move8(s, srcReg, dstReg, tmpReg, off+n-8)
   738  		}
   739  	case ssa.OpLOONG64LoweredMoveLoop:
   740  		dstReg := v.Args[0].Reg()
   741  		srcReg := v.Args[1].Reg()
   742  		if dstReg == srcReg {
   743  			break
   744  		}
   745  		countReg := int16(loong64.REG_R20)
   746  		tmpReg := int16(loong64.REG_R21)
   747  		var off int64
   748  		n := v.AuxInt
   749  		loopSize := int64(64)
   750  		if n < 3*loopSize {
   751  			// - a loop count of 0 won't work.
   752  			// - a loop count of 1 is useless.
   753  			// - a loop count of 2 is a code size ~tie
   754  			//     4 instructions to implement the loop
   755  			//     8 instructions in the loop body
   756  			//   vs
   757  			//     16 instructions in the straightline code
   758  			//   Might as well use straightline code.
   759  			v.Fatalf("MoveLoop size too small %d", n)
   760  		}
   761  
   762  		// Put iteration count in a register.
   763  		//   MOVV     $n/loopSize, countReg
   764  		p := s.Prog(loong64.AMOVV)
   765  		p.From.Type = obj.TYPE_CONST
   766  		p.From.Offset = n / loopSize
   767  		p.To.Type = obj.TYPE_REG
   768  		p.To.Reg = countReg
   769  		cntInit := p
   770  
   771  		// Move loopSize bytes starting at srcReg to dstReg.
   772  		for range loopSize / 8 {
   773  			// MOVV     off(srcReg), tmpReg
   774  			// MOVV     tmpReg, off(dstReg)
   775  			move8(s, srcReg, dstReg, tmpReg, off)
   776  			off += 8
   777  		}
   778  
   779  		// Increment srcReg and destReg by loopSize.
   780  		//   ADDV     $loopSize, srcReg
   781  		p = s.Prog(loong64.AADDV)
   782  		p.From.Type = obj.TYPE_CONST
   783  		p.From.Offset = loopSize
   784  		p.To.Type = obj.TYPE_REG
   785  		p.To.Reg = srcReg
   786  		//   ADDV     $loopSize, dstReg
   787  		p = s.Prog(loong64.AADDV)
   788  		p.From.Type = obj.TYPE_CONST
   789  		p.From.Offset = loopSize
   790  		p.To.Type = obj.TYPE_REG
   791  		p.To.Reg = dstReg
   792  
   793  		// Decrement loop count.
   794  		//   SUBV     $1, countReg
   795  		p = s.Prog(loong64.ASUBV)
   796  		p.From.Type = obj.TYPE_CONST
   797  		p.From.Offset = 1
   798  		p.To.Type = obj.TYPE_REG
   799  		p.To.Reg = countReg
   800  
   801  		// Jump to loop header if we're not done yet.
   802  		//   BNE     countReg, loop header
   803  		p = s.Prog(loong64.ABNE)
   804  		p.From.Type = obj.TYPE_REG
   805  		p.From.Reg = countReg
   806  		p.To.Type = obj.TYPE_BRANCH
   807  		p.To.SetTarget(cntInit.Link)
   808  
   809  		// Multiples of the loop size are now done.
   810  		n %= loopSize
   811  
   812  		off = 0
   813  		// Copy any fractional portion.
   814  		for n >= 8 {
   815  			// MOVV     off(srcReg), tmpReg
   816  			// MOVV     tmpReg, off(dstReg)
   817  			move8(s, srcReg, dstReg, tmpReg, off)
   818  			off += 8
   819  			n -= 8
   820  		}
   821  
   822  		if n != 0 {
   823  			// MOVV     off+n-8(srcReg), tmpReg
    824  			// MOVV     tmpReg, off+n-8(dstReg)
   825  			move8(s, srcReg, dstReg, tmpReg, off+n-8)
   826  		}
   827  
   828  	case ssa.OpLOONG64CALLstatic, ssa.OpLOONG64CALLclosure, ssa.OpLOONG64CALLinter:
   829  		s.Call(v)
   830  	case ssa.OpLOONG64CALLtail:
   831  		s.TailCall(v)
   832  	case ssa.OpLOONG64LoweredWB:
   833  		p := s.Prog(obj.ACALL)
   834  		p.To.Type = obj.TYPE_MEM
   835  		p.To.Name = obj.NAME_EXTERN
   836  		// AuxInt encodes how many buffer entries we need.
   837  		p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1]
   838  
   839  	case ssa.OpLOONG64LoweredPubBarrier:
   840  		// DBAR 0x1A
   841  		p := s.Prog(v.Op.Asm())
   842  		p.From.Type = obj.TYPE_CONST
   843  		p.From.Offset = 0x1A
   844  
   845  	case ssa.OpLOONG64LoweredPanicBoundsRR, ssa.OpLOONG64LoweredPanicBoundsRC, ssa.OpLOONG64LoweredPanicBoundsCR, ssa.OpLOONG64LoweredPanicBoundsCC:
   846  		// Compute the constant we put in the PCData entry for this call.
   847  		code, signed := ssa.BoundsKind(v.AuxInt).Code()
   848  		xIsReg := false
   849  		yIsReg := false
   850  		xVal := 0
   851  		yVal := 0
   852  		switch v.Op {
   853  		case ssa.OpLOONG64LoweredPanicBoundsRR:
   854  			xIsReg = true
   855  			xVal = int(v.Args[0].Reg() - loong64.REG_R4)
   856  			yIsReg = true
   857  			yVal = int(v.Args[1].Reg() - loong64.REG_R4)
   858  		case ssa.OpLOONG64LoweredPanicBoundsRC:
   859  			xIsReg = true
   860  			xVal = int(v.Args[0].Reg() - loong64.REG_R4)
   861  			c := v.Aux.(ssa.PanicBoundsC).C
   862  			if c >= 0 && c <= abi.BoundsMaxConst {
   863  				yVal = int(c)
   864  			} else {
   865  				// Move constant to a register
   866  				yIsReg = true
   867  				if yVal == xVal {
   868  					yVal = 1
   869  				}
   870  				p := s.Prog(loong64.AMOVV)
   871  				p.From.Type = obj.TYPE_CONST
   872  				p.From.Offset = c
   873  				p.To.Type = obj.TYPE_REG
   874  				p.To.Reg = loong64.REG_R4 + int16(yVal)
   875  			}
   876  		case ssa.OpLOONG64LoweredPanicBoundsCR:
   877  			yIsReg = true
   878  			yVal = int(v.Args[0].Reg() - loong64.REG_R4)
   879  			c := v.Aux.(ssa.PanicBoundsC).C
   880  			if c >= 0 && c <= abi.BoundsMaxConst {
   881  				xVal = int(c)
   882  			} else {
   883  				// Move constant to a register
   884  				xIsReg = true
   885  				if xVal == yVal {
   886  					xVal = 1
   887  				}
   888  				p := s.Prog(loong64.AMOVV)
   889  				p.From.Type = obj.TYPE_CONST
   890  				p.From.Offset = c
   891  				p.To.Type = obj.TYPE_REG
   892  				p.To.Reg = loong64.REG_R4 + int16(xVal)
   893  			}
   894  		case ssa.OpLOONG64LoweredPanicBoundsCC:
   895  			c := v.Aux.(ssa.PanicBoundsCC).Cx
   896  			if c >= 0 && c <= abi.BoundsMaxConst {
   897  				xVal = int(c)
   898  			} else {
   899  				// Move constant to a register
   900  				xIsReg = true
   901  				p := s.Prog(loong64.AMOVV)
   902  				p.From.Type = obj.TYPE_CONST
   903  				p.From.Offset = c
   904  				p.To.Type = obj.TYPE_REG
   905  				p.To.Reg = loong64.REG_R4 + int16(xVal)
   906  			}
   907  			c = v.Aux.(ssa.PanicBoundsCC).Cy
   908  			if c >= 0 && c <= abi.BoundsMaxConst {
   909  				yVal = int(c)
   910  			} else {
   911  				// Move constant to a register
   912  				yIsReg = true
   913  				yVal = 1
   914  				p := s.Prog(loong64.AMOVV)
   915  				p.From.Type = obj.TYPE_CONST
   916  				p.From.Offset = c
   917  				p.To.Type = obj.TYPE_REG
   918  				p.To.Reg = loong64.REG_R4 + int16(yVal)
   919  			}
   920  		}
   921  		c := abi.BoundsEncode(code, signed, xIsReg, yIsReg, xVal, yVal)
   922  
   923  		p := s.Prog(obj.APCDATA)
   924  		p.From.SetConst(abi.PCDATA_PanicBounds)
   925  		p.To.SetConst(int64(c))
   926  		p = s.Prog(obj.ACALL)
   927  		p.To.Type = obj.TYPE_MEM
   928  		p.To.Name = obj.NAME_EXTERN
   929  		p.To.Sym = ir.Syms.PanicBounds
   930  
   931  	case ssa.OpLOONG64LoweredAtomicLoad8, ssa.OpLOONG64LoweredAtomicLoad32, ssa.OpLOONG64LoweredAtomicLoad64:
   932  		// MOVB	(Rarg0), Rout
   933  		// DBAR	0x14
   934  		as := loong64.AMOVV
   935  		switch v.Op {
   936  		case ssa.OpLOONG64LoweredAtomicLoad8:
   937  			as = loong64.AMOVB
   938  		case ssa.OpLOONG64LoweredAtomicLoad32:
   939  			as = loong64.AMOVW
   940  		}
   941  		p := s.Prog(as)
   942  		p.From.Type = obj.TYPE_MEM
   943  		p.From.Reg = v.Args[0].Reg()
   944  		p.To.Type = obj.TYPE_REG
   945  		p.To.Reg = v.Reg0()
   946  		p1 := s.Prog(loong64.ADBAR)
   947  		p1.From.Type = obj.TYPE_CONST
   948  		p1.From.Offset = 0x14
   949  
   950  	case ssa.OpLOONG64LoweredAtomicStore8,
   951  		ssa.OpLOONG64LoweredAtomicStore32,
   952  		ssa.OpLOONG64LoweredAtomicStore64:
   953  		// DBAR 0x12
   954  		// MOVx (Rarg1), Rout
   955  		// DBAR 0x18
   956  		movx := loong64.AMOVV
   957  		switch v.Op {
   958  		case ssa.OpLOONG64LoweredAtomicStore8:
   959  			movx = loong64.AMOVB
   960  		case ssa.OpLOONG64LoweredAtomicStore32:
   961  			movx = loong64.AMOVW
   962  		}
   963  		p := s.Prog(loong64.ADBAR)
   964  		p.From.Type = obj.TYPE_CONST
   965  		p.From.Offset = 0x12
   966  
   967  		p1 := s.Prog(movx)
   968  		p1.From.Type = obj.TYPE_REG
   969  		p1.From.Reg = v.Args[1].Reg()
   970  		p1.To.Type = obj.TYPE_MEM
   971  		p1.To.Reg = v.Args[0].Reg()
   972  
   973  		p2 := s.Prog(loong64.ADBAR)
   974  		p2.From.Type = obj.TYPE_CONST
   975  		p2.From.Offset = 0x18
   976  
   977  	case ssa.OpLOONG64LoweredAtomicStore8Variant,
   978  		ssa.OpLOONG64LoweredAtomicStore32Variant,
   979  		ssa.OpLOONG64LoweredAtomicStore64Variant:
   980  		//AMSWAPx  Rarg1, (Rarg0), Rout
   981  		amswapx := loong64.AAMSWAPDBV
   982  		switch v.Op {
   983  		case ssa.OpLOONG64LoweredAtomicStore32Variant:
   984  			amswapx = loong64.AAMSWAPDBW
   985  		case ssa.OpLOONG64LoweredAtomicStore8Variant:
   986  			amswapx = loong64.AAMSWAPDBB
   987  		}
   988  		p := s.Prog(amswapx)
   989  		p.From.Type = obj.TYPE_REG
   990  		p.From.Reg = v.Args[1].Reg()
   991  		p.To.Type = obj.TYPE_MEM
   992  		p.To.Reg = v.Args[0].Reg()
   993  		p.RegTo2 = loong64.REGZERO
   994  
   995  	case ssa.OpLOONG64LoweredAtomicExchange32, ssa.OpLOONG64LoweredAtomicExchange64:
   996  		// AMSWAPx	Rarg1, (Rarg0), Rout
   997  		amswapx := loong64.AAMSWAPDBV
   998  		if v.Op == ssa.OpLOONG64LoweredAtomicExchange32 {
   999  			amswapx = loong64.AAMSWAPDBW
  1000  		}
  1001  		p := s.Prog(amswapx)
  1002  		p.From.Type = obj.TYPE_REG
  1003  		p.From.Reg = v.Args[1].Reg()
  1004  		p.To.Type = obj.TYPE_MEM
  1005  		p.To.Reg = v.Args[0].Reg()
  1006  		p.RegTo2 = v.Reg0()
  1007  
  1008  	case ssa.OpLOONG64LoweredAtomicExchange8Variant:
  1009  		// AMSWAPDBB	Rarg1, (Rarg0), Rout
  1010  		p := s.Prog(loong64.AAMSWAPDBB)
  1011  		p.From.Type = obj.TYPE_REG
  1012  		p.From.Reg = v.Args[1].Reg()
  1013  		p.To.Type = obj.TYPE_MEM
  1014  		p.To.Reg = v.Args[0].Reg()
  1015  		p.RegTo2 = v.Reg0()
  1016  
  1017  	case ssa.OpLOONG64LoweredAtomicAdd32, ssa.OpLOONG64LoweredAtomicAdd64:
  1018  		// AMADDx  Rarg1, (Rarg0), Rout
  1019  		// ADDV    Rarg1, Rout, Rout
  1020  		amaddx := loong64.AAMADDDBV
  1021  		addx := loong64.AADDV
  1022  		if v.Op == ssa.OpLOONG64LoweredAtomicAdd32 {
  1023  			amaddx = loong64.AAMADDDBW
  1024  		}
  1025  		p := s.Prog(amaddx)
  1026  		p.From.Type = obj.TYPE_REG
  1027  		p.From.Reg = v.Args[1].Reg()
  1028  		p.To.Type = obj.TYPE_MEM
  1029  		p.To.Reg = v.Args[0].Reg()
  1030  		p.RegTo2 = v.Reg0()
  1031  
  1032  		p1 := s.Prog(addx)
  1033  		p1.From.Type = obj.TYPE_REG
  1034  		p1.From.Reg = v.Args[1].Reg()
  1035  		p1.Reg = v.Reg0()
  1036  		p1.To.Type = obj.TYPE_REG
  1037  		p1.To.Reg = v.Reg0()
  1038  
  1039  	case ssa.OpLOONG64LoweredAtomicCas32, ssa.OpLOONG64LoweredAtomicCas64:
  1040  		// MOVV $0, Rout
  1041  		// DBAR 0x14
  1042  		// LL	(Rarg0), Rtmp
  1043  		// BNE	Rtmp, Rarg1, 4(PC)
  1044  		// MOVV Rarg2, Rout
  1045  		// SC	Rout, (Rarg0)
  1046  		// BEQ	Rout, -4(PC)
  1047  		// DBAR 0x12
  1048  		ll := loong64.ALLV
  1049  		sc := loong64.ASCV
  1050  		if v.Op == ssa.OpLOONG64LoweredAtomicCas32 {
  1051  			ll = loong64.ALL
  1052  			sc = loong64.ASC
  1053  		}
  1054  
  1055  		p := s.Prog(loong64.AMOVV)
  1056  		p.From.Type = obj.TYPE_REG
  1057  		p.From.Reg = loong64.REGZERO
  1058  		p.To.Type = obj.TYPE_REG
  1059  		p.To.Reg = v.Reg0()
  1060  
  1061  		p1 := s.Prog(loong64.ADBAR)
  1062  		p1.From.Type = obj.TYPE_CONST
  1063  		p1.From.Offset = 0x14
  1064  
  1065  		p2 := s.Prog(ll)
  1066  		p2.From.Type = obj.TYPE_MEM
  1067  		p2.From.Reg = v.Args[0].Reg()
  1068  		p2.To.Type = obj.TYPE_REG
  1069  		p2.To.Reg = loong64.REGTMP
  1070  
  1071  		p3 := s.Prog(loong64.ABNE)
  1072  		p3.From.Type = obj.TYPE_REG
  1073  		p3.From.Reg = v.Args[1].Reg()
  1074  		p3.Reg = loong64.REGTMP
  1075  		p3.To.Type = obj.TYPE_BRANCH
  1076  
  1077  		p4 := s.Prog(loong64.AMOVV)
  1078  		p4.From.Type = obj.TYPE_REG
  1079  		p4.From.Reg = v.Args[2].Reg()
  1080  		p4.To.Type = obj.TYPE_REG
  1081  		p4.To.Reg = v.Reg0()
  1082  
  1083  		p5 := s.Prog(sc)
  1084  		p5.From.Type = obj.TYPE_REG
  1085  		p5.From.Reg = v.Reg0()
  1086  		p5.To.Type = obj.TYPE_MEM
  1087  		p5.To.Reg = v.Args[0].Reg()
  1088  
  1089  		p6 := s.Prog(loong64.ABEQ)
  1090  		p6.From.Type = obj.TYPE_REG
  1091  		p6.From.Reg = v.Reg0()
  1092  		p6.To.Type = obj.TYPE_BRANCH
  1093  		p6.To.SetTarget(p2)
  1094  
  1095  		p7 := s.Prog(loong64.ADBAR)
  1096  		p7.From.Type = obj.TYPE_CONST
  1097  		p7.From.Offset = 0x12
  1098  		p3.To.SetTarget(p7)
  1099  
  1100  	case ssa.OpLOONG64LoweredAtomicAnd32,
  1101  		ssa.OpLOONG64LoweredAtomicOr32:
  1102  		// AM{AND,OR}DBx  Rarg1, (Rarg0), RegZero
  1103  		p := s.Prog(v.Op.Asm())
  1104  		p.From.Type = obj.TYPE_REG
  1105  		p.From.Reg = v.Args[1].Reg()
  1106  		p.To.Type = obj.TYPE_MEM
  1107  		p.To.Reg = v.Args[0].Reg()
  1108  		p.RegTo2 = loong64.REGZERO
  1109  
  1110  	case ssa.OpLOONG64LoweredAtomicAnd32value,
  1111  		ssa.OpLOONG64LoweredAtomicAnd64value,
  1112  		ssa.OpLOONG64LoweredAtomicOr64value,
  1113  		ssa.OpLOONG64LoweredAtomicOr32value:
  1114  		// AM{AND,OR}DBx  Rarg1, (Rarg0), Rout
  1115  		p := s.Prog(v.Op.Asm())
  1116  		p.From.Type = obj.TYPE_REG
  1117  		p.From.Reg = v.Args[1].Reg()
  1118  		p.To.Type = obj.TYPE_MEM
  1119  		p.To.Reg = v.Args[0].Reg()
  1120  		p.RegTo2 = v.Reg0()
  1121  
  1122  	case ssa.OpLOONG64LoweredAtomicCas64Variant, ssa.OpLOONG64LoweredAtomicCas32Variant:
  1123  		// MOVV         $0, Rout
  1124  		// MOVV         Rarg1, Rtmp
  1125  		// AMCASDBx     Rarg2, (Rarg0), Rtmp
  1126  		// BNE          Rarg1, Rtmp, 2(PC)
  1127  		// MOVV         $1, Rout
  1128  		// NOP
  1129  
  1130  		amcasx := loong64.AAMCASDBV
  1131  		if v.Op == ssa.OpLOONG64LoweredAtomicCas32Variant {
  1132  			amcasx = loong64.AAMCASDBW
  1133  		}
  1134  
  1135  		p := s.Prog(loong64.AMOVV)
  1136  		p.From.Type = obj.TYPE_REG
  1137  		p.From.Reg = loong64.REGZERO
  1138  		p.To.Type = obj.TYPE_REG
  1139  		p.To.Reg = v.Reg0()
  1140  
  1141  		p1 := s.Prog(loong64.AMOVV)
  1142  		p1.From.Type = obj.TYPE_REG
  1143  		p1.From.Reg = v.Args[1].Reg()
  1144  		p1.To.Type = obj.TYPE_REG
  1145  		p1.To.Reg = loong64.REGTMP
  1146  
  1147  		p2 := s.Prog(amcasx)
  1148  		p2.From.Type = obj.TYPE_REG
  1149  		p2.From.Reg = v.Args[2].Reg()
  1150  		p2.To.Type = obj.TYPE_MEM
  1151  		p2.To.Reg = v.Args[0].Reg()
  1152  		p2.RegTo2 = loong64.REGTMP
  1153  
  1154  		p3 := s.Prog(loong64.ABNE)
  1155  		p3.From.Type = obj.TYPE_REG
  1156  		p3.From.Reg = v.Args[1].Reg()
  1157  		p3.Reg = loong64.REGTMP
  1158  		p3.To.Type = obj.TYPE_BRANCH
  1159  
  1160  		p4 := s.Prog(loong64.AMOVV)
  1161  		p4.From.Type = obj.TYPE_CONST
  1162  		p4.From.Offset = 0x1
  1163  		p4.To.Type = obj.TYPE_REG
  1164  		p4.To.Reg = v.Reg0()
  1165  
  1166  		p5 := s.Prog(obj.ANOP)
  1167  		p3.To.SetTarget(p5)
  1168  
  1169  	case ssa.OpLOONG64LoweredNilCheck:
  1170  		// Issue a load which will fault if arg is nil.
  1171  		p := s.Prog(loong64.AMOVB)
  1172  		p.From.Type = obj.TYPE_MEM
  1173  		p.From.Reg = v.Args[0].Reg()
  1174  		ssagen.AddAux(&p.From, v)
  1175  		p.To.Type = obj.TYPE_REG
  1176  		p.To.Reg = loong64.REGTMP
  1177  		if logopt.Enabled() {
  1178  			logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
  1179  		}
  1180  		if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
  1181  			base.WarnfAt(v.Pos, "generated nil check")
  1182  		}
  1183  	case ssa.OpLOONG64FPFlagTrue,
  1184  		ssa.OpLOONG64FPFlagFalse:
  1185  		// MOVV	$0, r
  1186  		// BFPF	2(PC)
  1187  		// MOVV	$1, r
  1188  		branch := loong64.ABFPF
  1189  		if v.Op == ssa.OpLOONG64FPFlagFalse {
  1190  			branch = loong64.ABFPT
  1191  		}
  1192  		p := s.Prog(loong64.AMOVV)
  1193  		p.From.Type = obj.TYPE_REG
  1194  		p.From.Reg = loong64.REGZERO
  1195  		p.To.Type = obj.TYPE_REG
  1196  		p.To.Reg = v.Reg()
  1197  		p2 := s.Prog(branch)
  1198  		p2.To.Type = obj.TYPE_BRANCH
  1199  		p3 := s.Prog(loong64.AMOVV)
  1200  		p3.From.Type = obj.TYPE_CONST
  1201  		p3.From.Offset = 1
  1202  		p3.To.Type = obj.TYPE_REG
  1203  		p3.To.Reg = v.Reg()
  1204  		p4 := s.Prog(obj.ANOP) // not a machine instruction, for branch to land
  1205  		p2.To.SetTarget(p4)
  1206  	case ssa.OpLOONG64LoweredGetClosurePtr:
  1207  		// Closure pointer is R22 (loong64.REGCTXT).
  1208  		ssagen.CheckLoweredGetClosurePtr(v)
  1209  	case ssa.OpLOONG64LoweredGetCallerSP:
  1210  		// caller's SP is FixedFrameSize below the address of the first arg
  1211  		p := s.Prog(loong64.AMOVV)
  1212  		p.From.Type = obj.TYPE_ADDR
  1213  		p.From.Offset = -base.Ctxt.Arch.FixedFrameSize
  1214  		p.From.Name = obj.NAME_PARAM
  1215  		p.To.Type = obj.TYPE_REG
  1216  		p.To.Reg = v.Reg()
  1217  	case ssa.OpLOONG64LoweredGetCallerPC:
  1218  		p := s.Prog(obj.AGETCALLERPC)
  1219  		p.To.Type = obj.TYPE_REG
  1220  		p.To.Reg = v.Reg()
  1221  	case ssa.OpLOONG64MASKEQZ, ssa.OpLOONG64MASKNEZ:
  1222  		p := s.Prog(v.Op.Asm())
  1223  		p.From.Type = obj.TYPE_REG
  1224  		p.From.Reg = v.Args[1].Reg()
  1225  		p.Reg = v.Args[0].Reg()
  1226  		p.To.Type = obj.TYPE_REG
  1227  		p.To.Reg = v.Reg()
  1228  
  1229  	case ssa.OpLOONG64PRELD:
  1230  		// PRELD (Rarg0), hint
  1231  		p := s.Prog(v.Op.Asm())
  1232  		p.From.Type = obj.TYPE_MEM
  1233  		p.From.Reg = v.Args[0].Reg()
  1234  		p.AddRestSourceConst(v.AuxInt & 0x1f)
  1235  
  1236  	case ssa.OpLOONG64PRELDX:
  1237  		// PRELDX (Rarg0), $n, $hint
  1238  		p := s.Prog(v.Op.Asm())
  1239  		p.From.Type = obj.TYPE_MEM
  1240  		p.From.Reg = v.Args[0].Reg()
  1241  		p.AddRestSourceArgs([]obj.Addr{
  1242  			{Type: obj.TYPE_CONST, Offset: (v.AuxInt >> 5) & 0x1fffffffff},
  1243  			{Type: obj.TYPE_CONST, Offset: (v.AuxInt >> 0) & 0x1f},
  1244  		})
  1245  
  1246  	case ssa.OpLOONG64ADDshiftLLV:
  1247  		// ADDshiftLLV Rarg0, Rarg1, $shift
  1248  		// ALSLV $shift, Rarg1, Rarg0, Rtmp
  1249  		p := s.Prog(v.Op.Asm())
  1250  		p.From.Type = obj.TYPE_CONST
  1251  		p.From.Offset = v.AuxInt
  1252  		p.Reg = v.Args[1].Reg()
  1253  		p.AddRestSourceReg(v.Args[0].Reg())
  1254  		p.To.Type = obj.TYPE_REG
  1255  		p.To.Reg = v.Reg()
  1256  
  1257  	case ssa.OpClobber, ssa.OpClobberReg:
  1258  		// TODO: implement for clobberdead experiment. Nop is ok for now.
  1259  	default:
  1260  		v.Fatalf("genValue not implemented: %s", v.LongString())
  1261  	}
  1262  }
  1263  
// blockJump maps an SSA block kind to the branch instruction that
// implements its condition (asm) and the instruction for the inverted
// condition (invasm), used when the sense of the branch must be flipped
// because the taken successor is laid out immediately after the block.
var blockJump = map[ssa.BlockKind]struct {
	asm, invasm obj.As
}{
	ssa.BlockLOONG64EQZ:  {loong64.ABEQ, loong64.ABNE},
	ssa.BlockLOONG64NEZ:  {loong64.ABNE, loong64.ABEQ},
	ssa.BlockLOONG64LTZ:  {loong64.ABLTZ, loong64.ABGEZ},
	ssa.BlockLOONG64GEZ:  {loong64.ABGEZ, loong64.ABLTZ},
	ssa.BlockLOONG64LEZ:  {loong64.ABLEZ, loong64.ABGTZ},
	ssa.BlockLOONG64GTZ:  {loong64.ABGTZ, loong64.ABLEZ},
	ssa.BlockLOONG64FPT:  {loong64.ABFPT, loong64.ABFPF},
	ssa.BlockLOONG64FPF:  {loong64.ABFPF, loong64.ABFPT},
	ssa.BlockLOONG64BEQ:  {loong64.ABEQ, loong64.ABNE},
	ssa.BlockLOONG64BNE:  {loong64.ABNE, loong64.ABEQ},
	ssa.BlockLOONG64BGE:  {loong64.ABGE, loong64.ABLT},
	ssa.BlockLOONG64BLT:  {loong64.ABLT, loong64.ABGE},
	ssa.BlockLOONG64BLTU: {loong64.ABLTU, loong64.ABGEU},
	ssa.BlockLOONG64BGEU: {loong64.ABGEU, loong64.ABLTU},
}
  1282  
// ssaGenBlock emits the control-flow instructions (jumps, conditional
// branches, returns) that terminate block b. next is the block that will
// be laid out immediately after b, so control flow that falls through to
// next needs no explicit jump.
func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
	switch b.Kind {
	case ssa.BlockPlain, ssa.BlockDefer:
		// Unconditional flow to the single successor; emit a jump only
		// when the successor is not the fallthrough block.
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockExit, ssa.BlockRetJmp:
		// Nothing to emit for these block kinds.
	case ssa.BlockRet:
		s.Prog(obj.ARET)
	case ssa.BlockLOONG64EQZ, ssa.BlockLOONG64NEZ,
		ssa.BlockLOONG64LTZ, ssa.BlockLOONG64GEZ,
		ssa.BlockLOONG64LEZ, ssa.BlockLOONG64GTZ,
		ssa.BlockLOONG64BEQ, ssa.BlockLOONG64BNE,
		ssa.BlockLOONG64BLT, ssa.BlockLOONG64BGE,
		ssa.BlockLOONG64BLTU, ssa.BlockLOONG64BGEU,
		ssa.BlockLOONG64FPT, ssa.BlockLOONG64FPF:
		jmp := blockJump[b.Kind]
		var p *obj.Prog
		switch next {
		case b.Succs[0].Block():
			// Taken successor is laid out next: branch on the inverted
			// condition to the other successor and fall through.
			p = s.Br(jmp.invasm, b.Succs[1].Block())
		case b.Succs[1].Block():
			// Untaken successor is next: branch on the condition itself.
			p = s.Br(jmp.asm, b.Succs[0].Block())
		default:
			// Neither successor is next: conditional branch to one and
			// an unconditional jump to the other, preferring to branch
			// toward the likely successor.
			if b.Likely != ssa.BranchUnlikely {
				p = s.Br(jmp.asm, b.Succs[0].Block())
				s.Br(obj.AJMP, b.Succs[1].Block())
			} else {
				p = s.Br(jmp.invasm, b.Succs[1].Block())
				s.Br(obj.AJMP, b.Succs[0].Block())
			}
		}
		// Attach the block's control value(s) as branch operands.
		switch b.Kind {
		case ssa.BlockLOONG64BEQ, ssa.BlockLOONG64BNE,
			ssa.BlockLOONG64BGE, ssa.BlockLOONG64BLT,
			ssa.BlockLOONG64BGEU, ssa.BlockLOONG64BLTU:
			// Two-register compare-and-branch forms.
			p.From.Type = obj.TYPE_REG
			p.From.Reg = b.Controls[0].Reg()
			p.Reg = b.Controls[1].Reg()
		case ssa.BlockLOONG64EQZ, ssa.BlockLOONG64NEZ,
			ssa.BlockLOONG64LTZ, ssa.BlockLOONG64GEZ,
			ssa.BlockLOONG64LEZ, ssa.BlockLOONG64GTZ,
			ssa.BlockLOONG64FPT, ssa.BlockLOONG64FPF:
			// Single-register forms; when the control is the FP flag
			// (a flags-typed value), the branch takes no register operand.
			if !b.Controls[0].Type.IsFlags() {
				p.From.Type = obj.TYPE_REG
				p.From.Reg = b.Controls[0].Reg()
			}
		}
	case ssa.BlockLOONG64JUMPTABLE:
		// Index into the jump table and jump through the loaded entry:
		// ALSLV $3, Rarg0, Rarg1, REGTMP
		// MOVV (REGTMP), REGTMP
		// JMP	(REGTMP)
		p := s.Prog(loong64.AALSLV)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 3 // idx*8
		p.Reg = b.Controls[0].Reg()
		p.AddRestSourceReg(b.Controls[1].Reg())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = loong64.REGTMP
		p1 := s.Prog(loong64.AMOVV)
		p1.From.Type = obj.TYPE_MEM
		p1.From.Reg = loong64.REGTMP
		p1.From.Offset = 0
		p1.To.Type = obj.TYPE_REG
		p1.To.Reg = loong64.REGTMP
		p2 := s.Prog(obj.AJMP)
		p2.To.Type = obj.TYPE_MEM
		p2.To.Reg = loong64.REGTMP
		// Save jump tables for later resolution of the target blocks.
		s.JumpTables = append(s.JumpTables, b)

	default:
		b.Fatalf("branch not implemented: %s", b.LongString())
	}
}
  1360  
  1361  func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
  1362  	p := s.Prog(loadByType(t, reg))
  1363  	p.From.Type = obj.TYPE_MEM
  1364  	p.From.Name = obj.NAME_AUTO
  1365  	p.From.Sym = n.Linksym()
  1366  	p.From.Offset = n.FrameOffset() + off
  1367  	p.To.Type = obj.TYPE_REG
  1368  	p.To.Reg = reg
  1369  	return p
  1370  }
  1371  
  1372  func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
  1373  	p = pp.Append(p, storeByType(t, reg), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
  1374  	p.To.Name = obj.NAME_PARAM
  1375  	p.To.Sym = n.Linksym()
  1376  	p.Pos = p.Pos.WithNotStmt()
  1377  	return p
  1378  }
  1379  
  1380  // move8 copies 8 bytes at src+off to dst+off.
  1381  func move8(s *ssagen.State, src, dst, tmp int16, off int64) {
  1382  	// MOVV     off(src), tmp
  1383  	ld := s.Prog(loong64.AMOVV)
  1384  	ld.From.Type = obj.TYPE_MEM
  1385  	ld.From.Reg = src
  1386  	ld.From.Offset = off
  1387  	ld.To.Type = obj.TYPE_REG
  1388  	ld.To.Reg = tmp
  1389  	// MOVV     tmp, off(dst)
  1390  	st := s.Prog(loong64.AMOVV)
  1391  	st.From.Type = obj.TYPE_REG
  1392  	st.From.Reg = tmp
  1393  	st.To.Type = obj.TYPE_MEM
  1394  	st.To.Reg = dst
  1395  	st.To.Offset = off
  1396  }
  1397  
  1398  // zero8 zeroes 8 bytes at reg+off.
  1399  func zero8(s *ssagen.State, reg int16, off int64) {
  1400  	// MOVV   ZR, off(reg)
  1401  	p := s.Prog(loong64.AMOVV)
  1402  	p.From.Type = obj.TYPE_REG
  1403  	p.From.Reg = loong64.REGZERO
  1404  	p.To.Type = obj.TYPE_MEM
  1405  	p.To.Reg = reg
  1406  	p.To.Offset = off
  1407  }
  1408  
  1409  // zero16 zeroes 16 bytes at reg+off.
  1410  func zero16(s *ssagen.State, regZero, regBase int16, off int64) {
  1411  	// VMOVQ   regZero, off(regBase)
  1412  	p := s.Prog(loong64.AVMOVQ)
  1413  	p.From.Type = obj.TYPE_REG
  1414  	p.From.Reg = regZero
  1415  	p.To.Type = obj.TYPE_MEM
  1416  	p.To.Reg = regBase
  1417  	p.To.Offset = off
  1418  }
  1419  

View as plain text