Source file src/cmd/compile/internal/loong64/ssa.go

     1  // Copyright 2022 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package loong64
     6  
     7  import (
     8  	"math"
     9  
    10  	"cmd/compile/internal/base"
    11  	"cmd/compile/internal/ir"
    12  	"cmd/compile/internal/logopt"
    13  	"cmd/compile/internal/objw"
    14  	"cmd/compile/internal/ssa"
    15  	"cmd/compile/internal/ssagen"
    16  	"cmd/compile/internal/types"
    17  	"cmd/internal/obj"
    18  	"cmd/internal/obj/loong64"
    19  	"internal/abi"
    20  )
    21  
    22  // isFPreg reports whether r is an FP register.
    23  func isFPreg(r int16) bool {
    24  	return loong64.REG_F0 <= r && r <= loong64.REG_F31
    25  }
    26  
    27  // loadByType returns the load instruction of the given type.
    28  func loadByType(t *types.Type, r int16) obj.As {
    29  	if isFPreg(r) {
    30  		if t.Size() == 4 {
    31  			return loong64.AMOVF
    32  		} else {
    33  			return loong64.AMOVD
    34  		}
    35  	} else {
    36  		switch t.Size() {
    37  		case 1:
    38  			if t.IsSigned() {
    39  				return loong64.AMOVB
    40  			} else {
    41  				return loong64.AMOVBU
    42  			}
    43  		case 2:
    44  			if t.IsSigned() {
    45  				return loong64.AMOVH
    46  			} else {
    47  				return loong64.AMOVHU
    48  			}
    49  		case 4:
    50  			if t.IsSigned() {
    51  				return loong64.AMOVW
    52  			} else {
    53  				return loong64.AMOVWU
    54  			}
    55  		case 8:
    56  			return loong64.AMOVV
    57  		}
    58  	}
    59  	panic("bad load type")
    60  }
    61  
    62  // storeByType returns the store instruction of the given type.
    63  func storeByType(t *types.Type, r int16) obj.As {
    64  	if isFPreg(r) {
    65  		if t.Size() == 4 {
    66  			return loong64.AMOVF
    67  		} else {
    68  			return loong64.AMOVD
    69  		}
    70  	} else {
    71  		switch t.Size() {
    72  		case 1:
    73  			return loong64.AMOVB
    74  		case 2:
    75  			return loong64.AMOVH
    76  		case 4:
    77  			return loong64.AMOVW
    78  		case 8:
    79  			return loong64.AMOVV
    80  		}
    81  	}
    82  	panic("bad store type")
    83  }
    84  
    85  // largestMove returns the largest move instruction possible and its size,
    86  // given the alignment of the total size of the move.
    87  //
    88  // e.g., a 16-byte move may use MOVV, but an 11-byte move must use MOVB.
    89  //
    90  // Note that the moves may not be on naturally aligned addresses depending on
    91  // the source and destination.
    92  //
    93  // This matches the calculation in ssa.moveSize.
    94  func largestMove(alignment int64) (obj.As, int64) {
    95  	switch {
    96  	case alignment%8 == 0:
    97  		return loong64.AMOVV, 8
    98  	case alignment%4 == 0:
    99  		return loong64.AMOVW, 4
   100  	case alignment%2 == 0:
   101  		return loong64.AMOVH, 2
   102  	default:
   103  		return loong64.AMOVB, 1
   104  	}
   105  }
   106  
   107  func ssaGenValue(s *ssagen.State, v *ssa.Value) {
   108  	switch v.Op {
   109  	case ssa.OpCopy, ssa.OpLOONG64MOVVreg:
   110  		if v.Type.IsMemory() {
   111  			return
   112  		}
   113  		x := v.Args[0].Reg()
   114  		y := v.Reg()
   115  		if x == y {
   116  			return
   117  		}
   118  		as := loong64.AMOVV
   119  		if isFPreg(x) && isFPreg(y) {
   120  			as = loong64.AMOVD
   121  		}
   122  		p := s.Prog(as)
   123  		p.From.Type = obj.TYPE_REG
   124  		p.From.Reg = x
   125  		p.To.Type = obj.TYPE_REG
   126  		p.To.Reg = y
   127  	case ssa.OpLOONG64MOVVnop,
   128  		ssa.OpLOONG64ZERO,
   129  		ssa.OpLOONG64LoweredRound32F,
   130  		ssa.OpLOONG64LoweredRound64F:
   131  		// nothing to do
   132  	case ssa.OpLoadReg:
   133  		if v.Type.IsFlags() {
   134  			v.Fatalf("load flags not implemented: %v", v.LongString())
   135  			return
   136  		}
   137  		r := v.Reg()
   138  		p := s.Prog(loadByType(v.Type, r))
   139  		ssagen.AddrAuto(&p.From, v.Args[0])
   140  		p.To.Type = obj.TYPE_REG
   141  		p.To.Reg = r
   142  	case ssa.OpStoreReg:
   143  		if v.Type.IsFlags() {
   144  			v.Fatalf("store flags not implemented: %v", v.LongString())
   145  			return
   146  		}
   147  		r := v.Args[0].Reg()
   148  		p := s.Prog(storeByType(v.Type, r))
   149  		p.From.Type = obj.TYPE_REG
   150  		p.From.Reg = r
   151  		ssagen.AddrAuto(&p.To, v)
   152  	case ssa.OpArgIntReg, ssa.OpArgFloatReg:
   153  		// The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill
   154  		// The loop only runs once.
   155  		for _, a := range v.Block.Func.RegArgs {
   156  			// Pass the spill/unspill information along to the assembler, offset by size of
   157  			// the saved LR slot.
   158  			addr := ssagen.SpillSlotAddr(a, loong64.REGSP, base.Ctxt.Arch.FixedFrameSize)
   159  			s.FuncInfo().AddSpill(
   160  				obj.RegSpill{Reg: a.Reg, Addr: addr, Unspill: loadByType(a.Type, a.Reg), Spill: storeByType(a.Type, a.Reg)})
   161  		}
   162  		v.Block.Func.RegArgs = nil
   163  		ssagen.CheckArgReg(v)
   164  	case ssa.OpLOONG64ADDV,
   165  		ssa.OpLOONG64SUBV,
   166  		ssa.OpLOONG64AND,
   167  		ssa.OpLOONG64OR,
   168  		ssa.OpLOONG64XOR,
   169  		ssa.OpLOONG64NOR,
   170  		ssa.OpLOONG64ANDN,
   171  		ssa.OpLOONG64ORN,
   172  		ssa.OpLOONG64SLL,
   173  		ssa.OpLOONG64SLLV,
   174  		ssa.OpLOONG64SRL,
   175  		ssa.OpLOONG64SRLV,
   176  		ssa.OpLOONG64SRA,
   177  		ssa.OpLOONG64SRAV,
   178  		ssa.OpLOONG64ROTR,
   179  		ssa.OpLOONG64ROTRV,
   180  		ssa.OpLOONG64ADDF,
   181  		ssa.OpLOONG64ADDD,
   182  		ssa.OpLOONG64SUBF,
   183  		ssa.OpLOONG64SUBD,
   184  		ssa.OpLOONG64MULF,
   185  		ssa.OpLOONG64MULD,
   186  		ssa.OpLOONG64DIVF,
   187  		ssa.OpLOONG64DIVD,
   188  		ssa.OpLOONG64MULV, ssa.OpLOONG64MULHV, ssa.OpLOONG64MULHVU, ssa.OpLOONG64MULH, ssa.OpLOONG64MULHU,
   189  		ssa.OpLOONG64DIVV, ssa.OpLOONG64REMV, ssa.OpLOONG64DIVVU, ssa.OpLOONG64REMVU,
   190  		ssa.OpLOONG64MULWVW, ssa.OpLOONG64MULWVWU,
   191  		ssa.OpLOONG64FCOPYSGD:
   192  		p := s.Prog(v.Op.Asm())
   193  		p.From.Type = obj.TYPE_REG
   194  		p.From.Reg = v.Args[1].Reg()
   195  		p.Reg = v.Args[0].Reg()
   196  		p.To.Type = obj.TYPE_REG
   197  		p.To.Reg = v.Reg()
   198  
   199  	case ssa.OpLOONG64BSTRPICKV,
   200  		ssa.OpLOONG64BSTRPICKW:
   201  		p := s.Prog(v.Op.Asm())
   202  		p.From.Type = obj.TYPE_CONST
   203  		if v.Op == ssa.OpLOONG64BSTRPICKW {
   204  			p.From.Offset = v.AuxInt >> 5
   205  			p.AddRestSourceConst(v.AuxInt & 0x1f)
   206  		} else {
   207  			p.From.Offset = v.AuxInt >> 6
   208  			p.AddRestSourceConst(v.AuxInt & 0x3f)
   209  		}
   210  		p.Reg = v.Args[0].Reg()
   211  		p.To.Type = obj.TYPE_REG
   212  		p.To.Reg = v.Reg()
   213  
   214  	case ssa.OpLOONG64FMINF,
   215  		ssa.OpLOONG64FMIND,
   216  		ssa.OpLOONG64FMAXF,
   217  		ssa.OpLOONG64FMAXD:
   218  		// ADDD Rarg0, Rarg1, Rout
   219  		// CMPEQD Rarg0, Rarg0, FCC0
   220  		// bceqz FCC0, end
   221  		// CMPEQD Rarg1, Rarg1, FCC0
   222  		// bceqz FCC0, end
   223  		// F(MIN|MAX)(F|D)
   224  
   225  		r0 := v.Args[0].Reg()
   226  		r1 := v.Args[1].Reg()
   227  		out := v.Reg()
   228  		add, fcmp := loong64.AADDD, loong64.ACMPEQD
   229  		if v.Op == ssa.OpLOONG64FMINF || v.Op == ssa.OpLOONG64FMAXF {
   230  			add = loong64.AADDF
   231  			fcmp = loong64.ACMPEQF
   232  		}
   233  		p1 := s.Prog(add)
   234  		p1.From.Type = obj.TYPE_REG
   235  		p1.From.Reg = r0
   236  		p1.Reg = r1
   237  		p1.To.Type = obj.TYPE_REG
   238  		p1.To.Reg = out
   239  
   240  		p2 := s.Prog(fcmp)
   241  		p2.From.Type = obj.TYPE_REG
   242  		p2.From.Reg = r0
   243  		p2.Reg = r0
   244  		p2.To.Type = obj.TYPE_REG
   245  		p2.To.Reg = loong64.REG_FCC0
   246  
   247  		p3 := s.Prog(loong64.ABFPF)
   248  		p3.To.Type = obj.TYPE_BRANCH
   249  
   250  		p4 := s.Prog(fcmp)
   251  		p4.From.Type = obj.TYPE_REG
   252  		p4.From.Reg = r1
   253  		p4.Reg = r1
   254  		p4.To.Type = obj.TYPE_REG
   255  		p4.To.Reg = loong64.REG_FCC0
   256  
   257  		p5 := s.Prog(loong64.ABFPF)
   258  		p5.To.Type = obj.TYPE_BRANCH
   259  
   260  		p6 := s.Prog(v.Op.Asm())
   261  		p6.From.Type = obj.TYPE_REG
   262  		p6.From.Reg = r1
   263  		p6.Reg = r0
   264  		p6.To.Type = obj.TYPE_REG
   265  		p6.To.Reg = out
   266  
   267  		nop := s.Prog(obj.ANOP)
   268  		p3.To.SetTarget(nop)
   269  		p5.To.SetTarget(nop)
   270  
   271  	case ssa.OpLOONG64SGT,
   272  		ssa.OpLOONG64SGTU:
   273  		p := s.Prog(v.Op.Asm())
   274  		p.From.Type = obj.TYPE_REG
   275  		p.From.Reg = v.Args[0].Reg()
   276  		p.Reg = v.Args[1].Reg()
   277  		p.To.Type = obj.TYPE_REG
   278  		p.To.Reg = v.Reg()
   279  	case ssa.OpLOONG64ADDVconst,
   280  		ssa.OpLOONG64ADDV16const,
   281  		ssa.OpLOONG64SUBVconst,
   282  		ssa.OpLOONG64ANDconst,
   283  		ssa.OpLOONG64ORconst,
   284  		ssa.OpLOONG64XORconst,
   285  		ssa.OpLOONG64SLLconst,
   286  		ssa.OpLOONG64SLLVconst,
   287  		ssa.OpLOONG64SRLconst,
   288  		ssa.OpLOONG64SRLVconst,
   289  		ssa.OpLOONG64SRAconst,
   290  		ssa.OpLOONG64SRAVconst,
   291  		ssa.OpLOONG64ROTRconst,
   292  		ssa.OpLOONG64ROTRVconst,
   293  		ssa.OpLOONG64SGTconst,
   294  		ssa.OpLOONG64SGTUconst:
   295  		p := s.Prog(v.Op.Asm())
   296  		p.From.Type = obj.TYPE_CONST
   297  		p.From.Offset = v.AuxInt
   298  		p.Reg = v.Args[0].Reg()
   299  		p.To.Type = obj.TYPE_REG
   300  		p.To.Reg = v.Reg()
   301  
   302  	case ssa.OpLOONG64NORconst:
   303  		// MOVV $const, Rtmp
   304  		// NOR  Rtmp, Rarg0, Rout
   305  		p := s.Prog(loong64.AMOVV)
   306  		p.From.Type = obj.TYPE_CONST
   307  		p.From.Offset = v.AuxInt
   308  		p.To.Type = obj.TYPE_REG
   309  		p.To.Reg = loong64.REGTMP
   310  
   311  		p2 := s.Prog(v.Op.Asm())
   312  		p2.From.Type = obj.TYPE_REG
   313  		p2.From.Reg = loong64.REGTMP
   314  		p2.Reg = v.Args[0].Reg()
   315  		p2.To.Type = obj.TYPE_REG
   316  		p2.To.Reg = v.Reg()
   317  
   318  	case ssa.OpLOONG64MOVVconst:
   319  		r := v.Reg()
   320  		p := s.Prog(v.Op.Asm())
   321  		p.From.Type = obj.TYPE_CONST
   322  		p.From.Offset = v.AuxInt
   323  		p.To.Type = obj.TYPE_REG
   324  		p.To.Reg = r
   325  		if isFPreg(r) {
   326  			// cannot move into FP or special registers, use TMP as intermediate
   327  			p.To.Reg = loong64.REGTMP
   328  			p = s.Prog(loong64.AMOVV)
   329  			p.From.Type = obj.TYPE_REG
   330  			p.From.Reg = loong64.REGTMP
   331  			p.To.Type = obj.TYPE_REG
   332  			p.To.Reg = r
   333  		}
   334  	case ssa.OpLOONG64MOVFconst,
   335  		ssa.OpLOONG64MOVDconst:
   336  		p := s.Prog(v.Op.Asm())
   337  		p.From.Type = obj.TYPE_FCONST
   338  		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
   339  		p.To.Type = obj.TYPE_REG
   340  		p.To.Reg = v.Reg()
   341  	case ssa.OpLOONG64CMPEQF,
   342  		ssa.OpLOONG64CMPEQD,
   343  		ssa.OpLOONG64CMPGEF,
   344  		ssa.OpLOONG64CMPGED,
   345  		ssa.OpLOONG64CMPGTF,
   346  		ssa.OpLOONG64CMPGTD:
   347  		p := s.Prog(v.Op.Asm())
   348  		p.From.Type = obj.TYPE_REG
   349  		p.From.Reg = v.Args[0].Reg()
   350  		p.Reg = v.Args[1].Reg()
   351  		p.To.Type = obj.TYPE_REG
   352  		p.To.Reg = loong64.REG_FCC0
   353  
   354  	case ssa.OpLOONG64FMADDF,
   355  		ssa.OpLOONG64FMADDD,
   356  		ssa.OpLOONG64FMSUBF,
   357  		ssa.OpLOONG64FMSUBD,
   358  		ssa.OpLOONG64FNMADDF,
   359  		ssa.OpLOONG64FNMADDD,
   360  		ssa.OpLOONG64FNMSUBF,
   361  		ssa.OpLOONG64FNMSUBD:
   362  		p := s.Prog(v.Op.Asm())
   363  		// r=(FMA x y z) -> FMADDD z, y, x, r
   364  		// the SSA operand order is for taking advantage of
   365  		// commutativity (that only applies for the first two operands)
   366  		r := v.Reg()
   367  		x := v.Args[0].Reg()
   368  		y := v.Args[1].Reg()
   369  		z := v.Args[2].Reg()
   370  		p.From.Type = obj.TYPE_REG
   371  		p.From.Reg = z
   372  		p.Reg = y
   373  		p.AddRestSourceReg(x)
   374  		p.To.Type = obj.TYPE_REG
   375  		p.To.Reg = r
   376  
   377  	case ssa.OpLOONG64MOVVaddr:
   378  		p := s.Prog(loong64.AMOVV)
   379  		p.From.Type = obj.TYPE_ADDR
   380  		p.From.Reg = v.Args[0].Reg()
   381  		var wantreg string
   382  		// MOVV $sym+off(base), R
   383  		// the assembler expands it as the following:
   384  		// - base is SP: add constant offset to SP (R3)
   385  		// when constant is large, tmp register (R30) may be used
   386  		// - base is SB: load external address with relocation
   387  		switch v.Aux.(type) {
   388  		default:
   389  			v.Fatalf("aux is of unknown type %T", v.Aux)
   390  		case *obj.LSym:
   391  			wantreg = "SB"
   392  			ssagen.AddAux(&p.From, v)
   393  		case *ir.Name:
   394  			wantreg = "SP"
   395  			ssagen.AddAux(&p.From, v)
   396  		case nil:
   397  			// No sym, just MOVV $off(SP), R
   398  			wantreg = "SP"
   399  			p.From.Offset = v.AuxInt
   400  		}
   401  		if reg := v.Args[0].RegName(); reg != wantreg {
   402  			v.Fatalf("bad reg %s for symbol type %T, want %s", reg, v.Aux, wantreg)
   403  		}
   404  		p.To.Type = obj.TYPE_REG
   405  		p.To.Reg = v.Reg()
   406  
   407  	case ssa.OpLOONG64MOVBloadidx,
   408  		ssa.OpLOONG64MOVBUloadidx,
   409  		ssa.OpLOONG64MOVHloadidx,
   410  		ssa.OpLOONG64MOVHUloadidx,
   411  		ssa.OpLOONG64MOVWloadidx,
   412  		ssa.OpLOONG64MOVWUloadidx,
   413  		ssa.OpLOONG64MOVVloadidx,
   414  		ssa.OpLOONG64MOVFloadidx,
   415  		ssa.OpLOONG64MOVDloadidx:
   416  		p := s.Prog(v.Op.Asm())
   417  		p.From.Type = obj.TYPE_MEM
   418  		p.From.Name = obj.NAME_NONE
   419  		p.From.Reg = v.Args[0].Reg()
   420  		p.From.Index = v.Args[1].Reg()
   421  		p.To.Type = obj.TYPE_REG
   422  		p.To.Reg = v.Reg()
   423  
   424  	case ssa.OpLOONG64MOVBstoreidx,
   425  		ssa.OpLOONG64MOVHstoreidx,
   426  		ssa.OpLOONG64MOVWstoreidx,
   427  		ssa.OpLOONG64MOVVstoreidx,
   428  		ssa.OpLOONG64MOVFstoreidx,
   429  		ssa.OpLOONG64MOVDstoreidx:
   430  		p := s.Prog(v.Op.Asm())
   431  		p.From.Type = obj.TYPE_REG
   432  		p.From.Reg = v.Args[2].Reg()
   433  		p.To.Type = obj.TYPE_MEM
   434  		p.To.Name = obj.NAME_NONE
   435  		p.To.Reg = v.Args[0].Reg()
   436  		p.To.Index = v.Args[1].Reg()
   437  
   438  	case ssa.OpLOONG64MOVBload,
   439  		ssa.OpLOONG64MOVBUload,
   440  		ssa.OpLOONG64MOVHload,
   441  		ssa.OpLOONG64MOVHUload,
   442  		ssa.OpLOONG64MOVWload,
   443  		ssa.OpLOONG64MOVWUload,
   444  		ssa.OpLOONG64MOVVload,
   445  		ssa.OpLOONG64MOVFload,
   446  		ssa.OpLOONG64MOVDload:
   447  		p := s.Prog(v.Op.Asm())
   448  		p.From.Type = obj.TYPE_MEM
   449  		p.From.Reg = v.Args[0].Reg()
   450  		ssagen.AddAux(&p.From, v)
   451  		p.To.Type = obj.TYPE_REG
   452  		p.To.Reg = v.Reg()
   453  	case ssa.OpLOONG64MOVBstore,
   454  		ssa.OpLOONG64MOVHstore,
   455  		ssa.OpLOONG64MOVWstore,
   456  		ssa.OpLOONG64MOVVstore,
   457  		ssa.OpLOONG64MOVFstore,
   458  		ssa.OpLOONG64MOVDstore:
   459  		p := s.Prog(v.Op.Asm())
   460  		p.From.Type = obj.TYPE_REG
   461  		p.From.Reg = v.Args[1].Reg()
   462  		p.To.Type = obj.TYPE_MEM
   463  		p.To.Reg = v.Args[0].Reg()
   464  		ssagen.AddAux(&p.To, v)
   465  	case ssa.OpLOONG64MOVBreg,
   466  		ssa.OpLOONG64MOVBUreg,
   467  		ssa.OpLOONG64MOVHreg,
   468  		ssa.OpLOONG64MOVHUreg,
   469  		ssa.OpLOONG64MOVWreg,
   470  		ssa.OpLOONG64MOVWUreg:
   471  		a := v.Args[0]
   472  		for a.Op == ssa.OpCopy || a.Op == ssa.OpLOONG64MOVVreg {
   473  			a = a.Args[0]
   474  		}
   475  		if a.Op == ssa.OpLoadReg && loong64.REG_R0 <= a.Reg() && a.Reg() <= loong64.REG_R31 {
   476  			// LoadReg from a narrower type does an extension, except loading
   477  			// to a floating point register. So only eliminate the extension
   478  			// if it is loaded to an integer register.
   479  
   480  			t := a.Type
   481  			switch {
   482  			case v.Op == ssa.OpLOONG64MOVBreg && t.Size() == 1 && t.IsSigned(),
   483  				v.Op == ssa.OpLOONG64MOVBUreg && t.Size() == 1 && !t.IsSigned(),
   484  				v.Op == ssa.OpLOONG64MOVHreg && t.Size() == 2 && t.IsSigned(),
   485  				v.Op == ssa.OpLOONG64MOVHUreg && t.Size() == 2 && !t.IsSigned(),
   486  				v.Op == ssa.OpLOONG64MOVWreg && t.Size() == 4 && t.IsSigned(),
   487  				v.Op == ssa.OpLOONG64MOVWUreg && t.Size() == 4 && !t.IsSigned():
   488  				// arg is a proper-typed load, already zero/sign-extended, don't extend again
   489  				if v.Reg() == v.Args[0].Reg() {
   490  					return
   491  				}
   492  				p := s.Prog(loong64.AMOVV)
   493  				p.From.Type = obj.TYPE_REG
   494  				p.From.Reg = v.Args[0].Reg()
   495  				p.To.Type = obj.TYPE_REG
   496  				p.To.Reg = v.Reg()
   497  				return
   498  			default:
   499  			}
   500  		}
   501  		fallthrough
   502  
   503  	case ssa.OpLOONG64MOVWF,
   504  		ssa.OpLOONG64MOVWD,
   505  		ssa.OpLOONG64TRUNCFW,
   506  		ssa.OpLOONG64TRUNCDW,
   507  		ssa.OpLOONG64MOVVF,
   508  		ssa.OpLOONG64MOVVD,
   509  		ssa.OpLOONG64TRUNCFV,
   510  		ssa.OpLOONG64TRUNCDV,
   511  		ssa.OpLOONG64MOVFD,
   512  		ssa.OpLOONG64MOVDF,
   513  		ssa.OpLOONG64MOVWfpgp,
   514  		ssa.OpLOONG64MOVWgpfp,
   515  		ssa.OpLOONG64MOVVfpgp,
   516  		ssa.OpLOONG64MOVVgpfp,
   517  		ssa.OpLOONG64NEGF,
   518  		ssa.OpLOONG64NEGD,
   519  		ssa.OpLOONG64CLZW,
   520  		ssa.OpLOONG64CLZV,
   521  		ssa.OpLOONG64CTZW,
   522  		ssa.OpLOONG64CTZV,
   523  		ssa.OpLOONG64SQRTD,
   524  		ssa.OpLOONG64SQRTF,
   525  		ssa.OpLOONG64REVB2H,
   526  		ssa.OpLOONG64REVB2W,
   527  		ssa.OpLOONG64REVB4H,
   528  		ssa.OpLOONG64REVBV,
   529  		ssa.OpLOONG64BITREV4B,
   530  		ssa.OpLOONG64BITREVW,
   531  		ssa.OpLOONG64BITREVV,
   532  		ssa.OpLOONG64ABSF,
   533  		ssa.OpLOONG64ABSD:
   534  		p := s.Prog(v.Op.Asm())
   535  		p.From.Type = obj.TYPE_REG
   536  		p.From.Reg = v.Args[0].Reg()
   537  		p.To.Type = obj.TYPE_REG
   538  		p.To.Reg = v.Reg()
   539  
   540  	case ssa.OpLOONG64VPCNT64,
   541  		ssa.OpLOONG64VPCNT32,
   542  		ssa.OpLOONG64VPCNT16,
   543  		ssa.OpLOONG64FRINTND,
   544  		ssa.OpLOONG64FRINTZD,
   545  		ssa.OpLOONG64FRINTPD,
   546  		ssa.OpLOONG64FRINTMD:
   547  		p := s.Prog(v.Op.Asm())
   548  		p.From.Type = obj.TYPE_REG
   549  		p.From.Reg = ((v.Args[0].Reg() - loong64.REG_F0) & 31) + loong64.REG_V0
   550  		p.To.Type = obj.TYPE_REG
   551  		p.To.Reg = ((v.Reg() - loong64.REG_F0) & 31) + loong64.REG_V0
   552  
   553  	case ssa.OpLOONG64NEGV:
   554  		// SUB from REGZERO
   555  		p := s.Prog(loong64.ASUBVU)
   556  		p.From.Type = obj.TYPE_REG
   557  		p.From.Reg = v.Args[0].Reg()
   558  		p.Reg = loong64.REGZERO
   559  		p.To.Type = obj.TYPE_REG
   560  		p.To.Reg = v.Reg()
   561  
   562  	case ssa.OpLOONG64LoweredZero:
   563  		ptrReg := v.Args[0].Reg()
   564  		n := v.AuxInt
   565  		if n < 16 {
   566  			v.Fatalf("Zero too small %d", n)
   567  		}
   568  
   569  		// Generate Zeroing instructions.
   570  		var off int64
   571  		for n >= 8 {
   572  			// MOVV     ZR, off(ptrReg)
   573  			zero8(s, ptrReg, off)
   574  			off += 8
   575  			n -= 8
   576  		}
   577  		if n != 0 {
   578  			// MOVV     ZR, off+n-8(ptrReg)
   579  			zero8(s, ptrReg, off+n-8)
   580  		}
   581  	case ssa.OpLOONG64LoweredZeroLoop:
   582  		ptrReg := v.Args[0].Reg()
   583  		endReg := v.RegTmp()
   584  		flagReg := int16(loong64.REGTMP)
   585  		var off int64
   586  		n := v.AuxInt
   587  		loopSize := int64(64)
   588  		if n < 3*loopSize {
   589  			// - a loop count of 0 won't work.
   590  			// - a loop count of 1 is useless.
   591  			// - a loop count of 2 is a code size ~tie
   592  			//     4 instructions to implement the loop
   593  			//     8 instructions in the loop body
   594  			//   vs
   595  			//     16 instructions in the straightline code
   596  			//   Might as well use straightline code.
   597  			v.Fatalf("ZeroLoop size too small %d", n)
   598  		}
   599  
   600  		//    ADDV    n - n%loopSize, ptrReg, endReg
   601  		//    MOVBU   ir.Syms.Loong64HasLSX, flagReg
   602  		//    BNE     flagReg, lsxInit
   603  		// genericLoop:
   604  		//    for off = 0; off < loopSize; off += 8 {
   605  		//            zero8(s, ptrReg, off)
   606  		//    }
   607  		//    ADDV    $loopSize, ptrReg
   608  		//    BNE     endReg, ptrReg, genericLoop
   609  		//    JMP     tail
   610  		// lsxInit:
   611  		//    VXORV   V31, V31, V31
   612  		// lsxLoop:
   613  		//    for off = 0; off < loopSize; off += 16 {
   614  		//            zero16(s, V31, ptrReg, off)
   615  		//    }
   616  		//    ADDV    $loopSize, ptrReg
   617  		//    BNE     endReg, ptrReg, lsxLoop
   618  		// tail:
   619  		//    n %= loopSize
   620  		//    for off = 0; n >= 8; off += 8, n -= 8 {
   621  		//            zero8(s, ptrReg, off)
   622  		//    }
   623  		//
   624  		//    if n != 0 {
   625  		//           zero8(s, ptrReg, off+n-8)
   626  		//    }
   627  
   628  		p1 := s.Prog(loong64.AADDV)
   629  		p1.From.Type = obj.TYPE_CONST
   630  		p1.From.Offset = n - n%loopSize
   631  		p1.Reg = ptrReg
   632  		p1.To.Type = obj.TYPE_REG
   633  		p1.To.Reg = endReg
   634  
   635  		p2 := s.Prog(loong64.AMOVBU)
   636  		p2.From.Type = obj.TYPE_MEM
   637  		p2.From.Name = obj.NAME_EXTERN
   638  		p2.From.Sym = ir.Syms.Loong64HasLSX
   639  		p2.To.Type = obj.TYPE_REG
   640  		p2.To.Reg = flagReg
   641  
   642  		p3 := s.Prog(loong64.ABNE)
   643  		p3.From.Type = obj.TYPE_REG
   644  		p3.From.Reg = flagReg
   645  		p3.To.Type = obj.TYPE_BRANCH
   646  
   647  		for off = 0; off < loopSize; off += 8 {
   648  			zero8(s, ptrReg, off)
   649  		}
   650  
   651  		p4 := s.Prog(loong64.AADDV)
   652  		p4.From.Type = obj.TYPE_CONST
   653  		p4.From.Offset = loopSize
   654  		p4.To.Type = obj.TYPE_REG
   655  		p4.To.Reg = ptrReg
   656  
   657  		p5 := s.Prog(loong64.ABNE)
   658  		p5.From.Type = obj.TYPE_REG
   659  		p5.From.Reg = endReg
   660  		p5.Reg = ptrReg
   661  		p5.To.Type = obj.TYPE_BRANCH
   662  		p5.To.SetTarget(p3.Link)
   663  
   664  		p6 := s.Prog(obj.AJMP)
   665  		p6.To.Type = obj.TYPE_BRANCH
   666  
   667  		p7 := s.Prog(loong64.AVXORV)
   668  		p7.From.Type = obj.TYPE_REG
   669  		p7.From.Reg = loong64.REG_V31
   670  		p7.To.Type = obj.TYPE_REG
   671  		p7.To.Reg = loong64.REG_V31
   672  		p3.To.SetTarget(p7)
   673  
   674  		for off = 0; off < loopSize; off += 16 {
   675  			zero16(s, loong64.REG_V31, ptrReg, off)
   676  		}
   677  
   678  		p8 := s.Prog(loong64.AADDV)
   679  		p8.From.Type = obj.TYPE_CONST
   680  		p8.From.Offset = loopSize
   681  		p8.To.Type = obj.TYPE_REG
   682  		p8.To.Reg = ptrReg
   683  
   684  		p9 := s.Prog(loong64.ABNE)
   685  		p9.From.Type = obj.TYPE_REG
   686  		p9.From.Reg = endReg
   687  		p9.Reg = ptrReg
   688  		p9.To.Type = obj.TYPE_BRANCH
   689  		p9.To.SetTarget(p7.Link)
   690  
   691  		p10 := s.Prog(obj.ANOP)
   692  		p6.To.SetTarget(p10)
   693  
   694  		// Multiples of the loop size are now done.
   695  		n %= loopSize
   696  		// Write any fractional portion.
   697  		for off = 0; n >= 8; off += 8 {
   698  			// MOVV   ZR, off(ptrReg)
   699  			zero8(s, ptrReg, off)
   700  			n -= 8
   701  		}
   702  
   703  		if n != 0 {
   704  			zero8(s, ptrReg, off+n-8)
   705  		}
   706  
   707  	case ssa.OpLOONG64LoweredMove:
   708  		dstReg := v.Args[0].Reg()
   709  		srcReg := v.Args[1].Reg()
   710  		if dstReg == srcReg {
   711  			break
   712  		}
   713  		tmpReg := int16(loong64.REG_R23)
   714  		n := v.AuxInt
   715  		if n < 16 {
   716  			v.Fatalf("Move too small %d", n)
   717  		}
   718  
   719  		var off int64
   720  		for n >= 8 {
   721  			// MOVV     off(srcReg), tmpReg
   722  			// MOVV     tmpReg, off(dstReg)
   723  			move8(s, srcReg, dstReg, tmpReg, off)
   724  			off += 8
   725  			n -= 8
   726  		}
   727  
   728  		if n != 0 {
   729  			// MOVV     off+n-8(srcReg), tmpReg
   730  			// MOVV     tmpReg, off+n-8(dstReg)
   731  			move8(s, srcReg, dstReg, tmpReg, off+n-8)
   732  		}
   733  	case ssa.OpLOONG64LoweredMoveLoop:
   734  		dstReg := v.Args[0].Reg()
   735  		srcReg := v.Args[1].Reg()
   736  		if dstReg == srcReg {
   737  			break
   738  		}
   739  		srcEndReg := int16(loong64.REG_R23)
   740  		tmpReg := int16(loong64.REG_R24)
   741  		var off int64
   742  		n := v.AuxInt
   743  		loopSize := int64(64)
   744  		if n < 3*loopSize {
   745  			// - a loop count of 0 won't work.
   746  			// - a loop count of 1 is useless.
   747  			// - a loop count of 2 is a code size ~tie
   748  			//     4 instructions to implement the loop
   749  			//     8 instructions in the loop body
   750  			//   vs
   751  			//     16 instructions in the straightline code
   752  			//   Might as well use straightline code.
   753  			v.Fatalf("MoveLoop size too small %d", n)
   754  		}
   755  
   756  		//    ADDV    n - n%loopSize, srcReg, srcEndReg
   757  		// Loop8:
   758  		//    for off = 0; off < loopSize; off += 8 {
   759  		//            move8(s, srcReg, dstReg, tmpReg, off)
   760  		//    }
   761  		//    ADDV    $loopSize, srcReg
   762  		//    ADDV    $loopSize, dstReg
   763  		//    BNE     srcEndReg, srcReg, Loop8
   764  		//
   765  		//    n %= loopSize
   766  		//    for off = 0; n >= 8; off += 8 {
   767  		//           move8(s, srcReg, dstReg, tmpReg, off)
   768  		//           n -= 8
   769  		//    }
   770  		//
   771  		//    if n != 0 {
   772  		//           move8(s, srcReg, dstReg, tmpReg, off+n-8)
   773  		//    }
   774  
   775  		p1 := s.Prog(loong64.AADDV)
   776  		p1.From.Type = obj.TYPE_CONST
   777  		p1.From.Offset = n - n%loopSize
   778  		p1.Reg = srcReg
   779  		p1.To.Type = obj.TYPE_REG
   780  		p1.To.Reg = srcEndReg
   781  
   782  		for off = 0; off < loopSize; off += 8 {
   783  			move8(s, srcReg, dstReg, tmpReg, off)
   784  		}
   785  
   786  		p2 := s.Prog(loong64.AADDV)
   787  		p2.From.Type = obj.TYPE_CONST
   788  		p2.From.Offset = loopSize
   789  		p2.To.Type = obj.TYPE_REG
   790  		p2.To.Reg = srcReg
   791  
   792  		p3 := s.Prog(loong64.AADDV)
   793  		p3.From.Type = obj.TYPE_CONST
   794  		p3.From.Offset = loopSize
   795  		p3.To.Type = obj.TYPE_REG
   796  		p3.To.Reg = dstReg
   797  
   798  		p4 := s.Prog(loong64.ABNE)
   799  		p4.From.Type = obj.TYPE_REG
   800  		p4.From.Reg = srcEndReg
   801  		p4.Reg = srcReg
   802  		p4.To.Type = obj.TYPE_BRANCH
   803  		p4.To.SetTarget(p1.Link)
   804  
   805  		// Multiples of the loop size are now done.
   806  		n %= loopSize
   807  
   808  		// Copy any fractional portion.
   809  		for off = 0; n >= 8; off += 8 {
   810  			move8(s, srcReg, dstReg, tmpReg, off)
   811  			n -= 8
   812  		}
   813  
   814  		if n != 0 {
   815  			move8(s, srcReg, dstReg, tmpReg, off+n-8)
   816  		}
   817  
   818  	case ssa.OpLOONG64CALLstatic, ssa.OpLOONG64CALLclosure, ssa.OpLOONG64CALLinter:
   819  		s.Call(v)
   820  	case ssa.OpLOONG64CALLtail, ssa.OpLOONG64CALLtailinter:
   821  		s.TailCall(v)
   822  	case ssa.OpLOONG64LoweredWB:
   823  		p := s.Prog(obj.ACALL)
   824  		p.To.Type = obj.TYPE_MEM
   825  		p.To.Name = obj.NAME_EXTERN
   826  		// AuxInt encodes how many buffer entries we need.
   827  		p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1]
   828  
   829  	case ssa.OpLOONG64LoweredPubBarrier:
   830  		// DBAR 0x1A
   831  		p := s.Prog(v.Op.Asm())
   832  		p.From.Type = obj.TYPE_CONST
   833  		p.From.Offset = 0x1A
   834  
   835  	case ssa.OpLOONG64LoweredPanicBoundsRR, ssa.OpLOONG64LoweredPanicBoundsRC, ssa.OpLOONG64LoweredPanicBoundsCR, ssa.OpLOONG64LoweredPanicBoundsCC:
   836  		// Compute the constant we put in the PCData entry for this call.
   837  		code, signed := ssa.BoundsKind(v.AuxInt).Code()
   838  		xIsReg := false
   839  		yIsReg := false
   840  		xVal := 0
   841  		yVal := 0
   842  		switch v.Op {
   843  		case ssa.OpLOONG64LoweredPanicBoundsRR:
   844  			xIsReg = true
   845  			xVal = int(v.Args[0].Reg() - loong64.REG_R4)
   846  			yIsReg = true
   847  			yVal = int(v.Args[1].Reg() - loong64.REG_R4)
   848  		case ssa.OpLOONG64LoweredPanicBoundsRC:
   849  			xIsReg = true
   850  			xVal = int(v.Args[0].Reg() - loong64.REG_R4)
   851  			c := v.Aux.(ssa.PanicBoundsC).C
   852  			if c >= 0 && c <= abi.BoundsMaxConst {
   853  				yVal = int(c)
   854  			} else {
   855  				// Move constant to a register
   856  				yIsReg = true
   857  				if yVal == xVal {
   858  					yVal = 1
   859  				}
   860  				p := s.Prog(loong64.AMOVV)
   861  				p.From.Type = obj.TYPE_CONST
   862  				p.From.Offset = c
   863  				p.To.Type = obj.TYPE_REG
   864  				p.To.Reg = loong64.REG_R4 + int16(yVal)
   865  			}
   866  		case ssa.OpLOONG64LoweredPanicBoundsCR:
   867  			yIsReg = true
   868  			yVal = int(v.Args[0].Reg() - loong64.REG_R4)
   869  			c := v.Aux.(ssa.PanicBoundsC).C
   870  			if c >= 0 && c <= abi.BoundsMaxConst {
   871  				xVal = int(c)
   872  			} else {
   873  				// Move constant to a register
   874  				xIsReg = true
   875  				if xVal == yVal {
   876  					xVal = 1
   877  				}
   878  				p := s.Prog(loong64.AMOVV)
   879  				p.From.Type = obj.TYPE_CONST
   880  				p.From.Offset = c
   881  				p.To.Type = obj.TYPE_REG
   882  				p.To.Reg = loong64.REG_R4 + int16(xVal)
   883  			}
   884  		case ssa.OpLOONG64LoweredPanicBoundsCC:
   885  			c := v.Aux.(ssa.PanicBoundsCC).Cx
   886  			if c >= 0 && c <= abi.BoundsMaxConst {
   887  				xVal = int(c)
   888  			} else {
   889  				// Move constant to a register
   890  				xIsReg = true
   891  				p := s.Prog(loong64.AMOVV)
   892  				p.From.Type = obj.TYPE_CONST
   893  				p.From.Offset = c
   894  				p.To.Type = obj.TYPE_REG
   895  				p.To.Reg = loong64.REG_R4 + int16(xVal)
   896  			}
   897  			c = v.Aux.(ssa.PanicBoundsCC).Cy
   898  			if c >= 0 && c <= abi.BoundsMaxConst {
   899  				yVal = int(c)
   900  			} else {
   901  				// Move constant to a register
   902  				yIsReg = true
   903  				yVal = 1
   904  				p := s.Prog(loong64.AMOVV)
   905  				p.From.Type = obj.TYPE_CONST
   906  				p.From.Offset = c
   907  				p.To.Type = obj.TYPE_REG
   908  				p.To.Reg = loong64.REG_R4 + int16(yVal)
   909  			}
   910  		}
   911  		c := abi.BoundsEncode(code, signed, xIsReg, yIsReg, xVal, yVal)
   912  
   913  		p := s.Prog(obj.APCDATA)
   914  		p.From.SetConst(abi.PCDATA_PanicBounds)
   915  		p.To.SetConst(int64(c))
   916  		p = s.Prog(obj.ACALL)
   917  		p.To.Type = obj.TYPE_MEM
   918  		p.To.Name = obj.NAME_EXTERN
   919  		p.To.Sym = ir.Syms.PanicBounds
   920  
   921  	case ssa.OpLOONG64LoweredAtomicLoad8, ssa.OpLOONG64LoweredAtomicLoad32, ssa.OpLOONG64LoweredAtomicLoad64:
   922  		// MOVB	(Rarg0), Rout
   923  		// DBAR	0x14
   924  		as := loong64.AMOVV
   925  		switch v.Op {
   926  		case ssa.OpLOONG64LoweredAtomicLoad8:
   927  			as = loong64.AMOVB
   928  		case ssa.OpLOONG64LoweredAtomicLoad32:
   929  			as = loong64.AMOVW
   930  		}
   931  		p := s.Prog(as)
   932  		p.From.Type = obj.TYPE_MEM
   933  		p.From.Reg = v.Args[0].Reg()
   934  		p.To.Type = obj.TYPE_REG
   935  		p.To.Reg = v.Reg0()
   936  		p1 := s.Prog(loong64.ADBAR)
   937  		p1.From.Type = obj.TYPE_CONST
   938  		p1.From.Offset = 0x14
   939  
   940  	case ssa.OpLOONG64LoweredAtomicStore8,
   941  		ssa.OpLOONG64LoweredAtomicStore32,
   942  		ssa.OpLOONG64LoweredAtomicStore64:
   943  		// DBAR 0x12
   944  		// MOVx Rarg1, (Rarg0)
   945  		// DBAR 0x18
   946  		movx := loong64.AMOVV
   947  		switch v.Op {
   948  		case ssa.OpLOONG64LoweredAtomicStore8:
   949  			movx = loong64.AMOVB
   950  		case ssa.OpLOONG64LoweredAtomicStore32:
   951  			movx = loong64.AMOVW
   952  		}
   953  		p := s.Prog(loong64.ADBAR)
   954  		p.From.Type = obj.TYPE_CONST
   955  		p.From.Offset = 0x12
   956  
   957  		p1 := s.Prog(movx)
   958  		p1.From.Type = obj.TYPE_REG
   959  		p1.From.Reg = v.Args[1].Reg()
   960  		p1.To.Type = obj.TYPE_MEM
   961  		p1.To.Reg = v.Args[0].Reg()
   962  
   963  		p2 := s.Prog(loong64.ADBAR)
   964  		p2.From.Type = obj.TYPE_CONST
   965  		p2.From.Offset = 0x18
   966  
   967  	case ssa.OpLOONG64LoweredAtomicStore8Variant,
   968  		ssa.OpLOONG64LoweredAtomicStore32Variant,
   969  		ssa.OpLOONG64LoweredAtomicStore64Variant:
   970  		//AMSWAPx  Rarg1, (Rarg0), Rout
   971  		amswapx := loong64.AAMSWAPDBV
   972  		switch v.Op {
   973  		case ssa.OpLOONG64LoweredAtomicStore32Variant:
   974  			amswapx = loong64.AAMSWAPDBW
   975  		case ssa.OpLOONG64LoweredAtomicStore8Variant:
   976  			amswapx = loong64.AAMSWAPDBB
   977  		}
   978  		p := s.Prog(amswapx)
   979  		p.From.Type = obj.TYPE_REG
   980  		p.From.Reg = v.Args[1].Reg()
   981  		p.To.Type = obj.TYPE_MEM
   982  		p.To.Reg = v.Args[0].Reg()
   983  		p.RegTo2 = loong64.REGZERO
   984  
   985  	case ssa.OpLOONG64LoweredAtomicExchange32, ssa.OpLOONG64LoweredAtomicExchange64:
   986  		// AMSWAPx	Rarg1, (Rarg0), Rout
   987  		amswapx := loong64.AAMSWAPDBV
   988  		if v.Op == ssa.OpLOONG64LoweredAtomicExchange32 {
   989  			amswapx = loong64.AAMSWAPDBW
   990  		}
   991  		p := s.Prog(amswapx)
   992  		p.From.Type = obj.TYPE_REG
   993  		p.From.Reg = v.Args[1].Reg()
   994  		p.To.Type = obj.TYPE_MEM
   995  		p.To.Reg = v.Args[0].Reg()
   996  		p.RegTo2 = v.Reg0()
   997  
   998  	case ssa.OpLOONG64LoweredAtomicExchange8Variant:
   999  		// AMSWAPDBB	Rarg1, (Rarg0), Rout
  1000  		p := s.Prog(loong64.AAMSWAPDBB)
  1001  		p.From.Type = obj.TYPE_REG
  1002  		p.From.Reg = v.Args[1].Reg()
  1003  		p.To.Type = obj.TYPE_MEM
  1004  		p.To.Reg = v.Args[0].Reg()
  1005  		p.RegTo2 = v.Reg0()
  1006  
  1007  	case ssa.OpLOONG64LoweredAtomicAdd32, ssa.OpLOONG64LoweredAtomicAdd64:
  1008  		// AMADDx  Rarg1, (Rarg0), Rout
  1009  		// ADDV    Rarg1, Rout, Rout
  1010  		amaddx := loong64.AAMADDDBV
  1011  		addx := loong64.AADDV
  1012  		if v.Op == ssa.OpLOONG64LoweredAtomicAdd32 {
  1013  			amaddx = loong64.AAMADDDBW
  1014  		}
  1015  		p := s.Prog(amaddx)
  1016  		p.From.Type = obj.TYPE_REG
  1017  		p.From.Reg = v.Args[1].Reg()
  1018  		p.To.Type = obj.TYPE_MEM
  1019  		p.To.Reg = v.Args[0].Reg()
  1020  		p.RegTo2 = v.Reg0()
  1021  
  1022  		p1 := s.Prog(addx)
  1023  		p1.From.Type = obj.TYPE_REG
  1024  		p1.From.Reg = v.Args[1].Reg()
  1025  		p1.Reg = v.Reg0()
  1026  		p1.To.Type = obj.TYPE_REG
  1027  		p1.To.Reg = v.Reg0()
  1028  
  1029  	case ssa.OpLOONG64LoweredAtomicCas32, ssa.OpLOONG64LoweredAtomicCas64:
  1030  		// MOVV $0, Rout
  1031  		// DBAR 0x14
  1032  		// LL	(Rarg0), Rtmp
  1033  		// BNE	Rtmp, Rarg1, 4(PC)
  1034  		// MOVV Rarg2, Rout
  1035  		// SC	Rout, (Rarg0)
  1036  		// BEQ	Rout, -4(PC)
  1037  		// DBAR 0x12
  1038  		ll := loong64.ALLV
  1039  		sc := loong64.ASCV
  1040  		if v.Op == ssa.OpLOONG64LoweredAtomicCas32 {
  1041  			ll = loong64.ALL
  1042  			sc = loong64.ASC
  1043  		}
  1044  
  1045  		p := s.Prog(loong64.AMOVV)
  1046  		p.From.Type = obj.TYPE_REG
  1047  		p.From.Reg = loong64.REGZERO
  1048  		p.To.Type = obj.TYPE_REG
  1049  		p.To.Reg = v.Reg0()
  1050  
  1051  		p1 := s.Prog(loong64.ADBAR)
  1052  		p1.From.Type = obj.TYPE_CONST
  1053  		p1.From.Offset = 0x14
  1054  
  1055  		p2 := s.Prog(ll)
  1056  		p2.From.Type = obj.TYPE_MEM
  1057  		p2.From.Reg = v.Args[0].Reg()
  1058  		p2.To.Type = obj.TYPE_REG
  1059  		p2.To.Reg = loong64.REGTMP
  1060  
  1061  		p3 := s.Prog(loong64.ABNE)
  1062  		p3.From.Type = obj.TYPE_REG
  1063  		p3.From.Reg = v.Args[1].Reg()
  1064  		p3.Reg = loong64.REGTMP
  1065  		p3.To.Type = obj.TYPE_BRANCH
  1066  
  1067  		p4 := s.Prog(loong64.AMOVV)
  1068  		p4.From.Type = obj.TYPE_REG
  1069  		p4.From.Reg = v.Args[2].Reg()
  1070  		p4.To.Type = obj.TYPE_REG
  1071  		p4.To.Reg = v.Reg0()
  1072  
  1073  		p5 := s.Prog(sc)
  1074  		p5.From.Type = obj.TYPE_REG
  1075  		p5.From.Reg = v.Reg0()
  1076  		p5.To.Type = obj.TYPE_MEM
  1077  		p5.To.Reg = v.Args[0].Reg()
  1078  
  1079  		p6 := s.Prog(loong64.ABEQ)
  1080  		p6.From.Type = obj.TYPE_REG
  1081  		p6.From.Reg = v.Reg0()
  1082  		p6.To.Type = obj.TYPE_BRANCH
  1083  		p6.To.SetTarget(p2)
  1084  
  1085  		p7 := s.Prog(loong64.ADBAR)
  1086  		p7.From.Type = obj.TYPE_CONST
  1087  		p7.From.Offset = 0x12
  1088  		p3.To.SetTarget(p7)
  1089  
  1090  	case ssa.OpLOONG64LoweredAtomicAnd32,
  1091  		ssa.OpLOONG64LoweredAtomicOr32:
  1092  		// AM{AND,OR}DBx  Rarg1, (Rarg0), RegZero
  1093  		p := s.Prog(v.Op.Asm())
  1094  		p.From.Type = obj.TYPE_REG
  1095  		p.From.Reg = v.Args[1].Reg()
  1096  		p.To.Type = obj.TYPE_MEM
  1097  		p.To.Reg = v.Args[0].Reg()
  1098  		p.RegTo2 = loong64.REGZERO
  1099  
  1100  	case ssa.OpLOONG64LoweredAtomicAnd32value,
  1101  		ssa.OpLOONG64LoweredAtomicAnd64value,
  1102  		ssa.OpLOONG64LoweredAtomicOr64value,
  1103  		ssa.OpLOONG64LoweredAtomicOr32value:
  1104  		// AM{AND,OR}DBx  Rarg1, (Rarg0), Rout
  1105  		p := s.Prog(v.Op.Asm())
  1106  		p.From.Type = obj.TYPE_REG
  1107  		p.From.Reg = v.Args[1].Reg()
  1108  		p.To.Type = obj.TYPE_MEM
  1109  		p.To.Reg = v.Args[0].Reg()
  1110  		p.RegTo2 = v.Reg0()
  1111  
  1112  	case ssa.OpLOONG64LoweredAtomicCas64Variant, ssa.OpLOONG64LoweredAtomicCas32Variant:
  1113  		// MOVV         $0, Rout
  1114  		// MOVV         Rarg1, Rtmp
  1115  		// AMCASDBx     Rarg2, (Rarg0), Rtmp
  1116  		// BNE          Rarg1, Rtmp, 2(PC)
  1117  		// MOVV         $1, Rout
  1118  		// NOP
  1119  
  1120  		amcasx := loong64.AAMCASDBV
  1121  		if v.Op == ssa.OpLOONG64LoweredAtomicCas32Variant {
  1122  			amcasx = loong64.AAMCASDBW
  1123  		}
  1124  
  1125  		p := s.Prog(loong64.AMOVV)
  1126  		p.From.Type = obj.TYPE_REG
  1127  		p.From.Reg = loong64.REGZERO
  1128  		p.To.Type = obj.TYPE_REG
  1129  		p.To.Reg = v.Reg0()
  1130  
  1131  		p1 := s.Prog(loong64.AMOVV)
  1132  		p1.From.Type = obj.TYPE_REG
  1133  		p1.From.Reg = v.Args[1].Reg()
  1134  		p1.To.Type = obj.TYPE_REG
  1135  		p1.To.Reg = loong64.REGTMP
  1136  
  1137  		p2 := s.Prog(amcasx)
  1138  		p2.From.Type = obj.TYPE_REG
  1139  		p2.From.Reg = v.Args[2].Reg()
  1140  		p2.To.Type = obj.TYPE_MEM
  1141  		p2.To.Reg = v.Args[0].Reg()
  1142  		p2.RegTo2 = loong64.REGTMP
  1143  
  1144  		p3 := s.Prog(loong64.ABNE)
  1145  		p3.From.Type = obj.TYPE_REG
  1146  		p3.From.Reg = v.Args[1].Reg()
  1147  		p3.Reg = loong64.REGTMP
  1148  		p3.To.Type = obj.TYPE_BRANCH
  1149  
  1150  		p4 := s.Prog(loong64.AMOVV)
  1151  		p4.From.Type = obj.TYPE_CONST
  1152  		p4.From.Offset = 0x1
  1153  		p4.To.Type = obj.TYPE_REG
  1154  		p4.To.Reg = v.Reg0()
  1155  
  1156  		p5 := s.Prog(obj.ANOP)
  1157  		p3.To.SetTarget(p5)
  1158  
  1159  	case ssa.OpLOONG64LoweredNilCheck:
  1160  		// Issue a load which will fault if arg is nil.
  1161  		p := s.Prog(loong64.AMOVB)
  1162  		p.From.Type = obj.TYPE_MEM
  1163  		p.From.Reg = v.Args[0].Reg()
  1164  		ssagen.AddAux(&p.From, v)
  1165  		p.To.Type = obj.TYPE_REG
  1166  		p.To.Reg = loong64.REGTMP
  1167  		if logopt.Enabled() {
  1168  			logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
  1169  		}
  1170  		if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
  1171  			base.WarnfAt(v.Pos, "generated nil check")
  1172  		}
  1173  	case ssa.OpLOONG64FPFlagTrue,
  1174  		ssa.OpLOONG64FPFlagFalse:
  1175  		// MOVV	$0, r
  1176  		// BFPF	2(PC)
  1177  		// MOVV	$1, r
  1178  		branch := loong64.ABFPF
  1179  		if v.Op == ssa.OpLOONG64FPFlagFalse {
  1180  			branch = loong64.ABFPT
  1181  		}
  1182  		p := s.Prog(loong64.AMOVV)
  1183  		p.From.Type = obj.TYPE_REG
  1184  		p.From.Reg = loong64.REGZERO
  1185  		p.To.Type = obj.TYPE_REG
  1186  		p.To.Reg = v.Reg()
  1187  		p2 := s.Prog(branch)
  1188  		p2.To.Type = obj.TYPE_BRANCH
  1189  		p3 := s.Prog(loong64.AMOVV)
  1190  		p3.From.Type = obj.TYPE_CONST
  1191  		p3.From.Offset = 1
  1192  		p3.To.Type = obj.TYPE_REG
  1193  		p3.To.Reg = v.Reg()
  1194  		p4 := s.Prog(obj.ANOP) // not a machine instruction, for branch to land
  1195  		p2.To.SetTarget(p4)
  1196  	case ssa.OpLOONG64LoweredGetClosurePtr:
  1197  		// Closure pointer is R22 (loong64.REGCTXT).
  1198  		ssagen.CheckLoweredGetClosurePtr(v)
  1199  	case ssa.OpLOONG64LoweredGetCallerSP:
  1200  		// caller's SP is FixedFrameSize below the address of the first arg
  1201  		p := s.Prog(loong64.AMOVV)
  1202  		p.From.Type = obj.TYPE_ADDR
  1203  		p.From.Offset = -base.Ctxt.Arch.FixedFrameSize
  1204  		p.From.Name = obj.NAME_PARAM
  1205  		p.To.Type = obj.TYPE_REG
  1206  		p.To.Reg = v.Reg()
  1207  	case ssa.OpLOONG64LoweredGetCallerPC:
  1208  		p := s.Prog(obj.AGETCALLERPC)
  1209  		p.To.Type = obj.TYPE_REG
  1210  		p.To.Reg = v.Reg()
  1211  	case ssa.OpLOONG64MASKEQZ, ssa.OpLOONG64MASKNEZ:
  1212  		p := s.Prog(v.Op.Asm())
  1213  		p.From.Type = obj.TYPE_REG
  1214  		p.From.Reg = v.Args[1].Reg()
  1215  		p.Reg = v.Args[0].Reg()
  1216  		p.To.Type = obj.TYPE_REG
  1217  		p.To.Reg = v.Reg()
  1218  
  1219  	case ssa.OpLOONG64PRELD:
  1220  		// PRELD (Rarg0), hint
  1221  		p := s.Prog(v.Op.Asm())
  1222  		p.From.Type = obj.TYPE_MEM
  1223  		p.From.Reg = v.Args[0].Reg()
  1224  		p.AddRestSourceConst(v.AuxInt & 0x1f)
  1225  
  1226  	case ssa.OpLOONG64PRELDX:
  1227  		// PRELDX (Rarg0), $n, $hint
  1228  		p := s.Prog(v.Op.Asm())
  1229  		p.From.Type = obj.TYPE_MEM
  1230  		p.From.Reg = v.Args[0].Reg()
  1231  		p.AddRestSourceArgs([]obj.Addr{
  1232  			{Type: obj.TYPE_CONST, Offset: (v.AuxInt >> 5) & 0x1fffffffff},
  1233  			{Type: obj.TYPE_CONST, Offset: (v.AuxInt >> 0) & 0x1f},
  1234  		})
  1235  
  1236  	case ssa.OpLOONG64ADDshiftLLV:
  1237  		// ADDshiftLLV Rarg0, Rarg1, $shift
  1238  		// ALSLV $shift, Rarg1, Rarg0, Rtmp
  1239  		p := s.Prog(v.Op.Asm())
  1240  		p.From.Type = obj.TYPE_CONST
  1241  		p.From.Offset = v.AuxInt
  1242  		p.Reg = v.Args[1].Reg()
  1243  		p.AddRestSourceReg(v.Args[0].Reg())
  1244  		p.To.Type = obj.TYPE_REG
  1245  		p.To.Reg = v.Reg()
  1246  
  1247  	case ssa.OpClobber, ssa.OpClobberReg:
  1248  		// TODO: implement for clobberdead experiment. Nop is ok for now.
  1249  	default:
  1250  		v.Fatalf("genValue not implemented: %s", v.LongString())
  1251  	}
  1252  }
  1253  
  1254  var blockJump = map[ssa.BlockKind]struct {
  1255  	asm, invasm obj.As
  1256  }{
  1257  	ssa.BlockLOONG64EQZ:  {loong64.ABEQ, loong64.ABNE},
  1258  	ssa.BlockLOONG64NEZ:  {loong64.ABNE, loong64.ABEQ},
  1259  	ssa.BlockLOONG64LTZ:  {loong64.ABLTZ, loong64.ABGEZ},
  1260  	ssa.BlockLOONG64GEZ:  {loong64.ABGEZ, loong64.ABLTZ},
  1261  	ssa.BlockLOONG64LEZ:  {loong64.ABLEZ, loong64.ABGTZ},
  1262  	ssa.BlockLOONG64GTZ:  {loong64.ABGTZ, loong64.ABLEZ},
  1263  	ssa.BlockLOONG64FPT:  {loong64.ABFPT, loong64.ABFPF},
  1264  	ssa.BlockLOONG64FPF:  {loong64.ABFPF, loong64.ABFPT},
  1265  	ssa.BlockLOONG64BEQ:  {loong64.ABEQ, loong64.ABNE},
  1266  	ssa.BlockLOONG64BNE:  {loong64.ABNE, loong64.ABEQ},
  1267  	ssa.BlockLOONG64BGE:  {loong64.ABGE, loong64.ABLT},
  1268  	ssa.BlockLOONG64BLT:  {loong64.ABLT, loong64.ABGE},
  1269  	ssa.BlockLOONG64BLTU: {loong64.ABLTU, loong64.ABGEU},
  1270  	ssa.BlockLOONG64BGEU: {loong64.ABGEU, loong64.ABLTU},
  1271  }
  1272  
  1273  func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
  1274  	switch b.Kind {
  1275  	case ssa.BlockPlain, ssa.BlockDefer:
  1276  		if b.Succs[0].Block() != next {
  1277  			p := s.Prog(obj.AJMP)
  1278  			p.To.Type = obj.TYPE_BRANCH
  1279  			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
  1280  		}
  1281  	case ssa.BlockExit, ssa.BlockRetJmp:
  1282  	case ssa.BlockRet:
  1283  		s.Prog(obj.ARET)
  1284  	case ssa.BlockLOONG64EQZ, ssa.BlockLOONG64NEZ,
  1285  		ssa.BlockLOONG64LTZ, ssa.BlockLOONG64GEZ,
  1286  		ssa.BlockLOONG64LEZ, ssa.BlockLOONG64GTZ,
  1287  		ssa.BlockLOONG64BEQ, ssa.BlockLOONG64BNE,
  1288  		ssa.BlockLOONG64BLT, ssa.BlockLOONG64BGE,
  1289  		ssa.BlockLOONG64BLTU, ssa.BlockLOONG64BGEU,
  1290  		ssa.BlockLOONG64FPT, ssa.BlockLOONG64FPF:
  1291  		jmp := blockJump[b.Kind]
  1292  		var p *obj.Prog
  1293  		switch next {
  1294  		case b.Succs[0].Block():
  1295  			p = s.Br(jmp.invasm, b.Succs[1].Block())
  1296  		case b.Succs[1].Block():
  1297  			p = s.Br(jmp.asm, b.Succs[0].Block())
  1298  		default:
  1299  			if b.Likely != ssa.BranchUnlikely {
  1300  				p = s.Br(jmp.asm, b.Succs[0].Block())
  1301  				s.Br(obj.AJMP, b.Succs[1].Block())
  1302  			} else {
  1303  				p = s.Br(jmp.invasm, b.Succs[1].Block())
  1304  				s.Br(obj.AJMP, b.Succs[0].Block())
  1305  			}
  1306  		}
  1307  		switch b.Kind {
  1308  		case ssa.BlockLOONG64BEQ, ssa.BlockLOONG64BNE,
  1309  			ssa.BlockLOONG64BGE, ssa.BlockLOONG64BLT,
  1310  			ssa.BlockLOONG64BGEU, ssa.BlockLOONG64BLTU:
  1311  			p.From.Type = obj.TYPE_REG
  1312  			p.From.Reg = b.Controls[0].Reg()
  1313  			p.Reg = b.Controls[1].Reg()
  1314  		case ssa.BlockLOONG64EQZ, ssa.BlockLOONG64NEZ,
  1315  			ssa.BlockLOONG64LTZ, ssa.BlockLOONG64GEZ,
  1316  			ssa.BlockLOONG64LEZ, ssa.BlockLOONG64GTZ,
  1317  			ssa.BlockLOONG64FPT, ssa.BlockLOONG64FPF:
  1318  			if !b.Controls[0].Type.IsFlags() {
  1319  				p.From.Type = obj.TYPE_REG
  1320  				p.From.Reg = b.Controls[0].Reg()
  1321  			}
  1322  		}
  1323  	case ssa.BlockLOONG64JUMPTABLE:
  1324  		// ALSLV $3, Rarg0, Rarg1, REGTMP
  1325  		// MOVV (REGTMP), REGTMP
  1326  		// JMP	(REGTMP)
  1327  		p := s.Prog(loong64.AALSLV)
  1328  		p.From.Type = obj.TYPE_CONST
  1329  		p.From.Offset = 3 // idx*8
  1330  		p.Reg = b.Controls[0].Reg()
  1331  		p.AddRestSourceReg(b.Controls[1].Reg())
  1332  		p.To.Type = obj.TYPE_REG
  1333  		p.To.Reg = loong64.REGTMP
  1334  		p1 := s.Prog(loong64.AMOVV)
  1335  		p1.From.Type = obj.TYPE_MEM
  1336  		p1.From.Reg = loong64.REGTMP
  1337  		p1.From.Offset = 0
  1338  		p1.To.Type = obj.TYPE_REG
  1339  		p1.To.Reg = loong64.REGTMP
  1340  		p2 := s.Prog(obj.AJMP)
  1341  		p2.To.Type = obj.TYPE_MEM
  1342  		p2.To.Reg = loong64.REGTMP
  1343  		// Save jump tables for later resolution of the target blocks.
  1344  		s.JumpTables = append(s.JumpTables, b)
  1345  
  1346  	default:
  1347  		b.Fatalf("branch not implemented: %s", b.LongString())
  1348  	}
  1349  }
  1350  
  1351  func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
  1352  	p := s.Prog(loadByType(t, reg))
  1353  	p.From.Type = obj.TYPE_MEM
  1354  	p.From.Name = obj.NAME_AUTO
  1355  	p.From.Sym = n.Linksym()
  1356  	p.From.Offset = n.FrameOffset() + off
  1357  	p.To.Type = obj.TYPE_REG
  1358  	p.To.Reg = reg
  1359  	return p
  1360  }
  1361  
  1362  func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
  1363  	p = pp.Append(p, storeByType(t, reg), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
  1364  	p.To.Name = obj.NAME_PARAM
  1365  	p.To.Sym = n.Linksym()
  1366  	p.Pos = p.Pos.WithNotStmt()
  1367  	return p
  1368  }
  1369  
  1370  // move8 copies 8 bytes at src+off to dst+off.
  1371  func move8(s *ssagen.State, src, dst, tmp int16, off int64) {
  1372  	// MOVV     off(src), tmp
  1373  	ld := s.Prog(loong64.AMOVV)
  1374  	ld.From.Type = obj.TYPE_MEM
  1375  	ld.From.Reg = src
  1376  	ld.From.Offset = off
  1377  	ld.To.Type = obj.TYPE_REG
  1378  	ld.To.Reg = tmp
  1379  	// MOVV     tmp, off(dst)
  1380  	st := s.Prog(loong64.AMOVV)
  1381  	st.From.Type = obj.TYPE_REG
  1382  	st.From.Reg = tmp
  1383  	st.To.Type = obj.TYPE_MEM
  1384  	st.To.Reg = dst
  1385  	st.To.Offset = off
  1386  }
  1387  
  1388  // zero8 zeroes 8 bytes at reg+off.
  1389  func zero8(s *ssagen.State, reg int16, off int64) {
  1390  	// MOVV   ZR, off(reg)
  1391  	p := s.Prog(loong64.AMOVV)
  1392  	p.From.Type = obj.TYPE_REG
  1393  	p.From.Reg = loong64.REGZERO
  1394  	p.To.Type = obj.TYPE_MEM
  1395  	p.To.Reg = reg
  1396  	p.To.Offset = off
  1397  }
  1398  
  1399  // zero16 zeroes 16 bytes at reg+off.
  1400  func zero16(s *ssagen.State, regZero, regBase int16, off int64) {
  1401  	// VMOVQ   regZero, off(regBase)
  1402  	p := s.Prog(loong64.AVMOVQ)
  1403  	p.From.Type = obj.TYPE_REG
  1404  	p.From.Reg = regZero
  1405  	p.To.Type = obj.TYPE_MEM
  1406  	p.To.Reg = regBase
  1407  	p.To.Offset = off
  1408  }
  1409  

View as plain text