Source file src/cmd/compile/internal/loong64/ssa.go

     1  // Copyright 2022 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package loong64
     6  
     7  import (
     8  	"math"
     9  
    10  	"cmd/compile/internal/base"
    11  	"cmd/compile/internal/ir"
    12  	"cmd/compile/internal/logopt"
    13  	"cmd/compile/internal/objw"
    14  	"cmd/compile/internal/ssa"
    15  	"cmd/compile/internal/ssagen"
    16  	"cmd/compile/internal/types"
    17  	"cmd/internal/obj"
    18  	"cmd/internal/obj/loong64"
    19  	"internal/abi"
    20  )
    21  
    22  // isFPreg reports whether r is an FP register.
    23  func isFPreg(r int16) bool {
    24  	return loong64.REG_F0 <= r && r <= loong64.REG_F31
    25  }
    26  
    27  // loadByType returns the load instruction of the given type.
    28  func loadByType(t *types.Type, r int16) obj.As {
    29  	if isFPreg(r) {
    30  		if t.Size() == 4 {
    31  			return loong64.AMOVF
    32  		} else {
    33  			return loong64.AMOVD
    34  		}
    35  	} else {
    36  		switch t.Size() {
    37  		case 1:
    38  			if t.IsSigned() {
    39  				return loong64.AMOVB
    40  			} else {
    41  				return loong64.AMOVBU
    42  			}
    43  		case 2:
    44  			if t.IsSigned() {
    45  				return loong64.AMOVH
    46  			} else {
    47  				return loong64.AMOVHU
    48  			}
    49  		case 4:
    50  			if t.IsSigned() {
    51  				return loong64.AMOVW
    52  			} else {
    53  				return loong64.AMOVWU
    54  			}
    55  		case 8:
    56  			return loong64.AMOVV
    57  		}
    58  	}
    59  	panic("bad load type")
    60  }
    61  
    62  // storeByType returns the store instruction of the given type.
    63  func storeByType(t *types.Type, r int16) obj.As {
    64  	if isFPreg(r) {
    65  		if t.Size() == 4 {
    66  			return loong64.AMOVF
    67  		} else {
    68  			return loong64.AMOVD
    69  		}
    70  	} else {
    71  		switch t.Size() {
    72  		case 1:
    73  			return loong64.AMOVB
    74  		case 2:
    75  			return loong64.AMOVH
    76  		case 4:
    77  			return loong64.AMOVW
    78  		case 8:
    79  			return loong64.AMOVV
    80  		}
    81  	}
    82  	panic("bad store type")
    83  }
    84  
    85  // largestMove returns the largest move instruction possible and its size,
    86  // given the alignment of the total size of the move.
    87  //
    88  // e.g., a 16-byte move may use MOVV, but an 11-byte move must use MOVB.
    89  //
    90  // Note that the moves may not be on naturally aligned addresses depending on
    91  // the source and destination.
    92  //
    93  // This matches the calculation in ssa.moveSize.
    94  func largestMove(alignment int64) (obj.As, int64) {
    95  	switch {
    96  	case alignment%8 == 0:
    97  		return loong64.AMOVV, 8
    98  	case alignment%4 == 0:
    99  		return loong64.AMOVW, 4
   100  	case alignment%2 == 0:
   101  		return loong64.AMOVH, 2
   102  	default:
   103  		return loong64.AMOVB, 1
   104  	}
   105  }
   106  
   107  func ssaGenValue(s *ssagen.State, v *ssa.Value) {
   108  	switch v.Op {
   109  	case ssa.OpCopy, ssa.OpLOONG64MOVVreg:
   110  		if v.Type.IsMemory() {
   111  			return
   112  		}
   113  		x := v.Args[0].Reg()
   114  		y := v.Reg()
   115  		if x == y {
   116  			return
   117  		}
   118  		as := loong64.AMOVV
   119  		if isFPreg(x) && isFPreg(y) {
   120  			as = loong64.AMOVD
   121  		}
   122  		p := s.Prog(as)
   123  		p.From.Type = obj.TYPE_REG
   124  		p.From.Reg = x
   125  		p.To.Type = obj.TYPE_REG
   126  		p.To.Reg = y
   127  	case ssa.OpLOONG64MOVVnop,
   128  		ssa.OpLOONG64ZERO,
   129  		ssa.OpLOONG64LoweredRound32F,
   130  		ssa.OpLOONG64LoweredRound64F:
   131  		// nothing to do
   132  	case ssa.OpLoadReg:
   133  		if v.Type.IsFlags() {
   134  			v.Fatalf("load flags not implemented: %v", v.LongString())
   135  			return
   136  		}
   137  		r := v.Reg()
   138  		p := s.Prog(loadByType(v.Type, r))
   139  		ssagen.AddrAuto(&p.From, v.Args[0])
   140  		p.To.Type = obj.TYPE_REG
   141  		p.To.Reg = r
   142  	case ssa.OpStoreReg:
   143  		if v.Type.IsFlags() {
   144  			v.Fatalf("store flags not implemented: %v", v.LongString())
   145  			return
   146  		}
   147  		r := v.Args[0].Reg()
   148  		p := s.Prog(storeByType(v.Type, r))
   149  		p.From.Type = obj.TYPE_REG
   150  		p.From.Reg = r
   151  		ssagen.AddrAuto(&p.To, v)
   152  	case ssa.OpArgIntReg, ssa.OpArgFloatReg:
   153  		// The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill
   154  		// The loop only runs once.
   155  		for _, a := range v.Block.Func.RegArgs {
   156  			// Pass the spill/unspill information along to the assembler, offset by size of
   157  			// the saved LR slot.
   158  			addr := ssagen.SpillSlotAddr(a, loong64.REGSP, base.Ctxt.Arch.FixedFrameSize)
   159  			s.FuncInfo().AddSpill(
   160  				obj.RegSpill{Reg: a.Reg, Addr: addr, Unspill: loadByType(a.Type, a.Reg), Spill: storeByType(a.Type, a.Reg)})
   161  		}
   162  		v.Block.Func.RegArgs = nil
   163  		ssagen.CheckArgReg(v)
   164  	case ssa.OpLOONG64ADDV,
   165  		ssa.OpLOONG64SUBV,
   166  		ssa.OpLOONG64AND,
   167  		ssa.OpLOONG64OR,
   168  		ssa.OpLOONG64XOR,
   169  		ssa.OpLOONG64NOR,
   170  		ssa.OpLOONG64ANDN,
   171  		ssa.OpLOONG64ORN,
   172  		ssa.OpLOONG64SLL,
   173  		ssa.OpLOONG64SLLV,
   174  		ssa.OpLOONG64SRL,
   175  		ssa.OpLOONG64SRLV,
   176  		ssa.OpLOONG64SRA,
   177  		ssa.OpLOONG64SRAV,
   178  		ssa.OpLOONG64ROTR,
   179  		ssa.OpLOONG64ROTRV,
   180  		ssa.OpLOONG64ADDF,
   181  		ssa.OpLOONG64ADDD,
   182  		ssa.OpLOONG64SUBF,
   183  		ssa.OpLOONG64SUBD,
   184  		ssa.OpLOONG64MULF,
   185  		ssa.OpLOONG64MULD,
   186  		ssa.OpLOONG64DIVF,
   187  		ssa.OpLOONG64DIVD,
   188  		ssa.OpLOONG64MULV, ssa.OpLOONG64MULHV, ssa.OpLOONG64MULHVU, ssa.OpLOONG64MULH, ssa.OpLOONG64MULHU,
   189  		ssa.OpLOONG64DIVV, ssa.OpLOONG64REMV, ssa.OpLOONG64DIVVU, ssa.OpLOONG64REMVU,
   190  		ssa.OpLOONG64MULWVW, ssa.OpLOONG64MULWVWU,
   191  		ssa.OpLOONG64FCOPYSGD:
   192  		p := s.Prog(v.Op.Asm())
   193  		p.From.Type = obj.TYPE_REG
   194  		p.From.Reg = v.Args[1].Reg()
   195  		p.Reg = v.Args[0].Reg()
   196  		p.To.Type = obj.TYPE_REG
   197  		p.To.Reg = v.Reg()
   198  
   199  	case ssa.OpLOONG64BSTRPICKV,
   200  		ssa.OpLOONG64BSTRPICKW:
   201  		p := s.Prog(v.Op.Asm())
   202  		p.From.Type = obj.TYPE_CONST
   203  		if v.Op == ssa.OpLOONG64BSTRPICKW {
   204  			p.From.Offset = v.AuxInt >> 5
   205  			p.AddRestSourceConst(v.AuxInt & 0x1f)
   206  		} else {
   207  			p.From.Offset = v.AuxInt >> 6
   208  			p.AddRestSourceConst(v.AuxInt & 0x3f)
   209  		}
   210  		p.Reg = v.Args[0].Reg()
   211  		p.To.Type = obj.TYPE_REG
   212  		p.To.Reg = v.Reg()
   213  
   214  	case ssa.OpLOONG64FMINF,
   215  		ssa.OpLOONG64FMIND,
   216  		ssa.OpLOONG64FMAXF,
   217  		ssa.OpLOONG64FMAXD:
   218  		// ADDD Rarg0, Rarg1, Rout
   219  		// CMPEQD Rarg0, Rarg0, FCC0
   220  		// bceqz FCC0, end
   221  		// CMPEQD Rarg1, Rarg1, FCC0
   222  		// bceqz FCC0, end
   223  		// F(MIN|MAX)(F|D)
   224  
   225  		r0 := v.Args[0].Reg()
   226  		r1 := v.Args[1].Reg()
   227  		out := v.Reg()
   228  		add, fcmp := loong64.AADDD, loong64.ACMPEQD
   229  		if v.Op == ssa.OpLOONG64FMINF || v.Op == ssa.OpLOONG64FMAXF {
   230  			add = loong64.AADDF
   231  			fcmp = loong64.ACMPEQF
   232  		}
   233  		p1 := s.Prog(add)
   234  		p1.From.Type = obj.TYPE_REG
   235  		p1.From.Reg = r0
   236  		p1.Reg = r1
   237  		p1.To.Type = obj.TYPE_REG
   238  		p1.To.Reg = out
   239  
   240  		p2 := s.Prog(fcmp)
   241  		p2.From.Type = obj.TYPE_REG
   242  		p2.From.Reg = r0
   243  		p2.Reg = r0
   244  		p2.To.Type = obj.TYPE_REG
   245  		p2.To.Reg = loong64.REG_FCC0
   246  
   247  		p3 := s.Prog(loong64.ABFPF)
   248  		p3.To.Type = obj.TYPE_BRANCH
   249  
   250  		p4 := s.Prog(fcmp)
   251  		p4.From.Type = obj.TYPE_REG
   252  		p4.From.Reg = r1
   253  		p4.Reg = r1
   254  		p4.To.Type = obj.TYPE_REG
   255  		p4.To.Reg = loong64.REG_FCC0
   256  
   257  		p5 := s.Prog(loong64.ABFPF)
   258  		p5.To.Type = obj.TYPE_BRANCH
   259  
   260  		p6 := s.Prog(v.Op.Asm())
   261  		p6.From.Type = obj.TYPE_REG
   262  		p6.From.Reg = r1
   263  		p6.Reg = r0
   264  		p6.To.Type = obj.TYPE_REG
   265  		p6.To.Reg = out
   266  
   267  		nop := s.Prog(obj.ANOP)
   268  		p3.To.SetTarget(nop)
   269  		p5.To.SetTarget(nop)
   270  
   271  	case ssa.OpLOONG64SGT,
   272  		ssa.OpLOONG64SGTU:
   273  		p := s.Prog(v.Op.Asm())
   274  		p.From.Type = obj.TYPE_REG
   275  		p.From.Reg = v.Args[0].Reg()
   276  		p.Reg = v.Args[1].Reg()
   277  		p.To.Type = obj.TYPE_REG
   278  		p.To.Reg = v.Reg()
   279  	case ssa.OpLOONG64ADDVconst,
   280  		ssa.OpLOONG64ADDV16const,
   281  		ssa.OpLOONG64SUBVconst,
   282  		ssa.OpLOONG64ANDconst,
   283  		ssa.OpLOONG64ORconst,
   284  		ssa.OpLOONG64XORconst,
   285  		ssa.OpLOONG64SLLconst,
   286  		ssa.OpLOONG64SLLVconst,
   287  		ssa.OpLOONG64SRLconst,
   288  		ssa.OpLOONG64SRLVconst,
   289  		ssa.OpLOONG64SRAconst,
   290  		ssa.OpLOONG64SRAVconst,
   291  		ssa.OpLOONG64ROTRconst,
   292  		ssa.OpLOONG64ROTRVconst,
   293  		ssa.OpLOONG64SGTconst,
   294  		ssa.OpLOONG64SGTUconst:
   295  		p := s.Prog(v.Op.Asm())
   296  		p.From.Type = obj.TYPE_CONST
   297  		p.From.Offset = v.AuxInt
   298  		p.Reg = v.Args[0].Reg()
   299  		p.To.Type = obj.TYPE_REG
   300  		p.To.Reg = v.Reg()
   301  
   302  	case ssa.OpLOONG64NORconst:
   303  		// MOVV $const, Rtmp
   304  		// NOR  Rtmp, Rarg0, Rout
   305  		p := s.Prog(loong64.AMOVV)
   306  		p.From.Type = obj.TYPE_CONST
   307  		p.From.Offset = v.AuxInt
   308  		p.To.Type = obj.TYPE_REG
   309  		p.To.Reg = loong64.REGTMP
   310  
   311  		p2 := s.Prog(v.Op.Asm())
   312  		p2.From.Type = obj.TYPE_REG
   313  		p2.From.Reg = loong64.REGTMP
   314  		p2.Reg = v.Args[0].Reg()
   315  		p2.To.Type = obj.TYPE_REG
   316  		p2.To.Reg = v.Reg()
   317  
   318  	case ssa.OpLOONG64MOVVconst:
   319  		r := v.Reg()
   320  		p := s.Prog(v.Op.Asm())
   321  		p.From.Type = obj.TYPE_CONST
   322  		p.From.Offset = v.AuxInt
   323  		p.To.Type = obj.TYPE_REG
   324  		p.To.Reg = r
   325  		if isFPreg(r) {
   326  			// cannot move into FP or special registers, use TMP as intermediate
   327  			p.To.Reg = loong64.REGTMP
   328  			p = s.Prog(loong64.AMOVV)
   329  			p.From.Type = obj.TYPE_REG
   330  			p.From.Reg = loong64.REGTMP
   331  			p.To.Type = obj.TYPE_REG
   332  			p.To.Reg = r
   333  		}
   334  	case ssa.OpLOONG64MOVFconst,
   335  		ssa.OpLOONG64MOVDconst:
   336  		p := s.Prog(v.Op.Asm())
   337  		p.From.Type = obj.TYPE_FCONST
   338  		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
   339  		p.To.Type = obj.TYPE_REG
   340  		p.To.Reg = v.Reg()
   341  	case ssa.OpLOONG64CMPEQF,
   342  		ssa.OpLOONG64CMPEQD,
   343  		ssa.OpLOONG64CMPGEF,
   344  		ssa.OpLOONG64CMPGED,
   345  		ssa.OpLOONG64CMPGTF,
   346  		ssa.OpLOONG64CMPGTD:
   347  		p := s.Prog(v.Op.Asm())
   348  		p.From.Type = obj.TYPE_REG
   349  		p.From.Reg = v.Args[0].Reg()
   350  		p.Reg = v.Args[1].Reg()
   351  		p.To.Type = obj.TYPE_REG
   352  		p.To.Reg = loong64.REG_FCC0
   353  
   354  	case ssa.OpLOONG64FMADDF,
   355  		ssa.OpLOONG64FMADDD,
   356  		ssa.OpLOONG64FMSUBF,
   357  		ssa.OpLOONG64FMSUBD,
   358  		ssa.OpLOONG64FNMADDF,
   359  		ssa.OpLOONG64FNMADDD,
   360  		ssa.OpLOONG64FNMSUBF,
   361  		ssa.OpLOONG64FNMSUBD:
   362  		p := s.Prog(v.Op.Asm())
   363  		// r=(FMA x y z) -> FMADDD z, y, x, r
   364  		// the SSA operand order is for taking advantage of
   365  		// commutativity (that only applies for the first two operands)
   366  		r := v.Reg()
   367  		x := v.Args[0].Reg()
   368  		y := v.Args[1].Reg()
   369  		z := v.Args[2].Reg()
   370  		p.From.Type = obj.TYPE_REG
   371  		p.From.Reg = z
   372  		p.Reg = y
   373  		p.AddRestSourceReg(x)
   374  		p.To.Type = obj.TYPE_REG
   375  		p.To.Reg = r
   376  
   377  	case ssa.OpLOONG64MOVVaddr:
   378  		p := s.Prog(loong64.AMOVV)
   379  		p.From.Type = obj.TYPE_ADDR
   380  		p.From.Reg = v.Args[0].Reg()
   381  		var wantreg string
   382  		// MOVV $sym+off(base), R
   383  		// the assembler expands it as the following:
   384  		// - base is SP: add constant offset to SP (R3)
   385  		// when constant is large, tmp register (R30) may be used
   386  		// - base is SB: load external address with relocation
   387  		switch v.Aux.(type) {
   388  		default:
   389  			v.Fatalf("aux is of unknown type %T", v.Aux)
   390  		case *obj.LSym:
   391  			wantreg = "SB"
   392  			ssagen.AddAux(&p.From, v)
   393  		case *ir.Name:
   394  			wantreg = "SP"
   395  			ssagen.AddAux(&p.From, v)
   396  		case nil:
   397  			// No sym, just MOVV $off(SP), R
   398  			wantreg = "SP"
   399  			p.From.Offset = v.AuxInt
   400  		}
   401  		if reg := v.Args[0].RegName(); reg != wantreg {
   402  			v.Fatalf("bad reg %s for symbol type %T, want %s", reg, v.Aux, wantreg)
   403  		}
   404  		p.To.Type = obj.TYPE_REG
   405  		p.To.Reg = v.Reg()
   406  
   407  	case ssa.OpLOONG64MOVBloadidx,
   408  		ssa.OpLOONG64MOVBUloadidx,
   409  		ssa.OpLOONG64MOVHloadidx,
   410  		ssa.OpLOONG64MOVHUloadidx,
   411  		ssa.OpLOONG64MOVWloadidx,
   412  		ssa.OpLOONG64MOVWUloadidx,
   413  		ssa.OpLOONG64MOVVloadidx,
   414  		ssa.OpLOONG64MOVFloadidx,
   415  		ssa.OpLOONG64MOVDloadidx:
   416  		p := s.Prog(v.Op.Asm())
   417  		p.From.Type = obj.TYPE_MEM
   418  		p.From.Name = obj.NAME_NONE
   419  		p.From.Reg = v.Args[0].Reg()
   420  		p.From.Index = v.Args[1].Reg()
   421  		p.To.Type = obj.TYPE_REG
   422  		p.To.Reg = v.Reg()
   423  
   424  	case ssa.OpLOONG64MOVBstoreidx,
   425  		ssa.OpLOONG64MOVHstoreidx,
   426  		ssa.OpLOONG64MOVWstoreidx,
   427  		ssa.OpLOONG64MOVVstoreidx,
   428  		ssa.OpLOONG64MOVFstoreidx,
   429  		ssa.OpLOONG64MOVDstoreidx:
   430  		p := s.Prog(v.Op.Asm())
   431  		p.From.Type = obj.TYPE_REG
   432  		p.From.Reg = v.Args[2].Reg()
   433  		p.To.Type = obj.TYPE_MEM
   434  		p.To.Name = obj.NAME_NONE
   435  		p.To.Reg = v.Args[0].Reg()
   436  		p.To.Index = v.Args[1].Reg()
   437  
   438  	case ssa.OpLOONG64MOVBload,
   439  		ssa.OpLOONG64MOVBUload,
   440  		ssa.OpLOONG64MOVHload,
   441  		ssa.OpLOONG64MOVHUload,
   442  		ssa.OpLOONG64MOVWload,
   443  		ssa.OpLOONG64MOVWUload,
   444  		ssa.OpLOONG64MOVVload,
   445  		ssa.OpLOONG64MOVFload,
   446  		ssa.OpLOONG64MOVDload:
   447  		p := s.Prog(v.Op.Asm())
   448  		p.From.Type = obj.TYPE_MEM
   449  		p.From.Reg = v.Args[0].Reg()
   450  		ssagen.AddAux(&p.From, v)
   451  		p.To.Type = obj.TYPE_REG
   452  		p.To.Reg = v.Reg()
   453  	case ssa.OpLOONG64MOVBstore,
   454  		ssa.OpLOONG64MOVHstore,
   455  		ssa.OpLOONG64MOVWstore,
   456  		ssa.OpLOONG64MOVVstore,
   457  		ssa.OpLOONG64MOVFstore,
   458  		ssa.OpLOONG64MOVDstore:
   459  		p := s.Prog(v.Op.Asm())
   460  		p.From.Type = obj.TYPE_REG
   461  		p.From.Reg = v.Args[1].Reg()
   462  		p.To.Type = obj.TYPE_MEM
   463  		p.To.Reg = v.Args[0].Reg()
   464  		ssagen.AddAux(&p.To, v)
   465  	case ssa.OpLOONG64MOVBreg,
   466  		ssa.OpLOONG64MOVBUreg,
   467  		ssa.OpLOONG64MOVHreg,
   468  		ssa.OpLOONG64MOVHUreg,
   469  		ssa.OpLOONG64MOVWreg,
   470  		ssa.OpLOONG64MOVWUreg:
   471  		a := v.Args[0]
   472  		for a.Op == ssa.OpCopy || a.Op == ssa.OpLOONG64MOVVreg {
   473  			a = a.Args[0]
   474  		}
   475  		if a.Op == ssa.OpLoadReg && loong64.REG_R0 <= a.Reg() && a.Reg() <= loong64.REG_R31 {
   476  			// LoadReg from a narrower type does an extension, except loading
   477  			// to a floating point register. So only eliminate the extension
   478  			// if it is loaded to an integer register.
   479  
   480  			t := a.Type
   481  			switch {
   482  			case v.Op == ssa.OpLOONG64MOVBreg && t.Size() == 1 && t.IsSigned(),
   483  				v.Op == ssa.OpLOONG64MOVBUreg && t.Size() == 1 && !t.IsSigned(),
   484  				v.Op == ssa.OpLOONG64MOVHreg && t.Size() == 2 && t.IsSigned(),
   485  				v.Op == ssa.OpLOONG64MOVHUreg && t.Size() == 2 && !t.IsSigned(),
   486  				v.Op == ssa.OpLOONG64MOVWreg && t.Size() == 4 && t.IsSigned(),
   487  				v.Op == ssa.OpLOONG64MOVWUreg && t.Size() == 4 && !t.IsSigned():
   488  				// arg is a proper-typed load, already zero/sign-extended, don't extend again
   489  				if v.Reg() == v.Args[0].Reg() {
   490  					return
   491  				}
   492  				p := s.Prog(loong64.AMOVV)
   493  				p.From.Type = obj.TYPE_REG
   494  				p.From.Reg = v.Args[0].Reg()
   495  				p.To.Type = obj.TYPE_REG
   496  				p.To.Reg = v.Reg()
   497  				return
   498  			default:
   499  			}
   500  		}
   501  		fallthrough
   502  
   503  	case ssa.OpLOONG64MOVWF,
   504  		ssa.OpLOONG64MOVWD,
   505  		ssa.OpLOONG64TRUNCFW,
   506  		ssa.OpLOONG64TRUNCDW,
   507  		ssa.OpLOONG64MOVVF,
   508  		ssa.OpLOONG64MOVVD,
   509  		ssa.OpLOONG64TRUNCFV,
   510  		ssa.OpLOONG64TRUNCDV,
   511  		ssa.OpLOONG64MOVFD,
   512  		ssa.OpLOONG64MOVDF,
   513  		ssa.OpLOONG64MOVWfpgp,
   514  		ssa.OpLOONG64MOVWgpfp,
   515  		ssa.OpLOONG64MOVVfpgp,
   516  		ssa.OpLOONG64MOVVgpfp,
   517  		ssa.OpLOONG64NEGF,
   518  		ssa.OpLOONG64NEGD,
   519  		ssa.OpLOONG64CLZW,
   520  		ssa.OpLOONG64CLZV,
   521  		ssa.OpLOONG64CTZW,
   522  		ssa.OpLOONG64CTZV,
   523  		ssa.OpLOONG64SQRTD,
   524  		ssa.OpLOONG64SQRTF,
   525  		ssa.OpLOONG64REVB2H,
   526  		ssa.OpLOONG64REVB2W,
   527  		ssa.OpLOONG64REVB4H,
   528  		ssa.OpLOONG64REVBV,
   529  		ssa.OpLOONG64BITREV4B,
   530  		ssa.OpLOONG64BITREVW,
   531  		ssa.OpLOONG64BITREVV,
   532  		ssa.OpLOONG64ABSD:
   533  		p := s.Prog(v.Op.Asm())
   534  		p.From.Type = obj.TYPE_REG
   535  		p.From.Reg = v.Args[0].Reg()
   536  		p.To.Type = obj.TYPE_REG
   537  		p.To.Reg = v.Reg()
   538  
   539  	case ssa.OpLOONG64VPCNT64,
   540  		ssa.OpLOONG64VPCNT32,
   541  		ssa.OpLOONG64VPCNT16:
   542  		p := s.Prog(v.Op.Asm())
   543  		p.From.Type = obj.TYPE_REG
   544  		p.From.Reg = ((v.Args[0].Reg() - loong64.REG_F0) & 31) + loong64.REG_V0
   545  		p.To.Type = obj.TYPE_REG
   546  		p.To.Reg = ((v.Reg() - loong64.REG_F0) & 31) + loong64.REG_V0
   547  
   548  	case ssa.OpLOONG64NEGV:
   549  		// SUB from REGZERO
   550  		p := s.Prog(loong64.ASUBVU)
   551  		p.From.Type = obj.TYPE_REG
   552  		p.From.Reg = v.Args[0].Reg()
   553  		p.Reg = loong64.REGZERO
   554  		p.To.Type = obj.TYPE_REG
   555  		p.To.Reg = v.Reg()
   556  
   557  	case ssa.OpLOONG64LoweredZero:
   558  		ptrReg := v.Args[0].Reg()
   559  		n := v.AuxInt
   560  		if n < 16 {
   561  			v.Fatalf("Zero too small %d", n)
   562  		}
   563  
   564  		// Generate Zeroing instructions.
   565  		var off int64
   566  		for n >= 8 {
   567  			// MOVV     ZR, off(ptrReg)
   568  			zero8(s, ptrReg, off)
   569  			off += 8
   570  			n -= 8
   571  		}
   572  		if n != 0 {
   573  			// MOVV     ZR, off+n-8(ptrReg)
   574  			zero8(s, ptrReg, off+n-8)
   575  		}
   576  	case ssa.OpLOONG64LoweredZeroLoop:
   577  		ptrReg := v.Args[0].Reg()
   578  		countReg := v.RegTmp()
   579  		flagReg := int16(loong64.REGTMP)
   580  		var off int64
   581  		n := v.AuxInt
   582  		loopSize := int64(64)
   583  		if n < 3*loopSize {
   584  			// - a loop count of 0 won't work.
   585  			// - a loop count of 1 is useless.
   586  			// - a loop count of 2 is a code size ~tie
   587  			//     4 instructions to implement the loop
   588  			//     8 instructions in the loop body
   589  			//   vs
   590  			//     16 instuctions in the straightline code
   591  			//   Might as well use straightline code.
   592  			v.Fatalf("ZeroLoop size too small %d", n)
   593  		}
   594  
   595  		//    MOVV    $n/loopSize, countReg
   596  		//    MOVBU   ir.Syms.Loong64HasLSX, flagReg
   597  		//    BNE     flagReg, lsxInit
   598  		// genericInit:
   599  		//    for off = 0; off < loopSize; off += 8 {
   600  		//            zero8(s, ptrReg, off)
   601  		//    }
   602  		//    ADDV    $loopSize, ptrReg
   603  		//    SUBV    $1, countReg
   604  		//    BNE     countReg, genericInit
   605  		//    JMP     tail
   606  		// lsxInit:
   607  		//    VXORV   V31, V31, V31, v31 = 0
   608  		//    for off = 0; off < loopSize; off += 16 {
   609  		//            zero16(s, V31, ptrReg, off)
   610  		//    }
   611  		//    ADDV    $loopSize, ptrReg
   612  		//    SUBV    $1, countReg
   613  		//    BNE     countReg, lsxInit
   614  		// tail:
   615  		//    n %= loopSize
   616  		//    for off = 0; n >= 8; off += 8, n -= 8 {
   617  		//            zero8(s, ptrReg, off)
   618  		//    }
   619  		//
   620  		//    if n != 0 {
   621  		//           zero8(s, ptrReg, off+n-8)
   622  		//    }
   623  
   624  		p1 := s.Prog(loong64.AMOVV)
   625  		p1.From.Type = obj.TYPE_CONST
   626  		p1.From.Offset = n / loopSize
   627  		p1.To.Type = obj.TYPE_REG
   628  		p1.To.Reg = countReg
   629  
   630  		p2 := s.Prog(loong64.AMOVBU)
   631  		p2.From.Type = obj.TYPE_MEM
   632  		p2.From.Name = obj.NAME_EXTERN
   633  		p2.From.Sym = ir.Syms.Loong64HasLSX
   634  		p2.To.Type = obj.TYPE_REG
   635  		p2.To.Reg = flagReg
   636  
   637  		p3 := s.Prog(loong64.ABNE)
   638  		p3.From.Type = obj.TYPE_REG
   639  		p3.From.Reg = flagReg
   640  		p3.To.Type = obj.TYPE_BRANCH
   641  
   642  		for off = 0; off < loopSize; off += 8 {
   643  			zero8(s, ptrReg, off)
   644  		}
   645  
   646  		p4 := s.Prog(loong64.AADDV)
   647  		p4.From.Type = obj.TYPE_CONST
   648  		p4.From.Offset = loopSize
   649  		p4.To.Type = obj.TYPE_REG
   650  		p4.To.Reg = ptrReg
   651  
   652  		p5 := s.Prog(loong64.ASUBV)
   653  		p5.From.Type = obj.TYPE_CONST
   654  		p5.From.Offset = 1
   655  		p5.To.Type = obj.TYPE_REG
   656  		p5.To.Reg = countReg
   657  
   658  		p6 := s.Prog(loong64.ABNE)
   659  		p6.From.Type = obj.TYPE_REG
   660  		p6.From.Reg = countReg
   661  		p6.To.Type = obj.TYPE_BRANCH
   662  		p6.To.SetTarget(p3.Link)
   663  
   664  		p7 := s.Prog(obj.AJMP)
   665  		p7.To.Type = obj.TYPE_BRANCH
   666  
   667  		p8 := s.Prog(loong64.AVXORV)
   668  		p8.From.Type = obj.TYPE_REG
   669  		p8.From.Reg = loong64.REG_V31
   670  		p8.To.Type = obj.TYPE_REG
   671  		p8.To.Reg = loong64.REG_V31
   672  		p3.To.SetTarget(p8)
   673  
   674  		for off = 0; off < loopSize; off += 16 {
   675  			zero16(s, loong64.REG_V31, ptrReg, off)
   676  		}
   677  
   678  		p9 := s.Prog(loong64.AADDV)
   679  		p9.From.Type = obj.TYPE_CONST
   680  		p9.From.Offset = loopSize
   681  		p9.To.Type = obj.TYPE_REG
   682  		p9.To.Reg = ptrReg
   683  
   684  		p10 := s.Prog(loong64.ASUBV)
   685  		p10.From.Type = obj.TYPE_CONST
   686  		p10.From.Offset = 1
   687  		p10.To.Type = obj.TYPE_REG
   688  		p10.To.Reg = countReg
   689  
   690  		p11 := s.Prog(loong64.ABNE)
   691  		p11.From.Type = obj.TYPE_REG
   692  		p11.From.Reg = countReg
   693  		p11.To.Type = obj.TYPE_BRANCH
   694  		p11.To.SetTarget(p8.Link)
   695  
   696  		p12 := s.Prog(obj.ANOP)
   697  		p7.To.SetTarget(p12)
   698  
   699  		// Multiples of the loop size are now done.
   700  		n %= loopSize
   701  		// Write any fractional portion.
   702  		for off = 0; n >= 8; off += 8 {
   703  			// MOVV   ZR, off(ptrReg)
   704  			zero8(s, ptrReg, off)
   705  			n -= 8
   706  		}
   707  
   708  		if n != 0 {
   709  			zero8(s, ptrReg, off+n-8)
   710  		}
   711  
   712  	case ssa.OpLOONG64LoweredMove:
   713  		dstReg := v.Args[0].Reg()
   714  		srcReg := v.Args[1].Reg()
   715  		if dstReg == srcReg {
   716  			break
   717  		}
   718  		tmpReg := int16(loong64.REG_R20)
   719  		n := v.AuxInt
   720  		if n < 16 {
   721  			v.Fatalf("Move too small %d", n)
   722  		}
   723  
   724  		var off int64
   725  		for n >= 8 {
   726  			// MOVV     off(srcReg), tmpReg
   727  			// MOVV     tmpReg, off(dstReg)
   728  			move8(s, srcReg, dstReg, tmpReg, off)
   729  			off += 8
   730  			n -= 8
   731  		}
   732  
   733  		if n != 0 {
   734  			// MOVV     off+n-8(srcReg), tmpReg
   735  			// MOVV     tmpReg, off+n-8(srcReg)
   736  			move8(s, srcReg, dstReg, tmpReg, off+n-8)
   737  		}
   738  	case ssa.OpLOONG64LoweredMoveLoop:
   739  		dstReg := v.Args[0].Reg()
   740  		srcReg := v.Args[1].Reg()
   741  		if dstReg == srcReg {
   742  			break
   743  		}
   744  		countReg := int16(loong64.REG_R20)
   745  		tmpReg := int16(loong64.REG_R21)
   746  		var off int64
   747  		n := v.AuxInt
   748  		loopSize := int64(64)
   749  		if n < 3*loopSize {
   750  			// - a loop count of 0 won't work.
   751  			// - a loop count of 1 is useless.
   752  			// - a loop count of 2 is a code size ~tie
   753  			//     4 instructions to implement the loop
   754  			//     8 instructions in the loop body
   755  			//   vs
   756  			//     16 instructions in the straightline code
   757  			//   Might as well use straightline code.
   758  			v.Fatalf("MoveLoop size too small %d", n)
   759  		}
   760  
   761  		// Put iteration count in a register.
   762  		//   MOVV     $n/loopSize, countReg
   763  		p := s.Prog(loong64.AMOVV)
   764  		p.From.Type = obj.TYPE_CONST
   765  		p.From.Offset = n / loopSize
   766  		p.To.Type = obj.TYPE_REG
   767  		p.To.Reg = countReg
   768  		cntInit := p
   769  
   770  		// Move loopSize bytes starting at srcReg to dstReg.
   771  		for range loopSize / 8 {
   772  			// MOVV     off(srcReg), tmpReg
   773  			// MOVV     tmpReg, off(dstReg)
   774  			move8(s, srcReg, dstReg, tmpReg, off)
   775  			off += 8
   776  		}
   777  
   778  		// Increment srcReg and destReg by loopSize.
   779  		//   ADDV     $loopSize, srcReg
   780  		p = s.Prog(loong64.AADDV)
   781  		p.From.Type = obj.TYPE_CONST
   782  		p.From.Offset = loopSize
   783  		p.To.Type = obj.TYPE_REG
   784  		p.To.Reg = srcReg
   785  		//   ADDV     $loopSize, dstReg
   786  		p = s.Prog(loong64.AADDV)
   787  		p.From.Type = obj.TYPE_CONST
   788  		p.From.Offset = loopSize
   789  		p.To.Type = obj.TYPE_REG
   790  		p.To.Reg = dstReg
   791  
   792  		// Decrement loop count.
   793  		//   SUBV     $1, countReg
   794  		p = s.Prog(loong64.ASUBV)
   795  		p.From.Type = obj.TYPE_CONST
   796  		p.From.Offset = 1
   797  		p.To.Type = obj.TYPE_REG
   798  		p.To.Reg = countReg
   799  
   800  		// Jump to loop header if we're not done yet.
   801  		//   BNE     countReg, loop header
   802  		p = s.Prog(loong64.ABNE)
   803  		p.From.Type = obj.TYPE_REG
   804  		p.From.Reg = countReg
   805  		p.To.Type = obj.TYPE_BRANCH
   806  		p.To.SetTarget(cntInit.Link)
   807  
   808  		// Multiples of the loop size are now done.
   809  		n %= loopSize
   810  
   811  		off = 0
   812  		// Copy any fractional portion.
   813  		for n >= 8 {
   814  			// MOVV     off(srcReg), tmpReg
   815  			// MOVV     tmpReg, off(dstReg)
   816  			move8(s, srcReg, dstReg, tmpReg, off)
   817  			off += 8
   818  			n -= 8
   819  		}
   820  
   821  		if n != 0 {
   822  			// MOVV     off+n-8(srcReg), tmpReg
   823  			// MOVV     tmpReg, off+n-8(srcReg)
   824  			move8(s, srcReg, dstReg, tmpReg, off+n-8)
   825  		}
   826  
   827  	case ssa.OpLOONG64CALLstatic, ssa.OpLOONG64CALLclosure, ssa.OpLOONG64CALLinter:
   828  		s.Call(v)
   829  	case ssa.OpLOONG64CALLtail:
   830  		s.TailCall(v)
   831  	case ssa.OpLOONG64LoweredWB:
   832  		p := s.Prog(obj.ACALL)
   833  		p.To.Type = obj.TYPE_MEM
   834  		p.To.Name = obj.NAME_EXTERN
   835  		// AuxInt encodes how many buffer entries we need.
   836  		p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1]
   837  
   838  	case ssa.OpLOONG64LoweredPubBarrier:
   839  		// DBAR 0x1A
   840  		p := s.Prog(v.Op.Asm())
   841  		p.From.Type = obj.TYPE_CONST
   842  		p.From.Offset = 0x1A
   843  
   844  	case ssa.OpLOONG64LoweredPanicBoundsRR, ssa.OpLOONG64LoweredPanicBoundsRC, ssa.OpLOONG64LoweredPanicBoundsCR, ssa.OpLOONG64LoweredPanicBoundsCC:
   845  		// Compute the constant we put in the PCData entry for this call.
   846  		code, signed := ssa.BoundsKind(v.AuxInt).Code()
   847  		xIsReg := false
   848  		yIsReg := false
   849  		xVal := 0
   850  		yVal := 0
   851  		switch v.Op {
   852  		case ssa.OpLOONG64LoweredPanicBoundsRR:
   853  			xIsReg = true
   854  			xVal = int(v.Args[0].Reg() - loong64.REG_R4)
   855  			yIsReg = true
   856  			yVal = int(v.Args[1].Reg() - loong64.REG_R4)
   857  		case ssa.OpLOONG64LoweredPanicBoundsRC:
   858  			xIsReg = true
   859  			xVal = int(v.Args[0].Reg() - loong64.REG_R4)
   860  			c := v.Aux.(ssa.PanicBoundsC).C
   861  			if c >= 0 && c <= abi.BoundsMaxConst {
   862  				yVal = int(c)
   863  			} else {
   864  				// Move constant to a register
   865  				yIsReg = true
   866  				if yVal == xVal {
   867  					yVal = 1
   868  				}
   869  				p := s.Prog(loong64.AMOVV)
   870  				p.From.Type = obj.TYPE_CONST
   871  				p.From.Offset = c
   872  				p.To.Type = obj.TYPE_REG
   873  				p.To.Reg = loong64.REG_R4 + int16(yVal)
   874  			}
   875  		case ssa.OpLOONG64LoweredPanicBoundsCR:
   876  			yIsReg = true
   877  			yVal = int(v.Args[0].Reg() - loong64.REG_R4)
   878  			c := v.Aux.(ssa.PanicBoundsC).C
   879  			if c >= 0 && c <= abi.BoundsMaxConst {
   880  				xVal = int(c)
   881  			} else {
   882  				// Move constant to a register
   883  				xIsReg = true
   884  				if xVal == yVal {
   885  					xVal = 1
   886  				}
   887  				p := s.Prog(loong64.AMOVV)
   888  				p.From.Type = obj.TYPE_CONST
   889  				p.From.Offset = c
   890  				p.To.Type = obj.TYPE_REG
   891  				p.To.Reg = loong64.REG_R4 + int16(xVal)
   892  			}
   893  		case ssa.OpLOONG64LoweredPanicBoundsCC:
   894  			c := v.Aux.(ssa.PanicBoundsCC).Cx
   895  			if c >= 0 && c <= abi.BoundsMaxConst {
   896  				xVal = int(c)
   897  			} else {
   898  				// Move constant to a register
   899  				xIsReg = true
   900  				p := s.Prog(loong64.AMOVV)
   901  				p.From.Type = obj.TYPE_CONST
   902  				p.From.Offset = c
   903  				p.To.Type = obj.TYPE_REG
   904  				p.To.Reg = loong64.REG_R4 + int16(xVal)
   905  			}
   906  			c = v.Aux.(ssa.PanicBoundsCC).Cy
   907  			if c >= 0 && c <= abi.BoundsMaxConst {
   908  				yVal = int(c)
   909  			} else {
   910  				// Move constant to a register
   911  				yIsReg = true
   912  				yVal = 1
   913  				p := s.Prog(loong64.AMOVV)
   914  				p.From.Type = obj.TYPE_CONST
   915  				p.From.Offset = c
   916  				p.To.Type = obj.TYPE_REG
   917  				p.To.Reg = loong64.REG_R4 + int16(yVal)
   918  			}
   919  		}
   920  		c := abi.BoundsEncode(code, signed, xIsReg, yIsReg, xVal, yVal)
   921  
   922  		p := s.Prog(obj.APCDATA)
   923  		p.From.SetConst(abi.PCDATA_PanicBounds)
   924  		p.To.SetConst(int64(c))
   925  		p = s.Prog(obj.ACALL)
   926  		p.To.Type = obj.TYPE_MEM
   927  		p.To.Name = obj.NAME_EXTERN
   928  		p.To.Sym = ir.Syms.PanicBounds
   929  
   930  	case ssa.OpLOONG64LoweredAtomicLoad8, ssa.OpLOONG64LoweredAtomicLoad32, ssa.OpLOONG64LoweredAtomicLoad64:
   931  		// MOVB	(Rarg0), Rout
   932  		// DBAR	0x14
   933  		as := loong64.AMOVV
   934  		switch v.Op {
   935  		case ssa.OpLOONG64LoweredAtomicLoad8:
   936  			as = loong64.AMOVB
   937  		case ssa.OpLOONG64LoweredAtomicLoad32:
   938  			as = loong64.AMOVW
   939  		}
   940  		p := s.Prog(as)
   941  		p.From.Type = obj.TYPE_MEM
   942  		p.From.Reg = v.Args[0].Reg()
   943  		p.To.Type = obj.TYPE_REG
   944  		p.To.Reg = v.Reg0()
   945  		p1 := s.Prog(loong64.ADBAR)
   946  		p1.From.Type = obj.TYPE_CONST
   947  		p1.From.Offset = 0x14
   948  
   949  	case ssa.OpLOONG64LoweredAtomicStore8,
   950  		ssa.OpLOONG64LoweredAtomicStore32,
   951  		ssa.OpLOONG64LoweredAtomicStore64:
   952  		// DBAR 0x12
   953  		// MOVx (Rarg1), Rout
   954  		// DBAR 0x18
   955  		movx := loong64.AMOVV
   956  		switch v.Op {
   957  		case ssa.OpLOONG64LoweredAtomicStore8:
   958  			movx = loong64.AMOVB
   959  		case ssa.OpLOONG64LoweredAtomicStore32:
   960  			movx = loong64.AMOVW
   961  		}
   962  		p := s.Prog(loong64.ADBAR)
   963  		p.From.Type = obj.TYPE_CONST
   964  		p.From.Offset = 0x12
   965  
   966  		p1 := s.Prog(movx)
   967  		p1.From.Type = obj.TYPE_REG
   968  		p1.From.Reg = v.Args[1].Reg()
   969  		p1.To.Type = obj.TYPE_MEM
   970  		p1.To.Reg = v.Args[0].Reg()
   971  
   972  		p2 := s.Prog(loong64.ADBAR)
   973  		p2.From.Type = obj.TYPE_CONST
   974  		p2.From.Offset = 0x18
   975  
   976  	case ssa.OpLOONG64LoweredAtomicStore8Variant,
   977  		ssa.OpLOONG64LoweredAtomicStore32Variant,
   978  		ssa.OpLOONG64LoweredAtomicStore64Variant:
   979  		//AMSWAPx  Rarg1, (Rarg0), Rout
   980  		amswapx := loong64.AAMSWAPDBV
   981  		switch v.Op {
   982  		case ssa.OpLOONG64LoweredAtomicStore32Variant:
   983  			amswapx = loong64.AAMSWAPDBW
   984  		case ssa.OpLOONG64LoweredAtomicStore8Variant:
   985  			amswapx = loong64.AAMSWAPDBB
   986  		}
   987  		p := s.Prog(amswapx)
   988  		p.From.Type = obj.TYPE_REG
   989  		p.From.Reg = v.Args[1].Reg()
   990  		p.To.Type = obj.TYPE_MEM
   991  		p.To.Reg = v.Args[0].Reg()
   992  		p.RegTo2 = loong64.REGZERO
   993  
   994  	case ssa.OpLOONG64LoweredAtomicExchange32, ssa.OpLOONG64LoweredAtomicExchange64:
   995  		// AMSWAPx	Rarg1, (Rarg0), Rout
   996  		amswapx := loong64.AAMSWAPDBV
   997  		if v.Op == ssa.OpLOONG64LoweredAtomicExchange32 {
   998  			amswapx = loong64.AAMSWAPDBW
   999  		}
  1000  		p := s.Prog(amswapx)
  1001  		p.From.Type = obj.TYPE_REG
  1002  		p.From.Reg = v.Args[1].Reg()
  1003  		p.To.Type = obj.TYPE_MEM
  1004  		p.To.Reg = v.Args[0].Reg()
  1005  		p.RegTo2 = v.Reg0()
  1006  
  1007  	case ssa.OpLOONG64LoweredAtomicExchange8Variant:
  1008  		// AMSWAPDBB	Rarg1, (Rarg0), Rout
  1009  		p := s.Prog(loong64.AAMSWAPDBB)
  1010  		p.From.Type = obj.TYPE_REG
  1011  		p.From.Reg = v.Args[1].Reg()
  1012  		p.To.Type = obj.TYPE_MEM
  1013  		p.To.Reg = v.Args[0].Reg()
  1014  		p.RegTo2 = v.Reg0()
  1015  
  1016  	case ssa.OpLOONG64LoweredAtomicAdd32, ssa.OpLOONG64LoweredAtomicAdd64:
  1017  		// AMADDx  Rarg1, (Rarg0), Rout
  1018  		// ADDV    Rarg1, Rout, Rout
  1019  		amaddx := loong64.AAMADDDBV
  1020  		addx := loong64.AADDV
  1021  		if v.Op == ssa.OpLOONG64LoweredAtomicAdd32 {
  1022  			amaddx = loong64.AAMADDDBW
  1023  		}
  1024  		p := s.Prog(amaddx)
  1025  		p.From.Type = obj.TYPE_REG
  1026  		p.From.Reg = v.Args[1].Reg()
  1027  		p.To.Type = obj.TYPE_MEM
  1028  		p.To.Reg = v.Args[0].Reg()
  1029  		p.RegTo2 = v.Reg0()
  1030  
  1031  		p1 := s.Prog(addx)
  1032  		p1.From.Type = obj.TYPE_REG
  1033  		p1.From.Reg = v.Args[1].Reg()
  1034  		p1.Reg = v.Reg0()
  1035  		p1.To.Type = obj.TYPE_REG
  1036  		p1.To.Reg = v.Reg0()
  1037  
  1038  	case ssa.OpLOONG64LoweredAtomicCas32, ssa.OpLOONG64LoweredAtomicCas64:
  1039  		// MOVV $0, Rout
  1040  		// DBAR 0x14
  1041  		// LL	(Rarg0), Rtmp
  1042  		// BNE	Rtmp, Rarg1, 4(PC)
  1043  		// MOVV Rarg2, Rout
  1044  		// SC	Rout, (Rarg0)
  1045  		// BEQ	Rout, -4(PC)
  1046  		// DBAR 0x12
  1047  		ll := loong64.ALLV
  1048  		sc := loong64.ASCV
  1049  		if v.Op == ssa.OpLOONG64LoweredAtomicCas32 {
  1050  			ll = loong64.ALL
  1051  			sc = loong64.ASC
  1052  		}
  1053  
  1054  		p := s.Prog(loong64.AMOVV)
  1055  		p.From.Type = obj.TYPE_REG
  1056  		p.From.Reg = loong64.REGZERO
  1057  		p.To.Type = obj.TYPE_REG
  1058  		p.To.Reg = v.Reg0()
  1059  
  1060  		p1 := s.Prog(loong64.ADBAR)
  1061  		p1.From.Type = obj.TYPE_CONST
  1062  		p1.From.Offset = 0x14
  1063  
  1064  		p2 := s.Prog(ll)
  1065  		p2.From.Type = obj.TYPE_MEM
  1066  		p2.From.Reg = v.Args[0].Reg()
  1067  		p2.To.Type = obj.TYPE_REG
  1068  		p2.To.Reg = loong64.REGTMP
  1069  
  1070  		p3 := s.Prog(loong64.ABNE)
  1071  		p3.From.Type = obj.TYPE_REG
  1072  		p3.From.Reg = v.Args[1].Reg()
  1073  		p3.Reg = loong64.REGTMP
  1074  		p3.To.Type = obj.TYPE_BRANCH
  1075  
  1076  		p4 := s.Prog(loong64.AMOVV)
  1077  		p4.From.Type = obj.TYPE_REG
  1078  		p4.From.Reg = v.Args[2].Reg()
  1079  		p4.To.Type = obj.TYPE_REG
  1080  		p4.To.Reg = v.Reg0()
  1081  
  1082  		p5 := s.Prog(sc)
  1083  		p5.From.Type = obj.TYPE_REG
  1084  		p5.From.Reg = v.Reg0()
  1085  		p5.To.Type = obj.TYPE_MEM
  1086  		p5.To.Reg = v.Args[0].Reg()
  1087  
  1088  		p6 := s.Prog(loong64.ABEQ)
  1089  		p6.From.Type = obj.TYPE_REG
  1090  		p6.From.Reg = v.Reg0()
  1091  		p6.To.Type = obj.TYPE_BRANCH
  1092  		p6.To.SetTarget(p2)
  1093  
  1094  		p7 := s.Prog(loong64.ADBAR)
  1095  		p7.From.Type = obj.TYPE_CONST
  1096  		p7.From.Offset = 0x12
  1097  		p3.To.SetTarget(p7)
  1098  
  1099  	case ssa.OpLOONG64LoweredAtomicAnd32,
  1100  		ssa.OpLOONG64LoweredAtomicOr32:
  1101  		// AM{AND,OR}DBx  Rarg1, (Rarg0), RegZero
  1102  		p := s.Prog(v.Op.Asm())
  1103  		p.From.Type = obj.TYPE_REG
  1104  		p.From.Reg = v.Args[1].Reg()
  1105  		p.To.Type = obj.TYPE_MEM
  1106  		p.To.Reg = v.Args[0].Reg()
  1107  		p.RegTo2 = loong64.REGZERO
  1108  
  1109  	case ssa.OpLOONG64LoweredAtomicAnd32value,
  1110  		ssa.OpLOONG64LoweredAtomicAnd64value,
  1111  		ssa.OpLOONG64LoweredAtomicOr64value,
  1112  		ssa.OpLOONG64LoweredAtomicOr32value:
  1113  		// AM{AND,OR}DBx  Rarg1, (Rarg0), Rout
  1114  		p := s.Prog(v.Op.Asm())
  1115  		p.From.Type = obj.TYPE_REG
  1116  		p.From.Reg = v.Args[1].Reg()
  1117  		p.To.Type = obj.TYPE_MEM
  1118  		p.To.Reg = v.Args[0].Reg()
  1119  		p.RegTo2 = v.Reg0()
  1120  
  1121  	case ssa.OpLOONG64LoweredAtomicCas64Variant, ssa.OpLOONG64LoweredAtomicCas32Variant:
  1122  		// MOVV         $0, Rout
  1123  		// MOVV         Rarg1, Rtmp
  1124  		// AMCASDBx     Rarg2, (Rarg0), Rtmp
  1125  		// BNE          Rarg1, Rtmp, 2(PC)
  1126  		// MOVV         $1, Rout
  1127  		// NOP
  1128  
  1129  		amcasx := loong64.AAMCASDBV
  1130  		if v.Op == ssa.OpLOONG64LoweredAtomicCas32Variant {
  1131  			amcasx = loong64.AAMCASDBW
  1132  		}
  1133  
  1134  		p := s.Prog(loong64.AMOVV)
  1135  		p.From.Type = obj.TYPE_REG
  1136  		p.From.Reg = loong64.REGZERO
  1137  		p.To.Type = obj.TYPE_REG
  1138  		p.To.Reg = v.Reg0()
  1139  
  1140  		p1 := s.Prog(loong64.AMOVV)
  1141  		p1.From.Type = obj.TYPE_REG
  1142  		p1.From.Reg = v.Args[1].Reg()
  1143  		p1.To.Type = obj.TYPE_REG
  1144  		p1.To.Reg = loong64.REGTMP
  1145  
  1146  		p2 := s.Prog(amcasx)
  1147  		p2.From.Type = obj.TYPE_REG
  1148  		p2.From.Reg = v.Args[2].Reg()
  1149  		p2.To.Type = obj.TYPE_MEM
  1150  		p2.To.Reg = v.Args[0].Reg()
  1151  		p2.RegTo2 = loong64.REGTMP
  1152  
  1153  		p3 := s.Prog(loong64.ABNE)
  1154  		p3.From.Type = obj.TYPE_REG
  1155  		p3.From.Reg = v.Args[1].Reg()
  1156  		p3.Reg = loong64.REGTMP
  1157  		p3.To.Type = obj.TYPE_BRANCH
  1158  
  1159  		p4 := s.Prog(loong64.AMOVV)
  1160  		p4.From.Type = obj.TYPE_CONST
  1161  		p4.From.Offset = 0x1
  1162  		p4.To.Type = obj.TYPE_REG
  1163  		p4.To.Reg = v.Reg0()
  1164  
  1165  		p5 := s.Prog(obj.ANOP)
  1166  		p3.To.SetTarget(p5)
  1167  
  1168  	case ssa.OpLOONG64LoweredNilCheck:
  1169  		// Issue a load which will fault if arg is nil.
  1170  		p := s.Prog(loong64.AMOVB)
  1171  		p.From.Type = obj.TYPE_MEM
  1172  		p.From.Reg = v.Args[0].Reg()
  1173  		ssagen.AddAux(&p.From, v)
  1174  		p.To.Type = obj.TYPE_REG
  1175  		p.To.Reg = loong64.REGTMP
  1176  		if logopt.Enabled() {
  1177  			logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
  1178  		}
  1179  		if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
  1180  			base.WarnfAt(v.Pos, "generated nil check")
  1181  		}
  1182  	case ssa.OpLOONG64FPFlagTrue,
  1183  		ssa.OpLOONG64FPFlagFalse:
  1184  		// MOVV	$0, r
  1185  		// BFPF	2(PC)
  1186  		// MOVV	$1, r
  1187  		branch := loong64.ABFPF
  1188  		if v.Op == ssa.OpLOONG64FPFlagFalse {
  1189  			branch = loong64.ABFPT
  1190  		}
  1191  		p := s.Prog(loong64.AMOVV)
  1192  		p.From.Type = obj.TYPE_REG
  1193  		p.From.Reg = loong64.REGZERO
  1194  		p.To.Type = obj.TYPE_REG
  1195  		p.To.Reg = v.Reg()
  1196  		p2 := s.Prog(branch)
  1197  		p2.To.Type = obj.TYPE_BRANCH
  1198  		p3 := s.Prog(loong64.AMOVV)
  1199  		p3.From.Type = obj.TYPE_CONST
  1200  		p3.From.Offset = 1
  1201  		p3.To.Type = obj.TYPE_REG
  1202  		p3.To.Reg = v.Reg()
  1203  		p4 := s.Prog(obj.ANOP) // not a machine instruction, for branch to land
  1204  		p2.To.SetTarget(p4)
  1205  	case ssa.OpLOONG64LoweredGetClosurePtr:
  1206  		// Closure pointer is R22 (loong64.REGCTXT).
  1207  		ssagen.CheckLoweredGetClosurePtr(v)
  1208  	case ssa.OpLOONG64LoweredGetCallerSP:
  1209  		// caller's SP is FixedFrameSize below the address of the first arg
  1210  		p := s.Prog(loong64.AMOVV)
  1211  		p.From.Type = obj.TYPE_ADDR
  1212  		p.From.Offset = -base.Ctxt.Arch.FixedFrameSize
  1213  		p.From.Name = obj.NAME_PARAM
  1214  		p.To.Type = obj.TYPE_REG
  1215  		p.To.Reg = v.Reg()
  1216  	case ssa.OpLOONG64LoweredGetCallerPC:
  1217  		p := s.Prog(obj.AGETCALLERPC)
  1218  		p.To.Type = obj.TYPE_REG
  1219  		p.To.Reg = v.Reg()
  1220  	case ssa.OpLOONG64MASKEQZ, ssa.OpLOONG64MASKNEZ:
  1221  		p := s.Prog(v.Op.Asm())
  1222  		p.From.Type = obj.TYPE_REG
  1223  		p.From.Reg = v.Args[1].Reg()
  1224  		p.Reg = v.Args[0].Reg()
  1225  		p.To.Type = obj.TYPE_REG
  1226  		p.To.Reg = v.Reg()
  1227  
  1228  	case ssa.OpLOONG64PRELD:
  1229  		// PRELD (Rarg0), hint
  1230  		p := s.Prog(v.Op.Asm())
  1231  		p.From.Type = obj.TYPE_MEM
  1232  		p.From.Reg = v.Args[0].Reg()
  1233  		p.AddRestSourceConst(v.AuxInt & 0x1f)
  1234  
  1235  	case ssa.OpLOONG64PRELDX:
  1236  		// PRELDX (Rarg0), $n, $hint
  1237  		p := s.Prog(v.Op.Asm())
  1238  		p.From.Type = obj.TYPE_MEM
  1239  		p.From.Reg = v.Args[0].Reg()
  1240  		p.AddRestSourceArgs([]obj.Addr{
  1241  			{Type: obj.TYPE_CONST, Offset: (v.AuxInt >> 5) & 0x1fffffffff},
  1242  			{Type: obj.TYPE_CONST, Offset: (v.AuxInt >> 0) & 0x1f},
  1243  		})
  1244  
  1245  	case ssa.OpLOONG64ADDshiftLLV:
  1246  		// ADDshiftLLV Rarg0, Rarg1, $shift
  1247  		// ALSLV $shift, Rarg1, Rarg0, Rtmp
  1248  		p := s.Prog(v.Op.Asm())
  1249  		p.From.Type = obj.TYPE_CONST
  1250  		p.From.Offset = v.AuxInt
  1251  		p.Reg = v.Args[1].Reg()
  1252  		p.AddRestSourceReg(v.Args[0].Reg())
  1253  		p.To.Type = obj.TYPE_REG
  1254  		p.To.Reg = v.Reg()
  1255  
  1256  	case ssa.OpClobber, ssa.OpClobberReg:
  1257  		// TODO: implement for clobberdead experiment. Nop is ok for now.
  1258  	default:
  1259  		v.Fatalf("genValue not implemented: %s", v.LongString())
  1260  	}
  1261  }
  1262  
// blockJump gives, for each conditional block kind, the branch
// instruction taken when the condition holds (asm) and the branch for
// the negated condition (invasm). ssaGenBlock picks between them based
// on which successor is laid out as the fallthrough block.
var blockJump = map[ssa.BlockKind]struct {
	asm, invasm obj.As
}{
	ssa.BlockLOONG64EQZ:  {loong64.ABEQ, loong64.ABNE},
	ssa.BlockLOONG64NEZ:  {loong64.ABNE, loong64.ABEQ},
	ssa.BlockLOONG64LTZ:  {loong64.ABLTZ, loong64.ABGEZ},
	ssa.BlockLOONG64GEZ:  {loong64.ABGEZ, loong64.ABLTZ},
	ssa.BlockLOONG64LEZ:  {loong64.ABLEZ, loong64.ABGTZ},
	ssa.BlockLOONG64GTZ:  {loong64.ABGTZ, loong64.ABLEZ},
	ssa.BlockLOONG64FPT:  {loong64.ABFPT, loong64.ABFPF},
	ssa.BlockLOONG64FPF:  {loong64.ABFPF, loong64.ABFPT},
	ssa.BlockLOONG64BEQ:  {loong64.ABEQ, loong64.ABNE},
	ssa.BlockLOONG64BNE:  {loong64.ABNE, loong64.ABEQ},
	ssa.BlockLOONG64BGE:  {loong64.ABGE, loong64.ABLT},
	ssa.BlockLOONG64BLT:  {loong64.ABLT, loong64.ABGE},
	ssa.BlockLOONG64BLTU: {loong64.ABLTU, loong64.ABGEU},
	ssa.BlockLOONG64BGEU: {loong64.ABGEU, loong64.ABLTU},
}
  1281  
// ssaGenBlock emits the terminating control-flow instructions for block b.
// next is the block that will be laid out immediately after b; an edge
// whose target equals next can fall through and needs no jump.
func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
	switch b.Kind {
	case ssa.BlockPlain, ssa.BlockDefer:
		// Unconditional successor: emit a jump only when it is not the
		// fallthrough block.
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockExit, ssa.BlockRetJmp:
		// No instructions needed here; the exit/tail-call sequence is
		// produced elsewhere.
	case ssa.BlockRet:
		s.Prog(obj.ARET)
	case ssa.BlockLOONG64EQZ, ssa.BlockLOONG64NEZ,
		ssa.BlockLOONG64LTZ, ssa.BlockLOONG64GEZ,
		ssa.BlockLOONG64LEZ, ssa.BlockLOONG64GTZ,
		ssa.BlockLOONG64BEQ, ssa.BlockLOONG64BNE,
		ssa.BlockLOONG64BLT, ssa.BlockLOONG64BGE,
		ssa.BlockLOONG64BLTU, ssa.BlockLOONG64BGEU,
		ssa.BlockLOONG64FPT, ssa.BlockLOONG64FPF:
		jmp := blockJump[b.Kind]
		var p *obj.Prog
		// Pick the branch direction so the fallthrough edge (if any)
		// needs no jump; when neither successor is next, branch on the
		// likely direction and follow with an unconditional jump.
		switch next {
		case b.Succs[0].Block():
			p = s.Br(jmp.invasm, b.Succs[1].Block())
		case b.Succs[1].Block():
			p = s.Br(jmp.asm, b.Succs[0].Block())
		default:
			if b.Likely != ssa.BranchUnlikely {
				p = s.Br(jmp.asm, b.Succs[0].Block())
				s.Br(obj.AJMP, b.Succs[1].Block())
			} else {
				p = s.Br(jmp.invasm, b.Succs[1].Block())
				s.Br(obj.AJMP, b.Succs[0].Block())
			}
		}
		// Fill in the branch operands from the block's control values.
		switch b.Kind {
		case ssa.BlockLOONG64BEQ, ssa.BlockLOONG64BNE,
			ssa.BlockLOONG64BGE, ssa.BlockLOONG64BLT,
			ssa.BlockLOONG64BGEU, ssa.BlockLOONG64BLTU:
			// Two-register compare-and-branch forms.
			p.From.Type = obj.TYPE_REG
			p.From.Reg = b.Controls[0].Reg()
			p.Reg = b.Controls[1].Reg()
		case ssa.BlockLOONG64EQZ, ssa.BlockLOONG64NEZ,
			ssa.BlockLOONG64LTZ, ssa.BlockLOONG64GEZ,
			ssa.BlockLOONG64LEZ, ssa.BlockLOONG64GTZ,
			ssa.BlockLOONG64FPT, ssa.BlockLOONG64FPF:
			// Single-operand branch; omit the register operand when the
			// control value lives in the flags (FP condition branches).
			if !b.Controls[0].Type.IsFlags() {
				p.From.Type = obj.TYPE_REG
				p.From.Reg = b.Controls[0].Reg()
			}
		}
	case ssa.BlockLOONG64JUMPTABLE:
		// Indexed jump through a table of 8-byte code pointers:
		// ALSLV $3, Rarg0, Rarg1, REGTMP
		// MOVV (REGTMP), REGTMP
		// JMP	(REGTMP)
		p := s.Prog(loong64.AALSLV)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 3 // idx*8
		p.Reg = b.Controls[0].Reg()
		p.AddRestSourceReg(b.Controls[1].Reg())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = loong64.REGTMP
		p1 := s.Prog(loong64.AMOVV)
		p1.From.Type = obj.TYPE_MEM
		p1.From.Reg = loong64.REGTMP
		p1.From.Offset = 0
		p1.To.Type = obj.TYPE_REG
		p1.To.Reg = loong64.REGTMP
		p2 := s.Prog(obj.AJMP)
		p2.To.Type = obj.TYPE_MEM
		p2.To.Reg = loong64.REGTMP
		// Save jump tables for later resolution of the target blocks.
		s.JumpTables = append(s.JumpTables, b)

	default:
		b.Fatalf("branch not implemented: %s", b.LongString())
	}
}
  1359  
  1360  func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
  1361  	p := s.Prog(loadByType(t, reg))
  1362  	p.From.Type = obj.TYPE_MEM
  1363  	p.From.Name = obj.NAME_AUTO
  1364  	p.From.Sym = n.Linksym()
  1365  	p.From.Offset = n.FrameOffset() + off
  1366  	p.To.Type = obj.TYPE_REG
  1367  	p.To.Reg = reg
  1368  	return p
  1369  }
  1370  
// spillArgReg appends (after p) an instruction that stores register reg
// into the stack slot of parameter n at offset off, using a store opcode
// chosen from t and the register class of reg. The spill is marked
// not-a-statement so it does not perturb debugger line stepping.
// It returns the appended instruction.
func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
	p = pp.Append(p, storeByType(t, reg), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
	p.To.Name = obj.NAME_PARAM
	p.To.Sym = n.Linksym()
	p.Pos = p.Pos.WithNotStmt()
	return p
}
  1378  
  1379  // move8 copies 8 bytes at src+off to dst+off.
  1380  func move8(s *ssagen.State, src, dst, tmp int16, off int64) {
  1381  	// MOVV     off(src), tmp
  1382  	ld := s.Prog(loong64.AMOVV)
  1383  	ld.From.Type = obj.TYPE_MEM
  1384  	ld.From.Reg = src
  1385  	ld.From.Offset = off
  1386  	ld.To.Type = obj.TYPE_REG
  1387  	ld.To.Reg = tmp
  1388  	// MOVV     tmp, off(dst)
  1389  	st := s.Prog(loong64.AMOVV)
  1390  	st.From.Type = obj.TYPE_REG
  1391  	st.From.Reg = tmp
  1392  	st.To.Type = obj.TYPE_MEM
  1393  	st.To.Reg = dst
  1394  	st.To.Offset = off
  1395  }
  1396  
  1397  // zero8 zeroes 8 bytes at reg+off.
  1398  func zero8(s *ssagen.State, reg int16, off int64) {
  1399  	// MOVV   ZR, off(reg)
  1400  	p := s.Prog(loong64.AMOVV)
  1401  	p.From.Type = obj.TYPE_REG
  1402  	p.From.Reg = loong64.REGZERO
  1403  	p.To.Type = obj.TYPE_MEM
  1404  	p.To.Reg = reg
  1405  	p.To.Offset = off
  1406  }
  1407  
  1408  // zero16 zeroes 16 bytes at reg+off.
  1409  func zero16(s *ssagen.State, regZero, regBase int16, off int64) {
  1410  	// VMOVQ   regZero, off(regBase)
  1411  	p := s.Prog(loong64.AVMOVQ)
  1412  	p.From.Type = obj.TYPE_REG
  1413  	p.From.Reg = regZero
  1414  	p.To.Type = obj.TYPE_MEM
  1415  	p.To.Reg = regBase
  1416  	p.To.Offset = off
  1417  }
  1418  

View as plain text