Source file src/cmd/compile/internal/loong64/ssa.go

     1  // Copyright 2022 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package loong64
     6  
     7  import (
     8  	"math"
     9  
    10  	"cmd/compile/internal/base"
    11  	"cmd/compile/internal/ir"
    12  	"cmd/compile/internal/logopt"
    13  	"cmd/compile/internal/objw"
    14  	"cmd/compile/internal/ssa"
    15  	"cmd/compile/internal/ssagen"
    16  	"cmd/compile/internal/types"
    17  	"cmd/internal/obj"
    18  	"cmd/internal/obj/loong64"
    19  	"internal/abi"
    20  )
    21  
    22  // isFPreg reports whether r is an FP register.
    23  func isFPreg(r int16) bool {
    24  	return loong64.REG_F0 <= r && r <= loong64.REG_F31
    25  }
    26  
    27  // loadByType returns the load instruction of the given type.
    28  func loadByType(t *types.Type, r int16) obj.As {
    29  	if isFPreg(r) {
    30  		if t.Size() == 4 {
    31  			return loong64.AMOVF
    32  		} else {
    33  			return loong64.AMOVD
    34  		}
    35  	} else {
    36  		switch t.Size() {
    37  		case 1:
    38  			if t.IsSigned() {
    39  				return loong64.AMOVB
    40  			} else {
    41  				return loong64.AMOVBU
    42  			}
    43  		case 2:
    44  			if t.IsSigned() {
    45  				return loong64.AMOVH
    46  			} else {
    47  				return loong64.AMOVHU
    48  			}
    49  		case 4:
    50  			if t.IsSigned() {
    51  				return loong64.AMOVW
    52  			} else {
    53  				return loong64.AMOVWU
    54  			}
    55  		case 8:
    56  			return loong64.AMOVV
    57  		}
    58  	}
    59  	panic("bad load type")
    60  }
    61  
    62  // storeByType returns the store instruction of the given type.
    63  func storeByType(t *types.Type, r int16) obj.As {
    64  	if isFPreg(r) {
    65  		if t.Size() == 4 {
    66  			return loong64.AMOVF
    67  		} else {
    68  			return loong64.AMOVD
    69  		}
    70  	} else {
    71  		switch t.Size() {
    72  		case 1:
    73  			return loong64.AMOVB
    74  		case 2:
    75  			return loong64.AMOVH
    76  		case 4:
    77  			return loong64.AMOVW
    78  		case 8:
    79  			return loong64.AMOVV
    80  		}
    81  	}
    82  	panic("bad store type")
    83  }
    84  
    85  // largestMove returns the largest move instruction possible and its size,
    86  // given the alignment of the total size of the move.
    87  //
    88  // e.g., a 16-byte move may use MOVV, but an 11-byte move must use MOVB.
    89  //
    90  // Note that the moves may not be on naturally aligned addresses depending on
    91  // the source and destination.
    92  //
    93  // This matches the calculation in ssa.moveSize.
    94  func largestMove(alignment int64) (obj.As, int64) {
    95  	switch {
    96  	case alignment%8 == 0:
    97  		return loong64.AMOVV, 8
    98  	case alignment%4 == 0:
    99  		return loong64.AMOVW, 4
   100  	case alignment%2 == 0:
   101  		return loong64.AMOVH, 2
   102  	default:
   103  		return loong64.AMOVB, 1
   104  	}
   105  }
   106  
   107  func ssaGenValue(s *ssagen.State, v *ssa.Value) {
   108  	switch v.Op {
   109  	case ssa.OpCopy, ssa.OpLOONG64MOVVreg:
   110  		if v.Type.IsMemory() {
   111  			return
   112  		}
   113  		x := v.Args[0].Reg()
   114  		y := v.Reg()
   115  		if x == y {
   116  			return
   117  		}
   118  		as := loong64.AMOVV
   119  		if isFPreg(x) && isFPreg(y) {
   120  			as = loong64.AMOVD
   121  		}
   122  		p := s.Prog(as)
   123  		p.From.Type = obj.TYPE_REG
   124  		p.From.Reg = x
   125  		p.To.Type = obj.TYPE_REG
   126  		p.To.Reg = y
   127  	case ssa.OpLOONG64MOVVnop,
   128  		ssa.OpLOONG64ZERO,
   129  		ssa.OpLOONG64LoweredRound32F,
   130  		ssa.OpLOONG64LoweredRound64F:
   131  		// nothing to do
   132  	case ssa.OpLoadReg:
   133  		if v.Type.IsFlags() {
   134  			v.Fatalf("load flags not implemented: %v", v.LongString())
   135  			return
   136  		}
   137  		r := v.Reg()
   138  		p := s.Prog(loadByType(v.Type, r))
   139  		ssagen.AddrAuto(&p.From, v.Args[0])
   140  		p.To.Type = obj.TYPE_REG
   141  		p.To.Reg = r
   142  	case ssa.OpStoreReg:
   143  		if v.Type.IsFlags() {
   144  			v.Fatalf("store flags not implemented: %v", v.LongString())
   145  			return
   146  		}
   147  		r := v.Args[0].Reg()
   148  		p := s.Prog(storeByType(v.Type, r))
   149  		p.From.Type = obj.TYPE_REG
   150  		p.From.Reg = r
   151  		ssagen.AddrAuto(&p.To, v)
   152  	case ssa.OpArgIntReg, ssa.OpArgFloatReg:
   153  		// The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill
   154  		// The loop only runs once.
   155  		for _, a := range v.Block.Func.RegArgs {
   156  			// Pass the spill/unspill information along to the assembler, offset by size of
   157  			// the saved LR slot.
   158  			addr := ssagen.SpillSlotAddr(a, loong64.REGSP, base.Ctxt.Arch.FixedFrameSize)
   159  			s.FuncInfo().AddSpill(
   160  				obj.RegSpill{Reg: a.Reg, Addr: addr, Unspill: loadByType(a.Type, a.Reg), Spill: storeByType(a.Type, a.Reg)})
   161  		}
   162  		v.Block.Func.RegArgs = nil
   163  		ssagen.CheckArgReg(v)
   164  	case ssa.OpLOONG64ADDV,
   165  		ssa.OpLOONG64SUBV,
   166  		ssa.OpLOONG64AND,
   167  		ssa.OpLOONG64OR,
   168  		ssa.OpLOONG64XOR,
   169  		ssa.OpLOONG64NOR,
   170  		ssa.OpLOONG64ANDN,
   171  		ssa.OpLOONG64ORN,
   172  		ssa.OpLOONG64SLL,
   173  		ssa.OpLOONG64SLLV,
   174  		ssa.OpLOONG64SRL,
   175  		ssa.OpLOONG64SRLV,
   176  		ssa.OpLOONG64SRA,
   177  		ssa.OpLOONG64SRAV,
   178  		ssa.OpLOONG64ROTR,
   179  		ssa.OpLOONG64ROTRV,
   180  		ssa.OpLOONG64ADDF,
   181  		ssa.OpLOONG64ADDD,
   182  		ssa.OpLOONG64SUBF,
   183  		ssa.OpLOONG64SUBD,
   184  		ssa.OpLOONG64MULF,
   185  		ssa.OpLOONG64MULD,
   186  		ssa.OpLOONG64DIVF,
   187  		ssa.OpLOONG64DIVD,
   188  		ssa.OpLOONG64MULV, ssa.OpLOONG64MULHV, ssa.OpLOONG64MULHVU, ssa.OpLOONG64MULH, ssa.OpLOONG64MULHU,
   189  		ssa.OpLOONG64DIVV, ssa.OpLOONG64REMV, ssa.OpLOONG64DIVVU, ssa.OpLOONG64REMVU,
   190  		ssa.OpLOONG64MULWVW, ssa.OpLOONG64MULWVWU,
   191  		ssa.OpLOONG64FCOPYSGD:
   192  		p := s.Prog(v.Op.Asm())
   193  		p.From.Type = obj.TYPE_REG
   194  		p.From.Reg = v.Args[1].Reg()
   195  		p.Reg = v.Args[0].Reg()
   196  		p.To.Type = obj.TYPE_REG
   197  		p.To.Reg = v.Reg()
   198  
   199  	case ssa.OpLOONG64BSTRPICKV,
   200  		ssa.OpLOONG64BSTRPICKW:
   201  		p := s.Prog(v.Op.Asm())
   202  		p.From.Type = obj.TYPE_CONST
   203  		if v.Op == ssa.OpLOONG64BSTRPICKW {
   204  			p.From.Offset = v.AuxInt >> 5
   205  			p.AddRestSourceConst(v.AuxInt & 0x1f)
   206  		} else {
   207  			p.From.Offset = v.AuxInt >> 6
   208  			p.AddRestSourceConst(v.AuxInt & 0x3f)
   209  		}
   210  		p.Reg = v.Args[0].Reg()
   211  		p.To.Type = obj.TYPE_REG
   212  		p.To.Reg = v.Reg()
   213  
   214  	case ssa.OpLOONG64FMINF,
   215  		ssa.OpLOONG64FMIND,
   216  		ssa.OpLOONG64FMAXF,
   217  		ssa.OpLOONG64FMAXD:
   218  		// ADDD Rarg0, Rarg1, Rout
   219  		// CMPEQD Rarg0, Rarg0, FCC0
   220  		// bceqz FCC0, end
   221  		// CMPEQD Rarg1, Rarg1, FCC0
   222  		// bceqz FCC0, end
   223  		// F(MIN|MAX)(F|D)
   224  
   225  		r0 := v.Args[0].Reg()
   226  		r1 := v.Args[1].Reg()
   227  		out := v.Reg()
   228  		add, fcmp := loong64.AADDD, loong64.ACMPEQD
   229  		if v.Op == ssa.OpLOONG64FMINF || v.Op == ssa.OpLOONG64FMAXF {
   230  			add = loong64.AADDF
   231  			fcmp = loong64.ACMPEQF
   232  		}
   233  		p1 := s.Prog(add)
   234  		p1.From.Type = obj.TYPE_REG
   235  		p1.From.Reg = r0
   236  		p1.Reg = r1
   237  		p1.To.Type = obj.TYPE_REG
   238  		p1.To.Reg = out
   239  
   240  		p2 := s.Prog(fcmp)
   241  		p2.From.Type = obj.TYPE_REG
   242  		p2.From.Reg = r0
   243  		p2.Reg = r0
   244  		p2.To.Type = obj.TYPE_REG
   245  		p2.To.Reg = loong64.REG_FCC0
   246  
   247  		p3 := s.Prog(loong64.ABFPF)
   248  		p3.To.Type = obj.TYPE_BRANCH
   249  
   250  		p4 := s.Prog(fcmp)
   251  		p4.From.Type = obj.TYPE_REG
   252  		p4.From.Reg = r1
   253  		p4.Reg = r1
   254  		p4.To.Type = obj.TYPE_REG
   255  		p4.To.Reg = loong64.REG_FCC0
   256  
   257  		p5 := s.Prog(loong64.ABFPF)
   258  		p5.To.Type = obj.TYPE_BRANCH
   259  
   260  		p6 := s.Prog(v.Op.Asm())
   261  		p6.From.Type = obj.TYPE_REG
   262  		p6.From.Reg = r1
   263  		p6.Reg = r0
   264  		p6.To.Type = obj.TYPE_REG
   265  		p6.To.Reg = out
   266  
   267  		nop := s.Prog(obj.ANOP)
   268  		p3.To.SetTarget(nop)
   269  		p5.To.SetTarget(nop)
   270  
   271  	case ssa.OpLOONG64SGT,
   272  		ssa.OpLOONG64SGTU:
   273  		p := s.Prog(v.Op.Asm())
   274  		p.From.Type = obj.TYPE_REG
   275  		p.From.Reg = v.Args[0].Reg()
   276  		p.Reg = v.Args[1].Reg()
   277  		p.To.Type = obj.TYPE_REG
   278  		p.To.Reg = v.Reg()
   279  	case ssa.OpLOONG64ADDVconst,
   280  		ssa.OpLOONG64ADDV16const,
   281  		ssa.OpLOONG64SUBVconst,
   282  		ssa.OpLOONG64ANDconst,
   283  		ssa.OpLOONG64ORconst,
   284  		ssa.OpLOONG64XORconst,
   285  		ssa.OpLOONG64SLLconst,
   286  		ssa.OpLOONG64SLLVconst,
   287  		ssa.OpLOONG64SRLconst,
   288  		ssa.OpLOONG64SRLVconst,
   289  		ssa.OpLOONG64SRAconst,
   290  		ssa.OpLOONG64SRAVconst,
   291  		ssa.OpLOONG64ROTRconst,
   292  		ssa.OpLOONG64ROTRVconst,
   293  		ssa.OpLOONG64SGTconst,
   294  		ssa.OpLOONG64SGTUconst:
   295  		p := s.Prog(v.Op.Asm())
   296  		p.From.Type = obj.TYPE_CONST
   297  		p.From.Offset = v.AuxInt
   298  		p.Reg = v.Args[0].Reg()
   299  		p.To.Type = obj.TYPE_REG
   300  		p.To.Reg = v.Reg()
   301  
   302  	case ssa.OpLOONG64NORconst:
   303  		// MOVV $const, Rtmp
   304  		// NOR  Rtmp, Rarg0, Rout
   305  		p := s.Prog(loong64.AMOVV)
   306  		p.From.Type = obj.TYPE_CONST
   307  		p.From.Offset = v.AuxInt
   308  		p.To.Type = obj.TYPE_REG
   309  		p.To.Reg = loong64.REGTMP
   310  
   311  		p2 := s.Prog(v.Op.Asm())
   312  		p2.From.Type = obj.TYPE_REG
   313  		p2.From.Reg = loong64.REGTMP
   314  		p2.Reg = v.Args[0].Reg()
   315  		p2.To.Type = obj.TYPE_REG
   316  		p2.To.Reg = v.Reg()
   317  
   318  	case ssa.OpLOONG64MOVVconst:
   319  		r := v.Reg()
   320  		p := s.Prog(v.Op.Asm())
   321  		p.From.Type = obj.TYPE_CONST
   322  		p.From.Offset = v.AuxInt
   323  		p.To.Type = obj.TYPE_REG
   324  		p.To.Reg = r
   325  		if isFPreg(r) {
   326  			// cannot move into FP or special registers, use TMP as intermediate
   327  			p.To.Reg = loong64.REGTMP
   328  			p = s.Prog(loong64.AMOVV)
   329  			p.From.Type = obj.TYPE_REG
   330  			p.From.Reg = loong64.REGTMP
   331  			p.To.Type = obj.TYPE_REG
   332  			p.To.Reg = r
   333  		}
   334  	case ssa.OpLOONG64MOVFconst,
   335  		ssa.OpLOONG64MOVDconst:
   336  		p := s.Prog(v.Op.Asm())
   337  		p.From.Type = obj.TYPE_FCONST
   338  		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
   339  		p.To.Type = obj.TYPE_REG
   340  		p.To.Reg = v.Reg()
   341  	case ssa.OpLOONG64CMPEQF,
   342  		ssa.OpLOONG64CMPEQD,
   343  		ssa.OpLOONG64CMPGEF,
   344  		ssa.OpLOONG64CMPGED,
   345  		ssa.OpLOONG64CMPGTF,
   346  		ssa.OpLOONG64CMPGTD:
   347  		p := s.Prog(v.Op.Asm())
   348  		p.From.Type = obj.TYPE_REG
   349  		p.From.Reg = v.Args[0].Reg()
   350  		p.Reg = v.Args[1].Reg()
   351  		p.To.Type = obj.TYPE_REG
   352  		p.To.Reg = loong64.REG_FCC0
   353  
   354  	case ssa.OpLOONG64FMADDF,
   355  		ssa.OpLOONG64FMADDD,
   356  		ssa.OpLOONG64FMSUBF,
   357  		ssa.OpLOONG64FMSUBD,
   358  		ssa.OpLOONG64FNMADDF,
   359  		ssa.OpLOONG64FNMADDD,
   360  		ssa.OpLOONG64FNMSUBF,
   361  		ssa.OpLOONG64FNMSUBD:
   362  		p := s.Prog(v.Op.Asm())
   363  		// r=(FMA x y z) -> FMADDD z, y, x, r
   364  		// the SSA operand order is for taking advantage of
   365  		// commutativity (that only applies for the first two operands)
   366  		r := v.Reg()
   367  		x := v.Args[0].Reg()
   368  		y := v.Args[1].Reg()
   369  		z := v.Args[2].Reg()
   370  		p.From.Type = obj.TYPE_REG
   371  		p.From.Reg = z
   372  		p.Reg = y
   373  		p.AddRestSourceReg(x)
   374  		p.To.Type = obj.TYPE_REG
   375  		p.To.Reg = r
   376  
   377  	case ssa.OpLOONG64MOVVaddr:
   378  		p := s.Prog(loong64.AMOVV)
   379  		p.From.Type = obj.TYPE_ADDR
   380  		p.From.Reg = v.Args[0].Reg()
   381  		var wantreg string
   382  		// MOVV $sym+off(base), R
   383  		// the assembler expands it as the following:
   384  		// - base is SP: add constant offset to SP (R3)
   385  		// when constant is large, tmp register (R30) may be used
   386  		// - base is SB: load external address with relocation
   387  		switch v.Aux.(type) {
   388  		default:
   389  			v.Fatalf("aux is of unknown type %T", v.Aux)
   390  		case *obj.LSym:
   391  			wantreg = "SB"
   392  			ssagen.AddAux(&p.From, v)
   393  		case *ir.Name:
   394  			wantreg = "SP"
   395  			ssagen.AddAux(&p.From, v)
   396  		case nil:
   397  			// No sym, just MOVV $off(SP), R
   398  			wantreg = "SP"
   399  			p.From.Offset = v.AuxInt
   400  		}
   401  		if reg := v.Args[0].RegName(); reg != wantreg {
   402  			v.Fatalf("bad reg %s for symbol type %T, want %s", reg, v.Aux, wantreg)
   403  		}
   404  		p.To.Type = obj.TYPE_REG
   405  		p.To.Reg = v.Reg()
   406  
   407  	case ssa.OpLOONG64MOVBloadidx,
   408  		ssa.OpLOONG64MOVBUloadidx,
   409  		ssa.OpLOONG64MOVHloadidx,
   410  		ssa.OpLOONG64MOVHUloadidx,
   411  		ssa.OpLOONG64MOVWloadidx,
   412  		ssa.OpLOONG64MOVWUloadidx,
   413  		ssa.OpLOONG64MOVVloadidx,
   414  		ssa.OpLOONG64MOVFloadidx,
   415  		ssa.OpLOONG64MOVDloadidx:
   416  		p := s.Prog(v.Op.Asm())
   417  		p.From.Type = obj.TYPE_MEM
   418  		p.From.Name = obj.NAME_NONE
   419  		p.From.Reg = v.Args[0].Reg()
   420  		p.From.Index = v.Args[1].Reg()
   421  		p.To.Type = obj.TYPE_REG
   422  		p.To.Reg = v.Reg()
   423  
   424  	case ssa.OpLOONG64MOVBstoreidx,
   425  		ssa.OpLOONG64MOVHstoreidx,
   426  		ssa.OpLOONG64MOVWstoreidx,
   427  		ssa.OpLOONG64MOVVstoreidx,
   428  		ssa.OpLOONG64MOVFstoreidx,
   429  		ssa.OpLOONG64MOVDstoreidx:
   430  		p := s.Prog(v.Op.Asm())
   431  		p.From.Type = obj.TYPE_REG
   432  		p.From.Reg = v.Args[2].Reg()
   433  		p.To.Type = obj.TYPE_MEM
   434  		p.To.Name = obj.NAME_NONE
   435  		p.To.Reg = v.Args[0].Reg()
   436  		p.To.Index = v.Args[1].Reg()
   437  
   438  	case ssa.OpLOONG64MOVBload,
   439  		ssa.OpLOONG64MOVBUload,
   440  		ssa.OpLOONG64MOVHload,
   441  		ssa.OpLOONG64MOVHUload,
   442  		ssa.OpLOONG64MOVWload,
   443  		ssa.OpLOONG64MOVWUload,
   444  		ssa.OpLOONG64MOVVload,
   445  		ssa.OpLOONG64MOVFload,
   446  		ssa.OpLOONG64MOVDload:
   447  		p := s.Prog(v.Op.Asm())
   448  		p.From.Type = obj.TYPE_MEM
   449  		p.From.Reg = v.Args[0].Reg()
   450  		ssagen.AddAux(&p.From, v)
   451  		p.To.Type = obj.TYPE_REG
   452  		p.To.Reg = v.Reg()
   453  	case ssa.OpLOONG64MOVBstore,
   454  		ssa.OpLOONG64MOVHstore,
   455  		ssa.OpLOONG64MOVWstore,
   456  		ssa.OpLOONG64MOVVstore,
   457  		ssa.OpLOONG64MOVFstore,
   458  		ssa.OpLOONG64MOVDstore:
   459  		p := s.Prog(v.Op.Asm())
   460  		p.From.Type = obj.TYPE_REG
   461  		p.From.Reg = v.Args[1].Reg()
   462  		p.To.Type = obj.TYPE_MEM
   463  		p.To.Reg = v.Args[0].Reg()
   464  		ssagen.AddAux(&p.To, v)
   465  	case ssa.OpLOONG64MOVBreg,
   466  		ssa.OpLOONG64MOVBUreg,
   467  		ssa.OpLOONG64MOVHreg,
   468  		ssa.OpLOONG64MOVHUreg,
   469  		ssa.OpLOONG64MOVWreg,
   470  		ssa.OpLOONG64MOVWUreg:
   471  		a := v.Args[0]
   472  		for a.Op == ssa.OpCopy || a.Op == ssa.OpLOONG64MOVVreg {
   473  			a = a.Args[0]
   474  		}
   475  		if a.Op == ssa.OpLoadReg && loong64.REG_R0 <= a.Reg() && a.Reg() <= loong64.REG_R31 {
   476  			// LoadReg from a narrower type does an extension, except loading
   477  			// to a floating point register. So only eliminate the extension
   478  			// if it is loaded to an integer register.
   479  
   480  			t := a.Type
   481  			switch {
   482  			case v.Op == ssa.OpLOONG64MOVBreg && t.Size() == 1 && t.IsSigned(),
   483  				v.Op == ssa.OpLOONG64MOVBUreg && t.Size() == 1 && !t.IsSigned(),
   484  				v.Op == ssa.OpLOONG64MOVHreg && t.Size() == 2 && t.IsSigned(),
   485  				v.Op == ssa.OpLOONG64MOVHUreg && t.Size() == 2 && !t.IsSigned(),
   486  				v.Op == ssa.OpLOONG64MOVWreg && t.Size() == 4 && t.IsSigned(),
   487  				v.Op == ssa.OpLOONG64MOVWUreg && t.Size() == 4 && !t.IsSigned():
   488  				// arg is a proper-typed load, already zero/sign-extended, don't extend again
   489  				if v.Reg() == v.Args[0].Reg() {
   490  					return
   491  				}
   492  				p := s.Prog(loong64.AMOVV)
   493  				p.From.Type = obj.TYPE_REG
   494  				p.From.Reg = v.Args[0].Reg()
   495  				p.To.Type = obj.TYPE_REG
   496  				p.To.Reg = v.Reg()
   497  				return
   498  			default:
   499  			}
   500  		}
   501  		fallthrough
   502  
   503  	case ssa.OpLOONG64MOVWF,
   504  		ssa.OpLOONG64MOVWD,
   505  		ssa.OpLOONG64TRUNCFW,
   506  		ssa.OpLOONG64TRUNCDW,
   507  		ssa.OpLOONG64MOVVF,
   508  		ssa.OpLOONG64MOVVD,
   509  		ssa.OpLOONG64TRUNCFV,
   510  		ssa.OpLOONG64TRUNCDV,
   511  		ssa.OpLOONG64MOVFD,
   512  		ssa.OpLOONG64MOVDF,
   513  		ssa.OpLOONG64MOVWfpgp,
   514  		ssa.OpLOONG64MOVWgpfp,
   515  		ssa.OpLOONG64MOVVfpgp,
   516  		ssa.OpLOONG64MOVVgpfp,
   517  		ssa.OpLOONG64NEGF,
   518  		ssa.OpLOONG64NEGD,
   519  		ssa.OpLOONG64CLZW,
   520  		ssa.OpLOONG64CLZV,
   521  		ssa.OpLOONG64CTZW,
   522  		ssa.OpLOONG64CTZV,
   523  		ssa.OpLOONG64SQRTD,
   524  		ssa.OpLOONG64SQRTF,
   525  		ssa.OpLOONG64REVB2H,
   526  		ssa.OpLOONG64REVB2W,
   527  		ssa.OpLOONG64REVB4H,
   528  		ssa.OpLOONG64REVBV,
   529  		ssa.OpLOONG64BITREV4B,
   530  		ssa.OpLOONG64BITREVW,
   531  		ssa.OpLOONG64BITREVV,
   532  		ssa.OpLOONG64ABSF,
   533  		ssa.OpLOONG64ABSD:
   534  		p := s.Prog(v.Op.Asm())
   535  		p.From.Type = obj.TYPE_REG
   536  		p.From.Reg = v.Args[0].Reg()
   537  		p.To.Type = obj.TYPE_REG
   538  		p.To.Reg = v.Reg()
   539  
   540  	case ssa.OpLOONG64VPCNT64,
   541  		ssa.OpLOONG64VPCNT32,
   542  		ssa.OpLOONG64VPCNT16:
   543  		p := s.Prog(v.Op.Asm())
   544  		p.From.Type = obj.TYPE_REG
   545  		p.From.Reg = ((v.Args[0].Reg() - loong64.REG_F0) & 31) + loong64.REG_V0
   546  		p.To.Type = obj.TYPE_REG
   547  		p.To.Reg = ((v.Reg() - loong64.REG_F0) & 31) + loong64.REG_V0
   548  
   549  	case ssa.OpLOONG64NEGV:
   550  		// SUB from REGZERO
   551  		p := s.Prog(loong64.ASUBVU)
   552  		p.From.Type = obj.TYPE_REG
   553  		p.From.Reg = v.Args[0].Reg()
   554  		p.Reg = loong64.REGZERO
   555  		p.To.Type = obj.TYPE_REG
   556  		p.To.Reg = v.Reg()
   557  
   558  	case ssa.OpLOONG64LoweredZero:
   559  		ptrReg := v.Args[0].Reg()
   560  		n := v.AuxInt
   561  		if n < 16 {
   562  			v.Fatalf("Zero too small %d", n)
   563  		}
   564  
   565  		// Generate Zeroing instructions.
   566  		var off int64
   567  		for n >= 8 {
   568  			// MOVV     ZR, off(ptrReg)
   569  			zero8(s, ptrReg, off)
   570  			off += 8
   571  			n -= 8
   572  		}
   573  		if n != 0 {
   574  			// MOVV     ZR, off+n-8(ptrReg)
   575  			zero8(s, ptrReg, off+n-8)
   576  		}
   577  	case ssa.OpLOONG64LoweredZeroLoop:
   578  		ptrReg := v.Args[0].Reg()
   579  		endReg := v.RegTmp()
   580  		flagReg := int16(loong64.REGTMP)
   581  		var off int64
   582  		n := v.AuxInt
   583  		loopSize := int64(64)
   584  		if n < 3*loopSize {
   585  			// - a loop count of 0 won't work.
   586  			// - a loop count of 1 is useless.
   587  			// - a loop count of 2 is a code size ~tie
   588  			//     4 instructions to implement the loop
   589  			//     8 instructions in the loop body
   590  			//   vs
    591  		//     16 instructions in the straightline code
   592  			//   Might as well use straightline code.
   593  			v.Fatalf("ZeroLoop size too small %d", n)
   594  		}
   595  
   596  		//    ADDV    n - n%loopSize, ptrReg, endReg
   597  		//    MOVBU   ir.Syms.Loong64HasLSX, flagReg
   598  		//    BNE     flagReg, lsxInit
   599  		// genericLoop:
   600  		//    for off = 0; off < loopSize; off += 8 {
   601  		//            zero8(s, ptrReg, off)
   602  		//    }
   603  		//    ADDV    $loopSize, ptrReg
   604  		//    BNE     endReg, ptrReg, genericLoop
   605  		//    JMP     tail
   606  		// lsxInit:
   607  		//    VXORV   V31, V31, V31
   608  		// lsxLoop:
   609  		//    for off = 0; off < loopSize; off += 16 {
   610  		//            zero16(s, V31, ptrReg, off)
   611  		//    }
   612  		//    ADDV    $loopSize, ptrReg
   613  		//    BNE     endReg, ptrReg, lsxLoop
   614  		// tail:
   615  		//    n %= loopSize
   616  		//    for off = 0; n >= 8; off += 8, n -= 8 {
   617  		//            zero8(s, ptrReg, off)
   618  		//    }
   619  		//
   620  		//    if n != 0 {
   621  		//           zero8(s, ptrReg, off+n-8)
   622  		//    }
   623  
   624  		p1 := s.Prog(loong64.AADDV)
   625  		p1.From.Type = obj.TYPE_CONST
   626  		p1.From.Offset = n - n%loopSize
   627  		p1.Reg = ptrReg
   628  		p1.To.Type = obj.TYPE_REG
   629  		p1.To.Reg = endReg
   630  
   631  		p2 := s.Prog(loong64.AMOVBU)
   632  		p2.From.Type = obj.TYPE_MEM
   633  		p2.From.Name = obj.NAME_EXTERN
   634  		p2.From.Sym = ir.Syms.Loong64HasLSX
   635  		p2.To.Type = obj.TYPE_REG
   636  		p2.To.Reg = flagReg
   637  
   638  		p3 := s.Prog(loong64.ABNE)
   639  		p3.From.Type = obj.TYPE_REG
   640  		p3.From.Reg = flagReg
   641  		p3.To.Type = obj.TYPE_BRANCH
   642  
   643  		for off = 0; off < loopSize; off += 8 {
   644  			zero8(s, ptrReg, off)
   645  		}
   646  
   647  		p4 := s.Prog(loong64.AADDV)
   648  		p4.From.Type = obj.TYPE_CONST
   649  		p4.From.Offset = loopSize
   650  		p4.To.Type = obj.TYPE_REG
   651  		p4.To.Reg = ptrReg
   652  
   653  		p5 := s.Prog(loong64.ABNE)
   654  		p5.From.Type = obj.TYPE_REG
   655  		p5.From.Reg = endReg
   656  		p5.Reg = ptrReg
   657  		p5.To.Type = obj.TYPE_BRANCH
   658  		p5.To.SetTarget(p3.Link)
   659  
   660  		p6 := s.Prog(obj.AJMP)
   661  		p6.To.Type = obj.TYPE_BRANCH
   662  
   663  		p7 := s.Prog(loong64.AVXORV)
   664  		p7.From.Type = obj.TYPE_REG
   665  		p7.From.Reg = loong64.REG_V31
   666  		p7.To.Type = obj.TYPE_REG
   667  		p7.To.Reg = loong64.REG_V31
   668  		p3.To.SetTarget(p7)
   669  
   670  		for off = 0; off < loopSize; off += 16 {
   671  			zero16(s, loong64.REG_V31, ptrReg, off)
   672  		}
   673  
   674  		p8 := s.Prog(loong64.AADDV)
   675  		p8.From.Type = obj.TYPE_CONST
   676  		p8.From.Offset = loopSize
   677  		p8.To.Type = obj.TYPE_REG
   678  		p8.To.Reg = ptrReg
   679  
   680  		p9 := s.Prog(loong64.ABNE)
   681  		p9.From.Type = obj.TYPE_REG
   682  		p9.From.Reg = endReg
   683  		p9.Reg = ptrReg
   684  		p9.To.Type = obj.TYPE_BRANCH
   685  		p9.To.SetTarget(p7.Link)
   686  
   687  		p10 := s.Prog(obj.ANOP)
   688  		p6.To.SetTarget(p10)
   689  
   690  		// Multiples of the loop size are now done.
   691  		n %= loopSize
   692  		// Write any fractional portion.
   693  		for off = 0; n >= 8; off += 8 {
   694  			// MOVV   ZR, off(ptrReg)
   695  			zero8(s, ptrReg, off)
   696  			n -= 8
   697  		}
   698  
   699  		if n != 0 {
   700  			zero8(s, ptrReg, off+n-8)
   701  		}
   702  
   703  	case ssa.OpLOONG64LoweredMove:
   704  		dstReg := v.Args[0].Reg()
   705  		srcReg := v.Args[1].Reg()
   706  		if dstReg == srcReg {
   707  			break
   708  		}
   709  		tmpReg := int16(loong64.REG_R23)
   710  		n := v.AuxInt
   711  		if n < 16 {
   712  			v.Fatalf("Move too small %d", n)
   713  		}
   714  
   715  		var off int64
   716  		for n >= 8 {
   717  			// MOVV     off(srcReg), tmpReg
   718  			// MOVV     tmpReg, off(dstReg)
   719  			move8(s, srcReg, dstReg, tmpReg, off)
   720  			off += 8
   721  			n -= 8
   722  		}
   723  
   724  		if n != 0 {
   725  			// MOVV     off+n-8(srcReg), tmpReg
   726  			// MOVV     tmpReg, off+n-8(srcReg)
   727  			move8(s, srcReg, dstReg, tmpReg, off+n-8)
   728  		}
   729  	case ssa.OpLOONG64LoweredMoveLoop:
   730  		dstReg := v.Args[0].Reg()
   731  		srcReg := v.Args[1].Reg()
   732  		if dstReg == srcReg {
   733  			break
   734  		}
   735  		srcEndReg := int16(loong64.REG_R23)
   736  		tmpReg := int16(loong64.REG_R24)
   737  		var off int64
   738  		n := v.AuxInt
   739  		loopSize := int64(64)
   740  		if n < 3*loopSize {
   741  			// - a loop count of 0 won't work.
   742  			// - a loop count of 1 is useless.
   743  			// - a loop count of 2 is a code size ~tie
   744  			//     4 instructions to implement the loop
   745  			//     8 instructions in the loop body
   746  			//   vs
   747  			//     16 instructions in the straightline code
   748  			//   Might as well use straightline code.
   749  			v.Fatalf("MoveLoop size too small %d", n)
   750  		}
   751  
   752  		//    ADDV    n - n%loopSize, srcReg, srcEndReg
   753  		// Loop8:
   754  		//    for off = 0; off < loopSize; off += 8 {
   755  		//            move8(s, srcReg, dstReg, tmpReg, off)
   756  		//    }
   757  		//    ADDV    $loopSize, srcReg
   758  		//    ADDV    $loopSize, dstReg
   759  		//    BNE     srcEndReg, srcReg, Loop8
   760  		//
   761  		//    n %= loopSize
   762  		//    for off = 0; n >= 8; off += 8 {
   763  		//           move8(s, srcReg, dstReg, tmpReg, off)
   764  		//           n -= 8
   765  		//    }
   766  		//
   767  		//    if n != 0 {
   768  		//           move8(s, srcReg, dstReg, tmpReg, off+n-8)
   769  		//    }
   770  
   771  		p1 := s.Prog(loong64.AADDV)
   772  		p1.From.Type = obj.TYPE_CONST
   773  		p1.From.Offset = n - n%loopSize
   774  		p1.Reg = srcReg
   775  		p1.To.Type = obj.TYPE_REG
   776  		p1.To.Reg = srcEndReg
   777  
   778  		for off = 0; off < loopSize; off += 8 {
   779  			move8(s, srcReg, dstReg, tmpReg, off)
   780  		}
   781  
   782  		p2 := s.Prog(loong64.AADDV)
   783  		p2.From.Type = obj.TYPE_CONST
   784  		p2.From.Offset = loopSize
   785  		p2.To.Type = obj.TYPE_REG
   786  		p2.To.Reg = srcReg
   787  
   788  		p3 := s.Prog(loong64.AADDV)
   789  		p3.From.Type = obj.TYPE_CONST
   790  		p3.From.Offset = loopSize
   791  		p3.To.Type = obj.TYPE_REG
   792  		p3.To.Reg = dstReg
   793  
   794  		p4 := s.Prog(loong64.ABNE)
   795  		p4.From.Type = obj.TYPE_REG
   796  		p4.From.Reg = srcEndReg
   797  		p4.Reg = srcReg
   798  		p4.To.Type = obj.TYPE_BRANCH
   799  		p4.To.SetTarget(p1.Link)
   800  
   801  		// Multiples of the loop size are now done.
   802  		n %= loopSize
   803  
   804  		// Copy any fractional portion.
   805  		for off = 0; n >= 8; off += 8 {
   806  			move8(s, srcReg, dstReg, tmpReg, off)
   807  			n -= 8
   808  		}
   809  
   810  		if n != 0 {
   811  			move8(s, srcReg, dstReg, tmpReg, off+n-8)
   812  		}
   813  
   814  	case ssa.OpLOONG64CALLstatic, ssa.OpLOONG64CALLclosure, ssa.OpLOONG64CALLinter:
   815  		s.Call(v)
   816  	case ssa.OpLOONG64CALLtail, ssa.OpLOONG64CALLtailinter:
   817  		s.TailCall(v)
   818  	case ssa.OpLOONG64LoweredWB:
   819  		p := s.Prog(obj.ACALL)
   820  		p.To.Type = obj.TYPE_MEM
   821  		p.To.Name = obj.NAME_EXTERN
   822  		// AuxInt encodes how many buffer entries we need.
   823  		p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1]
   824  
   825  	case ssa.OpLOONG64LoweredPubBarrier:
   826  		// DBAR 0x1A
   827  		p := s.Prog(v.Op.Asm())
   828  		p.From.Type = obj.TYPE_CONST
   829  		p.From.Offset = 0x1A
   830  
   831  	case ssa.OpLOONG64LoweredPanicBoundsRR, ssa.OpLOONG64LoweredPanicBoundsRC, ssa.OpLOONG64LoweredPanicBoundsCR, ssa.OpLOONG64LoweredPanicBoundsCC:
   832  		// Compute the constant we put in the PCData entry for this call.
   833  		code, signed := ssa.BoundsKind(v.AuxInt).Code()
   834  		xIsReg := false
   835  		yIsReg := false
   836  		xVal := 0
   837  		yVal := 0
   838  		switch v.Op {
   839  		case ssa.OpLOONG64LoweredPanicBoundsRR:
   840  			xIsReg = true
   841  			xVal = int(v.Args[0].Reg() - loong64.REG_R4)
   842  			yIsReg = true
   843  			yVal = int(v.Args[1].Reg() - loong64.REG_R4)
   844  		case ssa.OpLOONG64LoweredPanicBoundsRC:
   845  			xIsReg = true
   846  			xVal = int(v.Args[0].Reg() - loong64.REG_R4)
   847  			c := v.Aux.(ssa.PanicBoundsC).C
   848  			if c >= 0 && c <= abi.BoundsMaxConst {
   849  				yVal = int(c)
   850  			} else {
   851  				// Move constant to a register
   852  				yIsReg = true
   853  				if yVal == xVal {
   854  					yVal = 1
   855  				}
   856  				p := s.Prog(loong64.AMOVV)
   857  				p.From.Type = obj.TYPE_CONST
   858  				p.From.Offset = c
   859  				p.To.Type = obj.TYPE_REG
   860  				p.To.Reg = loong64.REG_R4 + int16(yVal)
   861  			}
   862  		case ssa.OpLOONG64LoweredPanicBoundsCR:
   863  			yIsReg = true
   864  			yVal = int(v.Args[0].Reg() - loong64.REG_R4)
   865  			c := v.Aux.(ssa.PanicBoundsC).C
   866  			if c >= 0 && c <= abi.BoundsMaxConst {
   867  				xVal = int(c)
   868  			} else {
   869  				// Move constant to a register
   870  				xIsReg = true
   871  				if xVal == yVal {
   872  					xVal = 1
   873  				}
   874  				p := s.Prog(loong64.AMOVV)
   875  				p.From.Type = obj.TYPE_CONST
   876  				p.From.Offset = c
   877  				p.To.Type = obj.TYPE_REG
   878  				p.To.Reg = loong64.REG_R4 + int16(xVal)
   879  			}
   880  		case ssa.OpLOONG64LoweredPanicBoundsCC:
   881  			c := v.Aux.(ssa.PanicBoundsCC).Cx
   882  			if c >= 0 && c <= abi.BoundsMaxConst {
   883  				xVal = int(c)
   884  			} else {
   885  				// Move constant to a register
   886  				xIsReg = true
   887  				p := s.Prog(loong64.AMOVV)
   888  				p.From.Type = obj.TYPE_CONST
   889  				p.From.Offset = c
   890  				p.To.Type = obj.TYPE_REG
   891  				p.To.Reg = loong64.REG_R4 + int16(xVal)
   892  			}
   893  			c = v.Aux.(ssa.PanicBoundsCC).Cy
   894  			if c >= 0 && c <= abi.BoundsMaxConst {
   895  				yVal = int(c)
   896  			} else {
   897  				// Move constant to a register
   898  				yIsReg = true
   899  				yVal = 1
   900  				p := s.Prog(loong64.AMOVV)
   901  				p.From.Type = obj.TYPE_CONST
   902  				p.From.Offset = c
   903  				p.To.Type = obj.TYPE_REG
   904  				p.To.Reg = loong64.REG_R4 + int16(yVal)
   905  			}
   906  		}
   907  		c := abi.BoundsEncode(code, signed, xIsReg, yIsReg, xVal, yVal)
   908  
   909  		p := s.Prog(obj.APCDATA)
   910  		p.From.SetConst(abi.PCDATA_PanicBounds)
   911  		p.To.SetConst(int64(c))
   912  		p = s.Prog(obj.ACALL)
   913  		p.To.Type = obj.TYPE_MEM
   914  		p.To.Name = obj.NAME_EXTERN
   915  		p.To.Sym = ir.Syms.PanicBounds
   916  
   917  	case ssa.OpLOONG64LoweredAtomicLoad8, ssa.OpLOONG64LoweredAtomicLoad32, ssa.OpLOONG64LoweredAtomicLoad64:
   918  		// MOVB	(Rarg0), Rout
   919  		// DBAR	0x14
   920  		as := loong64.AMOVV
   921  		switch v.Op {
   922  		case ssa.OpLOONG64LoweredAtomicLoad8:
   923  			as = loong64.AMOVB
   924  		case ssa.OpLOONG64LoweredAtomicLoad32:
   925  			as = loong64.AMOVW
   926  		}
   927  		p := s.Prog(as)
   928  		p.From.Type = obj.TYPE_MEM
   929  		p.From.Reg = v.Args[0].Reg()
   930  		p.To.Type = obj.TYPE_REG
   931  		p.To.Reg = v.Reg0()
   932  		p1 := s.Prog(loong64.ADBAR)
   933  		p1.From.Type = obj.TYPE_CONST
   934  		p1.From.Offset = 0x14
   935  
   936  	case ssa.OpLOONG64LoweredAtomicStore8,
   937  		ssa.OpLOONG64LoweredAtomicStore32,
   938  		ssa.OpLOONG64LoweredAtomicStore64:
   939  		// DBAR 0x12
   940  		// MOVx (Rarg1), Rout
   941  		// DBAR 0x18
   942  		movx := loong64.AMOVV
   943  		switch v.Op {
   944  		case ssa.OpLOONG64LoweredAtomicStore8:
   945  			movx = loong64.AMOVB
   946  		case ssa.OpLOONG64LoweredAtomicStore32:
   947  			movx = loong64.AMOVW
   948  		}
   949  		p := s.Prog(loong64.ADBAR)
   950  		p.From.Type = obj.TYPE_CONST
   951  		p.From.Offset = 0x12
   952  
   953  		p1 := s.Prog(movx)
   954  		p1.From.Type = obj.TYPE_REG
   955  		p1.From.Reg = v.Args[1].Reg()
   956  		p1.To.Type = obj.TYPE_MEM
   957  		p1.To.Reg = v.Args[0].Reg()
   958  
   959  		p2 := s.Prog(loong64.ADBAR)
   960  		p2.From.Type = obj.TYPE_CONST
   961  		p2.From.Offset = 0x18
   962  
   963  	case ssa.OpLOONG64LoweredAtomicStore8Variant,
   964  		ssa.OpLOONG64LoweredAtomicStore32Variant,
   965  		ssa.OpLOONG64LoweredAtomicStore64Variant:
   966  		//AMSWAPx  Rarg1, (Rarg0), Rout
   967  		amswapx := loong64.AAMSWAPDBV
   968  		switch v.Op {
   969  		case ssa.OpLOONG64LoweredAtomicStore32Variant:
   970  			amswapx = loong64.AAMSWAPDBW
   971  		case ssa.OpLOONG64LoweredAtomicStore8Variant:
   972  			amswapx = loong64.AAMSWAPDBB
   973  		}
   974  		p := s.Prog(amswapx)
   975  		p.From.Type = obj.TYPE_REG
   976  		p.From.Reg = v.Args[1].Reg()
   977  		p.To.Type = obj.TYPE_MEM
   978  		p.To.Reg = v.Args[0].Reg()
   979  		p.RegTo2 = loong64.REGZERO
   980  
   981  	case ssa.OpLOONG64LoweredAtomicExchange32, ssa.OpLOONG64LoweredAtomicExchange64:
   982  		// AMSWAPx	Rarg1, (Rarg0), Rout
   983  		amswapx := loong64.AAMSWAPDBV
   984  		if v.Op == ssa.OpLOONG64LoweredAtomicExchange32 {
   985  			amswapx = loong64.AAMSWAPDBW
   986  		}
   987  		p := s.Prog(amswapx)
   988  		p.From.Type = obj.TYPE_REG
   989  		p.From.Reg = v.Args[1].Reg()
   990  		p.To.Type = obj.TYPE_MEM
   991  		p.To.Reg = v.Args[0].Reg()
   992  		p.RegTo2 = v.Reg0()
   993  
   994  	case ssa.OpLOONG64LoweredAtomicExchange8Variant:
   995  		// AMSWAPDBB	Rarg1, (Rarg0), Rout
   996  		p := s.Prog(loong64.AAMSWAPDBB)
   997  		p.From.Type = obj.TYPE_REG
   998  		p.From.Reg = v.Args[1].Reg()
   999  		p.To.Type = obj.TYPE_MEM
  1000  		p.To.Reg = v.Args[0].Reg()
  1001  		p.RegTo2 = v.Reg0()
  1002  
  1003  	case ssa.OpLOONG64LoweredAtomicAdd32, ssa.OpLOONG64LoweredAtomicAdd64:
  1004  		// AMADDx  Rarg1, (Rarg0), Rout
  1005  		// ADDV    Rarg1, Rout, Rout
  1006  		amaddx := loong64.AAMADDDBV
  1007  		addx := loong64.AADDV
  1008  		if v.Op == ssa.OpLOONG64LoweredAtomicAdd32 {
  1009  			amaddx = loong64.AAMADDDBW
  1010  		}
  1011  		p := s.Prog(amaddx)
  1012  		p.From.Type = obj.TYPE_REG
  1013  		p.From.Reg = v.Args[1].Reg()
  1014  		p.To.Type = obj.TYPE_MEM
  1015  		p.To.Reg = v.Args[0].Reg()
  1016  		p.RegTo2 = v.Reg0()
  1017  
  1018  		p1 := s.Prog(addx)
  1019  		p1.From.Type = obj.TYPE_REG
  1020  		p1.From.Reg = v.Args[1].Reg()
  1021  		p1.Reg = v.Reg0()
  1022  		p1.To.Type = obj.TYPE_REG
  1023  		p1.To.Reg = v.Reg0()
  1024  
  1025  	case ssa.OpLOONG64LoweredAtomicCas32, ssa.OpLOONG64LoweredAtomicCas64:
  1026  		// MOVV $0, Rout
  1027  		// DBAR 0x14
  1028  		// LL	(Rarg0), Rtmp
  1029  		// BNE	Rtmp, Rarg1, 4(PC)
  1030  		// MOVV Rarg2, Rout
  1031  		// SC	Rout, (Rarg0)
  1032  		// BEQ	Rout, -4(PC)
  1033  		// DBAR 0x12
  1034  		ll := loong64.ALLV
  1035  		sc := loong64.ASCV
  1036  		if v.Op == ssa.OpLOONG64LoweredAtomicCas32 {
  1037  			ll = loong64.ALL
  1038  			sc = loong64.ASC
  1039  		}
  1040  
  1041  		p := s.Prog(loong64.AMOVV)
  1042  		p.From.Type = obj.TYPE_REG
  1043  		p.From.Reg = loong64.REGZERO
  1044  		p.To.Type = obj.TYPE_REG
  1045  		p.To.Reg = v.Reg0()
  1046  
  1047  		p1 := s.Prog(loong64.ADBAR)
  1048  		p1.From.Type = obj.TYPE_CONST
  1049  		p1.From.Offset = 0x14
  1050  
  1051  		p2 := s.Prog(ll)
  1052  		p2.From.Type = obj.TYPE_MEM
  1053  		p2.From.Reg = v.Args[0].Reg()
  1054  		p2.To.Type = obj.TYPE_REG
  1055  		p2.To.Reg = loong64.REGTMP
  1056  
  1057  		p3 := s.Prog(loong64.ABNE)
  1058  		p3.From.Type = obj.TYPE_REG
  1059  		p3.From.Reg = v.Args[1].Reg()
  1060  		p3.Reg = loong64.REGTMP
  1061  		p3.To.Type = obj.TYPE_BRANCH
  1062  
  1063  		p4 := s.Prog(loong64.AMOVV)
  1064  		p4.From.Type = obj.TYPE_REG
  1065  		p4.From.Reg = v.Args[2].Reg()
  1066  		p4.To.Type = obj.TYPE_REG
  1067  		p4.To.Reg = v.Reg0()
  1068  
  1069  		p5 := s.Prog(sc)
  1070  		p5.From.Type = obj.TYPE_REG
  1071  		p5.From.Reg = v.Reg0()
  1072  		p5.To.Type = obj.TYPE_MEM
  1073  		p5.To.Reg = v.Args[0].Reg()
  1074  
  1075  		p6 := s.Prog(loong64.ABEQ)
  1076  		p6.From.Type = obj.TYPE_REG
  1077  		p6.From.Reg = v.Reg0()
  1078  		p6.To.Type = obj.TYPE_BRANCH
  1079  		p6.To.SetTarget(p2)
  1080  
  1081  		p7 := s.Prog(loong64.ADBAR)
  1082  		p7.From.Type = obj.TYPE_CONST
  1083  		p7.From.Offset = 0x12
  1084  		p3.To.SetTarget(p7)
  1085  
  1086  	case ssa.OpLOONG64LoweredAtomicAnd32,
  1087  		ssa.OpLOONG64LoweredAtomicOr32:
  1088  		// AM{AND,OR}DBx  Rarg1, (Rarg0), RegZero
  1089  		p := s.Prog(v.Op.Asm())
  1090  		p.From.Type = obj.TYPE_REG
  1091  		p.From.Reg = v.Args[1].Reg()
  1092  		p.To.Type = obj.TYPE_MEM
  1093  		p.To.Reg = v.Args[0].Reg()
  1094  		p.RegTo2 = loong64.REGZERO
  1095  
  1096  	case ssa.OpLOONG64LoweredAtomicAnd32value,
  1097  		ssa.OpLOONG64LoweredAtomicAnd64value,
  1098  		ssa.OpLOONG64LoweredAtomicOr64value,
  1099  		ssa.OpLOONG64LoweredAtomicOr32value:
  1100  		// AM{AND,OR}DBx  Rarg1, (Rarg0), Rout
  1101  		p := s.Prog(v.Op.Asm())
  1102  		p.From.Type = obj.TYPE_REG
  1103  		p.From.Reg = v.Args[1].Reg()
  1104  		p.To.Type = obj.TYPE_MEM
  1105  		p.To.Reg = v.Args[0].Reg()
  1106  		p.RegTo2 = v.Reg0()
  1107  
  1108  	case ssa.OpLOONG64LoweredAtomicCas64Variant, ssa.OpLOONG64LoweredAtomicCas32Variant:
  1109  		// MOVV         $0, Rout
  1110  		// MOVV         Rarg1, Rtmp
  1111  		// AMCASDBx     Rarg2, (Rarg0), Rtmp
  1112  		// BNE          Rarg1, Rtmp, 2(PC)
  1113  		// MOVV         $1, Rout
  1114  		// NOP
  1115  
  1116  		amcasx := loong64.AAMCASDBV
  1117  		if v.Op == ssa.OpLOONG64LoweredAtomicCas32Variant {
  1118  			amcasx = loong64.AAMCASDBW
  1119  		}
  1120  
  1121  		p := s.Prog(loong64.AMOVV)
  1122  		p.From.Type = obj.TYPE_REG
  1123  		p.From.Reg = loong64.REGZERO
  1124  		p.To.Type = obj.TYPE_REG
  1125  		p.To.Reg = v.Reg0()
  1126  
  1127  		p1 := s.Prog(loong64.AMOVV)
  1128  		p1.From.Type = obj.TYPE_REG
  1129  		p1.From.Reg = v.Args[1].Reg()
  1130  		p1.To.Type = obj.TYPE_REG
  1131  		p1.To.Reg = loong64.REGTMP
  1132  
  1133  		p2 := s.Prog(amcasx)
  1134  		p2.From.Type = obj.TYPE_REG
  1135  		p2.From.Reg = v.Args[2].Reg()
  1136  		p2.To.Type = obj.TYPE_MEM
  1137  		p2.To.Reg = v.Args[0].Reg()
  1138  		p2.RegTo2 = loong64.REGTMP
  1139  
  1140  		p3 := s.Prog(loong64.ABNE)
  1141  		p3.From.Type = obj.TYPE_REG
  1142  		p3.From.Reg = v.Args[1].Reg()
  1143  		p3.Reg = loong64.REGTMP
  1144  		p3.To.Type = obj.TYPE_BRANCH
  1145  
  1146  		p4 := s.Prog(loong64.AMOVV)
  1147  		p4.From.Type = obj.TYPE_CONST
  1148  		p4.From.Offset = 0x1
  1149  		p4.To.Type = obj.TYPE_REG
  1150  		p4.To.Reg = v.Reg0()
  1151  
  1152  		p5 := s.Prog(obj.ANOP)
  1153  		p3.To.SetTarget(p5)
  1154  
  1155  	case ssa.OpLOONG64LoweredNilCheck:
  1156  		// Issue a load which will fault if arg is nil.
  1157  		p := s.Prog(loong64.AMOVB)
  1158  		p.From.Type = obj.TYPE_MEM
  1159  		p.From.Reg = v.Args[0].Reg()
  1160  		ssagen.AddAux(&p.From, v)
  1161  		p.To.Type = obj.TYPE_REG
  1162  		p.To.Reg = loong64.REGTMP
  1163  		if logopt.Enabled() {
  1164  			logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
  1165  		}
  1166  		if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
  1167  			base.WarnfAt(v.Pos, "generated nil check")
  1168  		}
  1169  	case ssa.OpLOONG64FPFlagTrue,
  1170  		ssa.OpLOONG64FPFlagFalse:
  1171  		// MOVV	$0, r
  1172  		// BFPF	2(PC)
  1173  		// MOVV	$1, r
  1174  		branch := loong64.ABFPF
  1175  		if v.Op == ssa.OpLOONG64FPFlagFalse {
  1176  			branch = loong64.ABFPT
  1177  		}
  1178  		p := s.Prog(loong64.AMOVV)
  1179  		p.From.Type = obj.TYPE_REG
  1180  		p.From.Reg = loong64.REGZERO
  1181  		p.To.Type = obj.TYPE_REG
  1182  		p.To.Reg = v.Reg()
  1183  		p2 := s.Prog(branch)
  1184  		p2.To.Type = obj.TYPE_BRANCH
  1185  		p3 := s.Prog(loong64.AMOVV)
  1186  		p3.From.Type = obj.TYPE_CONST
  1187  		p3.From.Offset = 1
  1188  		p3.To.Type = obj.TYPE_REG
  1189  		p3.To.Reg = v.Reg()
  1190  		p4 := s.Prog(obj.ANOP) // not a machine instruction, for branch to land
  1191  		p2.To.SetTarget(p4)
  1192  	case ssa.OpLOONG64LoweredGetClosurePtr:
  1193  		// Closure pointer is R22 (loong64.REGCTXT).
  1194  		ssagen.CheckLoweredGetClosurePtr(v)
  1195  	case ssa.OpLOONG64LoweredGetCallerSP:
  1196  		// caller's SP is FixedFrameSize below the address of the first arg
  1197  		p := s.Prog(loong64.AMOVV)
  1198  		p.From.Type = obj.TYPE_ADDR
  1199  		p.From.Offset = -base.Ctxt.Arch.FixedFrameSize
  1200  		p.From.Name = obj.NAME_PARAM
  1201  		p.To.Type = obj.TYPE_REG
  1202  		p.To.Reg = v.Reg()
  1203  	case ssa.OpLOONG64LoweredGetCallerPC:
  1204  		p := s.Prog(obj.AGETCALLERPC)
  1205  		p.To.Type = obj.TYPE_REG
  1206  		p.To.Reg = v.Reg()
  1207  	case ssa.OpLOONG64MASKEQZ, ssa.OpLOONG64MASKNEZ:
  1208  		p := s.Prog(v.Op.Asm())
  1209  		p.From.Type = obj.TYPE_REG
  1210  		p.From.Reg = v.Args[1].Reg()
  1211  		p.Reg = v.Args[0].Reg()
  1212  		p.To.Type = obj.TYPE_REG
  1213  		p.To.Reg = v.Reg()
  1214  
  1215  	case ssa.OpLOONG64PRELD:
  1216  		// PRELD (Rarg0), hint
  1217  		p := s.Prog(v.Op.Asm())
  1218  		p.From.Type = obj.TYPE_MEM
  1219  		p.From.Reg = v.Args[0].Reg()
  1220  		p.AddRestSourceConst(v.AuxInt & 0x1f)
  1221  
  1222  	case ssa.OpLOONG64PRELDX:
  1223  		// PRELDX (Rarg0), $n, $hint
  1224  		p := s.Prog(v.Op.Asm())
  1225  		p.From.Type = obj.TYPE_MEM
  1226  		p.From.Reg = v.Args[0].Reg()
  1227  		p.AddRestSourceArgs([]obj.Addr{
  1228  			{Type: obj.TYPE_CONST, Offset: (v.AuxInt >> 5) & 0x1fffffffff},
  1229  			{Type: obj.TYPE_CONST, Offset: (v.AuxInt >> 0) & 0x1f},
  1230  		})
  1231  
  1232  	case ssa.OpLOONG64ADDshiftLLV:
  1233  		// ADDshiftLLV Rarg0, Rarg1, $shift
  1234  		// ALSLV $shift, Rarg1, Rarg0, Rtmp
  1235  		p := s.Prog(v.Op.Asm())
  1236  		p.From.Type = obj.TYPE_CONST
  1237  		p.From.Offset = v.AuxInt
  1238  		p.Reg = v.Args[1].Reg()
  1239  		p.AddRestSourceReg(v.Args[0].Reg())
  1240  		p.To.Type = obj.TYPE_REG
  1241  		p.To.Reg = v.Reg()
  1242  
  1243  	case ssa.OpClobber, ssa.OpClobberReg:
  1244  		// TODO: implement for clobberdead experiment. Nop is ok for now.
  1245  	default:
  1246  		v.Fatalf("genValue not implemented: %s", v.LongString())
  1247  	}
  1248  }
  1249  
// blockJump maps each conditional SSA block kind to its branch
// instructions: asm branches when the condition holds (used to reach
// Succs[0]), and invasm is the inverted form (used to reach Succs[1]
// when Succs[0] falls through).
var blockJump = map[ssa.BlockKind]struct {
	asm, invasm obj.As
}{
	ssa.BlockLOONG64EQZ:  {loong64.ABEQ, loong64.ABNE},
	ssa.BlockLOONG64NEZ:  {loong64.ABNE, loong64.ABEQ},
	ssa.BlockLOONG64LTZ:  {loong64.ABLTZ, loong64.ABGEZ},
	ssa.BlockLOONG64GEZ:  {loong64.ABGEZ, loong64.ABLTZ},
	ssa.BlockLOONG64LEZ:  {loong64.ABLEZ, loong64.ABGTZ},
	ssa.BlockLOONG64GTZ:  {loong64.ABGTZ, loong64.ABLEZ},
	ssa.BlockLOONG64FPT:  {loong64.ABFPT, loong64.ABFPF},
	ssa.BlockLOONG64FPF:  {loong64.ABFPF, loong64.ABFPT},
	ssa.BlockLOONG64BEQ:  {loong64.ABEQ, loong64.ABNE},
	ssa.BlockLOONG64BNE:  {loong64.ABNE, loong64.ABEQ},
	ssa.BlockLOONG64BGE:  {loong64.ABGE, loong64.ABLT},
	ssa.BlockLOONG64BLT:  {loong64.ABLT, loong64.ABGE},
	ssa.BlockLOONG64BLTU: {loong64.ABLTU, loong64.ABGEU},
	ssa.BlockLOONG64BGEU: {loong64.ABGEU, loong64.ABLTU},
}
  1268  
// ssaGenBlock emits the control-flow instructions that terminate block b.
// next is the block laid out immediately after b; a jump to next can be
// elided and replaced by fallthrough.
func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
	switch b.Kind {
	case ssa.BlockPlain, ssa.BlockDefer:
		// Single successor: emit a jump only when it does not fall through.
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			// Target is resolved later, once final block layout is known.
			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockExit, ssa.BlockRetJmp:
		// No instructions needed here.
	case ssa.BlockRet:
		s.Prog(obj.ARET)
	case ssa.BlockLOONG64EQZ, ssa.BlockLOONG64NEZ,
		ssa.BlockLOONG64LTZ, ssa.BlockLOONG64GEZ,
		ssa.BlockLOONG64LEZ, ssa.BlockLOONG64GTZ,
		ssa.BlockLOONG64BEQ, ssa.BlockLOONG64BNE,
		ssa.BlockLOONG64BLT, ssa.BlockLOONG64BGE,
		ssa.BlockLOONG64BLTU, ssa.BlockLOONG64BGEU,
		ssa.BlockLOONG64FPT, ssa.BlockLOONG64FPF:
		jmp := blockJump[b.Kind]
		var p *obj.Prog
		switch next {
		case b.Succs[0].Block():
			// Taken successor falls through; branch (inverted) to the other.
			p = s.Br(jmp.invasm, b.Succs[1].Block())
		case b.Succs[1].Block():
			// Untaken successor falls through; branch normally.
			p = s.Br(jmp.asm, b.Succs[0].Block())
		default:
			// Neither successor falls through: conditional branch plus an
			// unconditional jump, with the likely path on the branch.
			if b.Likely != ssa.BranchUnlikely {
				p = s.Br(jmp.asm, b.Succs[0].Block())
				s.Br(obj.AJMP, b.Succs[1].Block())
			} else {
				p = s.Br(jmp.invasm, b.Succs[1].Block())
				s.Br(obj.AJMP, b.Succs[0].Block())
			}
		}
		// Attach the control value(s) as operands of the branch.
		switch b.Kind {
		case ssa.BlockLOONG64BEQ, ssa.BlockLOONG64BNE,
			ssa.BlockLOONG64BGE, ssa.BlockLOONG64BLT,
			ssa.BlockLOONG64BGEU, ssa.BlockLOONG64BLTU:
			// Two-register compare-and-branch forms.
			p.From.Type = obj.TYPE_REG
			p.From.Reg = b.Controls[0].Reg()
			p.Reg = b.Controls[1].Reg()
		case ssa.BlockLOONG64EQZ, ssa.BlockLOONG64NEZ,
			ssa.BlockLOONG64LTZ, ssa.BlockLOONG64GEZ,
			ssa.BlockLOONG64LEZ, ssa.BlockLOONG64GTZ,
			ssa.BlockLOONG64FPT, ssa.BlockLOONG64FPF:
			// Single-register forms; FP-flag branches (flags-typed control)
			// take no register operand.
			if !b.Controls[0].Type.IsFlags() {
				p.From.Type = obj.TYPE_REG
				p.From.Reg = b.Controls[0].Reg()
			}
		}
	case ssa.BlockLOONG64JUMPTABLE:
		// Indexed jump through the jump table at Controls[1]:
		// ALSLV $3, Rarg0, Rarg1, REGTMP
		// MOVV (REGTMP), REGTMP
		// JMP	(REGTMP)
		p := s.Prog(loong64.AALSLV)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 3 // idx*8
		p.Reg = b.Controls[0].Reg()
		p.AddRestSourceReg(b.Controls[1].Reg())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = loong64.REGTMP
		p1 := s.Prog(loong64.AMOVV)
		p1.From.Type = obj.TYPE_MEM
		p1.From.Reg = loong64.REGTMP
		p1.From.Offset = 0
		p1.To.Type = obj.TYPE_REG
		p1.To.Reg = loong64.REGTMP
		p2 := s.Prog(obj.AJMP)
		p2.To.Type = obj.TYPE_MEM
		p2.To.Reg = loong64.REGTMP
		// Save jump tables for later resolution of the target blocks.
		s.JumpTables = append(s.JumpTables, b)

	default:
		b.Fatalf("branch not implemented: %s", b.LongString())
	}
}
  1346  
// loadRegResult emits a load of the value of n (at offset off within n's
// stack slot) into register reg, sized via loadByType for type t, and
// returns the generated instruction. f is unused here — presumably the
// signature matches what ssagen expects across architectures; confirm
// against the other backends.
func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
	p := s.Prog(loadByType(t, reg))
	p.From.Type = obj.TYPE_MEM
	p.From.Name = obj.NAME_AUTO // n is addressed as a local (auto) slot
	p.From.Sym = n.Linksym()
	p.From.Offset = n.FrameOffset() + off
	p.To.Type = obj.TYPE_REG
	p.To.Reg = reg
	return p
}
  1357  
// spillArgReg appends, after p, a store of register reg into the parameter
// slot of n at offset off, and returns the new instruction. f is unused
// here. The instruction is marked NotStmt so it is not treated as a
// statement boundary for debugging purposes.
func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
	p = pp.Append(p, storeByType(t, reg), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
	p.To.Name = obj.NAME_PARAM
	p.To.Sym = n.Linksym()
	p.Pos = p.Pos.WithNotStmt()
	return p
}
  1365  
  1366  // move8 copies 8 bytes at src+off to dst+off.
  1367  func move8(s *ssagen.State, src, dst, tmp int16, off int64) {
  1368  	// MOVV     off(src), tmp
  1369  	ld := s.Prog(loong64.AMOVV)
  1370  	ld.From.Type = obj.TYPE_MEM
  1371  	ld.From.Reg = src
  1372  	ld.From.Offset = off
  1373  	ld.To.Type = obj.TYPE_REG
  1374  	ld.To.Reg = tmp
  1375  	// MOVV     tmp, off(dst)
  1376  	st := s.Prog(loong64.AMOVV)
  1377  	st.From.Type = obj.TYPE_REG
  1378  	st.From.Reg = tmp
  1379  	st.To.Type = obj.TYPE_MEM
  1380  	st.To.Reg = dst
  1381  	st.To.Offset = off
  1382  }
  1383  
  1384  // zero8 zeroes 8 bytes at reg+off.
  1385  func zero8(s *ssagen.State, reg int16, off int64) {
  1386  	// MOVV   ZR, off(reg)
  1387  	p := s.Prog(loong64.AMOVV)
  1388  	p.From.Type = obj.TYPE_REG
  1389  	p.From.Reg = loong64.REGZERO
  1390  	p.To.Type = obj.TYPE_MEM
  1391  	p.To.Reg = reg
  1392  	p.To.Offset = off
  1393  }
  1394  
  1395  // zero16 zeroes 16 bytes at reg+off.
  1396  func zero16(s *ssagen.State, regZero, regBase int16, off int64) {
  1397  	// VMOVQ   regZero, off(regBase)
  1398  	p := s.Prog(loong64.AVMOVQ)
  1399  	p.From.Type = obj.TYPE_REG
  1400  	p.From.Reg = regZero
  1401  	p.To.Type = obj.TYPE_MEM
  1402  	p.To.Reg = regBase
  1403  	p.To.Offset = off
  1404  }
  1405  

View as plain text