Source file src/simd/archsimd/internal/simd_test/unary_amd64_test.go

     1  // Copyright 2025 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build goexperiment.simd && amd64
     6  
     7  package simd_test
     8  
     9  import (
    10  	"math"
    11  	"simd/archsimd"
    12  	"testing"
    13  )
    14  
    15  func TestAbsAMD64(t *testing.T) {
    16  	testFloat32x8Unary(t, archsimd.Float32x8.Abs, map1[float32](abs))
    17  	testFloat64x4Unary(t, archsimd.Float64x4.Abs, map1[float64](abs))
    18  	if archsimd.X86.AVX2() {
    19  		testInt8x32Unary(t, archsimd.Int8x32.Abs, map1[int8](abs))
    20  		testInt16x16Unary(t, archsimd.Int16x16.Abs, map1[int16](abs))
    21  		testInt32x8Unary(t, archsimd.Int32x8.Abs, map1[int32](abs))
    22  	}
    23  	if archsimd.X86.AVX512() {
    24  		testInt8x64Unary(t, archsimd.Int8x64.Abs, map1[int8](abs))
    25  		testInt16x32Unary(t, archsimd.Int16x32.Abs, map1[int16](abs))
    26  		testInt32x16Unary(t, archsimd.Int32x16.Abs, map1[int32](abs))
    27  		testInt64x4Unary(t, archsimd.Int64x4.Abs, map1[int64](abs))
    28  		testInt64x8Unary(t, archsimd.Int64x8.Abs, map1[int64](abs))
    29  		testFloat32x16Unary(t, archsimd.Float32x16.Abs, map1[float32](abs))
    30  		testFloat64x8Unary(t, archsimd.Float64x8.Abs, map1[float64](abs))
    31  	}
    32  }
    33  
    34  func TestNegAMD64(t *testing.T) {
    35  	testFloat32x8Unary(t, archsimd.Float32x8.Neg, map1[float32](neg))
    36  	testFloat64x4Unary(t, archsimd.Float64x4.Neg, map1[float64](neg))
    37  	if archsimd.X86.AVX2() {
    38  		testInt8x32Unary(t, archsimd.Int8x32.Neg, map1[int8](neg))
    39  		testInt16x16Unary(t, archsimd.Int16x16.Neg, map1[int16](neg))
    40  		testInt32x8Unary(t, archsimd.Int32x8.Neg, map1[int32](neg))
    41  		testInt64x4Unary(t, archsimd.Int64x4.Neg, map1[int64](neg))
    42  	}
    43  	if archsimd.X86.AVX512() {
    44  		testFloat32x16Unary(t, archsimd.Float32x16.Neg, map1[float32](neg))
    45  		testFloat64x8Unary(t, archsimd.Float64x8.Neg, map1[float64](neg))
    46  		testInt8x64Unary(t, archsimd.Int8x64.Neg, map1[int8](neg))
    47  		testInt16x32Unary(t, archsimd.Int16x32.Neg, map1[int16](neg))
    48  		testInt32x16Unary(t, archsimd.Int32x16.Neg, map1[int32](neg))
    49  		testInt64x8Unary(t, archsimd.Int64x8.Neg, map1[int64](neg))
    50  	}
    51  }
    52  
    53  func TestCeilAMD64(t *testing.T) {
    54  	testFloat32x8Unary(t, archsimd.Float32x8.Ceil, ceilSlice[float32])
    55  	testFloat64x4Unary(t, archsimd.Float64x4.Ceil, ceilSlice[float64])
    56  	if archsimd.X86.AVX512() {
    57  		// testFloat32x16Unary(t, archsimd.Float32x16.Ceil, ceilSlice[float32]) // missing
    58  		// testFloat64x8Unary(t, archsimd.Float64x8.Ceil, ceilSlice[float64])   // missing
    59  	}
    60  }
    61  
    62  func TestFloorAMD64(t *testing.T) {
    63  	testFloat32x8Unary(t, archsimd.Float32x8.Floor, floorSlice[float32])
    64  	testFloat64x4Unary(t, archsimd.Float64x4.Floor, floorSlice[float64])
    65  	if archsimd.X86.AVX512() {
    66  		// testFloat32x16Unary(t, archsimd.Float32x16.Floor, floorSlice[float32]) // missing
    67  		// testFloat64x8Unary(t, archsimd.Float64x8.Floor, floorSlice[float64])   // missing
    68  	}
    69  }
    70  
    71  func TestTruncAMD64(t *testing.T) {
    72  	testFloat32x8Unary(t, archsimd.Float32x8.Trunc, truncSlice[float32])
    73  	testFloat64x4Unary(t, archsimd.Float64x4.Trunc, truncSlice[float64])
    74  	if archsimd.X86.AVX512() {
    75  		// testFloat32x16Unary(t, archsimd.Float32x16.Trunc, truncSlice[float32]) // missing
    76  		// testFloat64x8Unary(t, archsimd.Float64x8.Trunc, truncSlice[float64])   // missing
    77  	}
    78  }
    79  
    80  func TestRoundAMD64(t *testing.T) {
    81  	testFloat32x8Unary(t, archsimd.Float32x8.Round, roundSlice[float32])
    82  	testFloat64x4Unary(t, archsimd.Float64x4.Round, roundSlice[float64])
    83  	if archsimd.X86.AVX512() {
    84  		// testFloat32x16Unary(t, archsimd.Float32x16.Round, roundSlice[float32]) // missing
    85  		// testFloat64x8Unary(t, archsimd.Float64x8.Round, roundSlice[float64])   // missing
    86  	}
    87  }
    88  
    89  func TestSqrtAMD64(t *testing.T) {
    90  	testFloat32x8Unary(t, archsimd.Float32x8.Sqrt, sqrtSlice[float32])
    91  	testFloat64x4Unary(t, archsimd.Float64x4.Sqrt, sqrtSlice[float64])
    92  	if archsimd.X86.AVX512() {
    93  		testFloat32x16Unary(t, archsimd.Float32x16.Sqrt, sqrtSlice[float32])
    94  		testFloat64x8Unary(t, archsimd.Float64x8.Sqrt, sqrtSlice[float64])
    95  	}
    96  }
    97  
    98  func TestNotAMD64(t *testing.T) {
    99  	testInt16x8Unary(t, archsimd.Int16x8.Not, map1[int16](not))
   100  
   101  	if archsimd.X86.AVX2() {
   102  		testInt8x32Unary(t, archsimd.Int8x32.Not, map1[int8](not))
   103  		testInt16x16Unary(t, archsimd.Int16x16.Not, map1[int16](not))
   104  		testInt32x8Unary(t, archsimd.Int32x8.Not, map1[int32](not))
   105  	}
   106  }
   107  
   108  func TestCeilScaledResidue(t *testing.T) {
   109  	if !archsimd.X86.AVX512() {
   110  		t.Skip("Needs AVX512")
   111  	}
   112  	testFloat64x8UnaryFlaky(t,
   113  		func(x archsimd.Float64x8) archsimd.Float64x8 { return x.CeilScaledResidue(0) },
   114  		map1(ceilResidueForPrecision[float64](0)),
   115  		0.001)
   116  	testFloat64x8UnaryFlaky(t,
   117  		func(x archsimd.Float64x8) archsimd.Float64x8 { return x.CeilScaledResidue(1) },
   118  		map1(ceilResidueForPrecision[float64](1)),
   119  		0.001)
   120  	testFloat64x8Unary(t,
   121  		func(x archsimd.Float64x8) archsimd.Float64x8 { return x.Sub(x.CeilScaled(0)) },
   122  		map1[float64](func(x float64) float64 { return x - math.Ceil(x) }))
   123  }
   124  
   125  func TestConvert(t *testing.T) {
   126  	testFloat64x2ConvertToFloat32(t, archsimd.Float64x2.ConvertToFloat32, map1n[float64](toFloat32, 4))
   127  	testFloat64x4ConvertToFloat32(t, archsimd.Float64x4.ConvertToFloat32, map1[float64](toFloat32))
   128  	testFloat32x4ConvertToFloat64(t, archsimd.Float32x4.ConvertToFloat64, map1[float32](toFloat64))
   129  
   130  	testFloat32x4ConvertToInt32(t, archsimd.Float32x4.ConvertToInt32, map1[float32](floatToInt32_x86))
   131  	testFloat32x8ConvertToInt32(t, archsimd.Float32x8.ConvertToInt32, map1[float32](floatToInt32_x86))
   132  	testFloat64x2ConvertToInt32(t, archsimd.Float64x2.ConvertToInt32, map1n[float64](floatToInt32_x86, 4))
   133  	testFloat64x4ConvertToInt32(t, archsimd.Float64x4.ConvertToInt32, map1[float64](floatToInt32_x86))
   134  
   135  	testInt32x4ConvertToFloat32(t, archsimd.Int32x4.ConvertToFloat32, map1[int32](toFloat32))
   136  	testInt32x8ConvertToFloat32(t, archsimd.Int32x8.ConvertToFloat32, map1[int32](toFloat32))
   137  	testInt32x4ConvertToFloat64(t, archsimd.Int32x4.ConvertToFloat64, map1[int32](toFloat64))
   138  
   139  	if archsimd.X86.AVX512() {
   140  		testFloat32x8ConvertToFloat64(t, archsimd.Float32x8.ConvertToFloat64, map1[float32](toFloat64))
   141  		testFloat64x8ConvertToFloat32(t, archsimd.Float64x8.ConvertToFloat32, map1[float64](toFloat32))
   142  
   143  		testFloat32x16ConvertToInt32(t, archsimd.Float32x16.ConvertToInt32, map1[float32](floatToInt32_x86))
   144  		testFloat64x8ConvertToInt32(t, archsimd.Float64x8.ConvertToInt32, map1[float64](floatToInt32_x86))
   145  		testFloat32x4ConvertToInt64(t, archsimd.Float32x4.ConvertToInt64, map1[float32](floatToInt64_x86))
   146  		testFloat32x8ConvertToInt64(t, archsimd.Float32x8.ConvertToInt64, map1[float32](floatToInt64_x86))
   147  		testFloat64x2ConvertToInt64(t, archsimd.Float64x2.ConvertToInt64, map1[float64](floatToInt64_x86))
   148  		testFloat64x4ConvertToInt64(t, archsimd.Float64x4.ConvertToInt64, map1[float64](floatToInt64_x86))
   149  		testFloat64x8ConvertToInt64(t, archsimd.Float64x8.ConvertToInt64, map1[float64](floatToInt64_x86))
   150  
   151  		testFloat32x4ConvertToUint32(t, archsimd.Float32x4.ConvertToUint32, map1[float32](floatToUint32_x86))
   152  		testFloat32x8ConvertToUint32(t, archsimd.Float32x8.ConvertToUint32, map1[float32](floatToUint32_x86))
   153  		testFloat32x16ConvertToUint32(t, archsimd.Float32x16.ConvertToUint32, map1[float32](floatToUint32_x86))
   154  		testFloat64x2ConvertToUint32(t, archsimd.Float64x2.ConvertToUint32, map1n[float64](floatToUint32_x86, 4))
   155  		testFloat64x4ConvertToUint32(t, archsimd.Float64x4.ConvertToUint32, map1[float64](floatToUint32_x86))
   156  		testFloat64x8ConvertToUint32(t, archsimd.Float64x8.ConvertToUint32, map1[float64](floatToUint32_x86))
   157  		testFloat32x4ConvertToUint64(t, archsimd.Float32x4.ConvertToUint64, map1[float32](floatToUint64_x86))
   158  		testFloat32x8ConvertToUint64(t, archsimd.Float32x8.ConvertToUint64, map1[float32](floatToUint64_x86))
   159  		testFloat64x2ConvertToUint64(t, archsimd.Float64x2.ConvertToUint64, map1[float64](floatToUint64_x86))
   160  		testFloat64x4ConvertToUint64(t, archsimd.Float64x4.ConvertToUint64, map1[float64](floatToUint64_x86))
   161  		testFloat64x8ConvertToUint64(t, archsimd.Float64x8.ConvertToUint64, map1[float64](floatToUint64_x86))
   162  
   163  		testInt32x16ConvertToFloat32(t, archsimd.Int32x16.ConvertToFloat32, map1[int32](toFloat32))
   164  		testInt64x2ConvertToFloat32(t, archsimd.Int64x2.ConvertToFloat32, map1n[int64](toFloat32, 4))
   165  		testInt64x4ConvertToFloat32(t, archsimd.Int64x4.ConvertToFloat32, map1[int64](toFloat32))
   166  		testInt64x8ConvertToFloat32(t, archsimd.Int64x8.ConvertToFloat32, map1[int64](toFloat32))
   167  		testInt64x2ConvertToFloat64(t, archsimd.Int64x2.ConvertToFloat64, map1[int64](toFloat64))
   168  		testInt64x4ConvertToFloat64(t, archsimd.Int64x4.ConvertToFloat64, map1[int64](toFloat64))
   169  		testInt64x8ConvertToFloat64(t, archsimd.Int64x8.ConvertToFloat64, map1[int64](toFloat64))
   170  
   171  		testUint32x4ConvertToFloat32(t, archsimd.Uint32x4.ConvertToFloat32, map1[uint32](toFloat32))
   172  		testUint32x8ConvertToFloat32(t, archsimd.Uint32x8.ConvertToFloat32, map1[uint32](toFloat32))
   173  		testUint32x16ConvertToFloat32(t, archsimd.Uint32x16.ConvertToFloat32, map1[uint32](toFloat32))
   174  		testUint64x2ConvertToFloat32(t, archsimd.Uint64x2.ConvertToFloat32, map1n[uint64](toFloat32, 4))
   175  		testUint64x4ConvertToFloat32(t, archsimd.Uint64x4.ConvertToFloat32, map1[uint64](toFloat32))
   176  		testUint64x8ConvertToFloat32(t, archsimd.Uint64x8.ConvertToFloat32, map1[uint64](toFloat32))
   177  		testUint32x4ConvertToFloat64(t, archsimd.Uint32x4.ConvertToFloat64, map1[uint32](toFloat64))
   178  		testUint32x8ConvertToFloat64(t, archsimd.Uint32x8.ConvertToFloat64, map1[uint32](toFloat64))
   179  		testUint64x2ConvertToFloat64(t, archsimd.Uint64x2.ConvertToFloat64, map1[uint64](toFloat64))
   180  		testUint64x4ConvertToFloat64(t, archsimd.Uint64x4.ConvertToFloat64, map1[uint64](toFloat64))
   181  		testUint64x8ConvertToFloat64(t, archsimd.Uint64x8.ConvertToFloat64, map1[uint64](toFloat64))
   182  	}
   183  }
   184  
   185  func TestExtend(t *testing.T) {
   186  	if archsimd.X86.AVX2() {
   187  		testInt8x16ConvertToInt16(t, archsimd.Int8x16.ExtendToInt16, map1[int8](toInt16))
   188  		testInt16x8ConvertToInt32(t, archsimd.Int16x8.ExtendToInt32, map1[int16](toInt32))
   189  		testInt32x4ConvertToInt64(t, archsimd.Int32x4.ExtendToInt64, map1[int32](toInt64))
   190  		testUint8x16ConvertToUint16(t, archsimd.Uint8x16.ExtendToUint16, map1[uint8](toUint16))
   191  		testUint16x8ConvertToUint32(t, archsimd.Uint16x8.ExtendToUint32, map1[uint16](toUint32))
   192  		testUint32x4ConvertToUint64(t, archsimd.Uint32x4.ExtendToUint64, map1[uint32](toUint64))
   193  	}
   194  
   195  	if archsimd.X86.AVX512() {
   196  		testInt8x32ConvertToInt16(t, archsimd.Int8x32.ExtendToInt16, map1[int8](toInt16))
   197  		testInt8x16ConvertToInt32(t, archsimd.Int8x16.ExtendToInt32, map1[int8](toInt32))
   198  		testInt16x16ConvertToInt32(t, archsimd.Int16x16.ExtendToInt32, map1[int16](toInt32))
   199  		testInt16x8ConvertToInt64(t, archsimd.Int16x8.ExtendToInt64, map1[int16](toInt64))
   200  		testInt32x8ConvertToInt64(t, archsimd.Int32x8.ExtendToInt64, map1[int32](toInt64))
   201  		testUint8x32ConvertToUint16(t, archsimd.Uint8x32.ExtendToUint16, map1[uint8](toUint16))
   202  		testUint8x16ConvertToUint32(t, archsimd.Uint8x16.ExtendToUint32, map1[uint8](toUint32))
   203  		testUint16x16ConvertToUint32(t, archsimd.Uint16x16.ExtendToUint32, map1[uint16](toUint32))
   204  		testUint16x8ConvertToUint64(t, archsimd.Uint16x8.ExtendToUint64, map1[uint16](toUint64))
   205  		testUint32x8ConvertToUint64(t, archsimd.Uint32x8.ExtendToUint64, map1[uint32](toUint64))
   206  	}
   207  }
   208  
   209  func TestExtendLo(t *testing.T) {
   210  	testInt8x16ConvertLoToInt64x2(t, archsimd.Int8x16.ExtendLo2ToInt64, map1n[int8](toInt64, 2))
   211  	testInt16x8ConvertLoToInt64x2(t, archsimd.Int16x8.ExtendLo2ToInt64, map1n[int16](toInt64, 2))
   212  	testInt32x4ConvertLoToInt64x2(t, archsimd.Int32x4.ExtendLo2ToInt64, map1n[int32](toInt64, 2))
   213  	testUint8x16ConvertLoToUint64x2(t, archsimd.Uint8x16.ExtendLo2ToUint64, map1n[uint8](toUint64, 2))
   214  	testUint16x8ConvertLoToUint64x2(t, archsimd.Uint16x8.ExtendLo2ToUint64, map1n[uint16](toUint64, 2))
   215  	testUint32x4ConvertLoToUint64x2(t, archsimd.Uint32x4.ExtendLo2ToUint64, map1n[uint32](toUint64, 2))
   216  	testInt8x16ConvertLoToInt32x4(t, archsimd.Int8x16.ExtendLo4ToInt32, map1n[int8](toInt32, 4))
   217  	testInt16x8ConvertLoToInt32x4(t, archsimd.Int16x8.ExtendLo4ToInt32, map1n[int16](toInt32, 4))
   218  	testUint8x16ConvertLoToUint32x4(t, archsimd.Uint8x16.ExtendLo4ToUint32, map1n[uint8](toUint32, 4))
   219  	testUint16x8ConvertLoToUint32x4(t, archsimd.Uint16x8.ExtendLo4ToUint32, map1n[uint16](toUint32, 4))
   220  	testInt8x16ConvertLoToInt16x8(t, archsimd.Int8x16.ExtendLo8ToInt16, map1n[int8](toInt16, 8))
   221  	testUint8x16ConvertLoToUint16x8(t, archsimd.Uint8x16.ExtendLo8ToUint16, map1n[uint8](toUint16, 8))
   222  
   223  	if archsimd.X86.AVX2() {
   224  		testInt8x16ConvertLoToInt64x4(t, archsimd.Int8x16.ExtendLo4ToInt64, map1n[int8](toInt64, 4))
   225  		testInt16x8ConvertLoToInt64x4(t, archsimd.Int16x8.ExtendLo4ToInt64, map1n[int16](toInt64, 4))
   226  		testUint8x16ConvertLoToUint64x4(t, archsimd.Uint8x16.ExtendLo4ToUint64, map1n[uint8](toUint64, 4))
   227  		testUint16x8ConvertLoToUint64x4(t, archsimd.Uint16x8.ExtendLo4ToUint64, map1n[uint16](toUint64, 4))
   228  		testInt8x16ConvertLoToInt32x8(t, archsimd.Int8x16.ExtendLo8ToInt32, map1n[int8](toInt32, 8))
   229  		testUint8x16ConvertLoToUint32x8(t, archsimd.Uint8x16.ExtendLo8ToUint32, map1n[uint8](toUint32, 8))
   230  	}
   231  
   232  	if archsimd.X86.AVX512() {
   233  		testInt8x16ConvertToInt64(t, archsimd.Int8x16.ExtendLo8ToInt64, map1n[int8](toInt64, 8))
   234  		testUint8x16ConvertToUint64(t, archsimd.Uint8x16.ExtendLo8ToUint64, map1n[uint8](toUint64, 8))
   235  	}
   236  }
   237  
   238  func TestTruncate(t *testing.T) {
   239  	if archsimd.X86.AVX512() {
   240  		testInt16x8ConvertToInt8(t, archsimd.Int16x8.TruncToInt8, map1n[int16](toInt8, 16))
   241  		testInt16x16ConvertToInt8(t, archsimd.Int16x16.TruncToInt8, map1[int16](toInt8))
   242  		testInt16x32ConvertToInt8(t, archsimd.Int16x32.TruncToInt8, map1[int16](toInt8))
   243  		testInt32x4ConvertToInt8(t, archsimd.Int32x4.TruncToInt8, map1n[int32](toInt8, 16))
   244  		testInt32x8ConvertToInt8(t, archsimd.Int32x8.TruncToInt8, map1n[int32](toInt8, 16))
   245  		testInt32x16ConvertToInt8(t, archsimd.Int32x16.TruncToInt8, map1[int32](toInt8))
   246  		testInt64x2ConvertToInt8(t, archsimd.Int64x2.TruncToInt8, map1n[int64](toInt8, 16))
   247  		testInt64x4ConvertToInt8(t, archsimd.Int64x4.TruncToInt8, map1n[int64](toInt8, 16))
   248  		testInt64x8ConvertToInt8(t, archsimd.Int64x8.TruncToInt8, map1n[int64](toInt8, 16))
   249  		testInt32x4ConvertToInt16(t, archsimd.Int32x4.TruncToInt16, map1n[int32](toInt16, 8))
   250  		testInt32x8ConvertToInt16(t, archsimd.Int32x8.TruncToInt16, map1[int32](toInt16))
   251  		testInt32x16ConvertToInt16(t, archsimd.Int32x16.TruncToInt16, map1[int32](toInt16))
   252  		testInt64x2ConvertToInt16(t, archsimd.Int64x2.TruncToInt16, map1n[int64](toInt16, 8))
   253  		testInt64x4ConvertToInt16(t, archsimd.Int64x4.TruncToInt16, map1n[int64](toInt16, 8))
   254  		testInt64x8ConvertToInt16(t, archsimd.Int64x8.TruncToInt16, map1[int64](toInt16))
   255  		testInt64x2ConvertToInt32(t, archsimd.Int64x2.TruncToInt32, map1n[int64](toInt32, 4))
   256  		testInt64x4ConvertToInt32(t, archsimd.Int64x4.TruncToInt32, map1[int64](toInt32))
   257  		testInt64x8ConvertToInt32(t, archsimd.Int64x8.TruncToInt32, map1[int64](toInt32))
   258  
   259  		testUint16x8ConvertToUint8(t, archsimd.Uint16x8.TruncToUint8, map1n[uint16](toUint8, 16))
   260  		testUint16x16ConvertToUint8(t, archsimd.Uint16x16.TruncToUint8, map1[uint16](toUint8))
   261  		testUint16x32ConvertToUint8(t, archsimd.Uint16x32.TruncToUint8, map1[uint16](toUint8))
   262  		testUint32x4ConvertToUint8(t, archsimd.Uint32x4.TruncToUint8, map1n[uint32](toUint8, 16))
   263  		testUint32x8ConvertToUint8(t, archsimd.Uint32x8.TruncToUint8, map1n[uint32](toUint8, 16))
   264  		testUint32x16ConvertToUint8(t, archsimd.Uint32x16.TruncToUint8, map1[uint32](toUint8))
   265  		testUint64x2ConvertToUint8(t, archsimd.Uint64x2.TruncToUint8, map1n[uint64](toUint8, 16))
   266  		testUint64x4ConvertToUint8(t, archsimd.Uint64x4.TruncToUint8, map1n[uint64](toUint8, 16))
   267  		testUint64x8ConvertToUint8(t, archsimd.Uint64x8.TruncToUint8, map1n[uint64](toUint8, 16))
   268  		testUint32x4ConvertToUint16(t, archsimd.Uint32x4.TruncToUint16, map1n[uint32](toUint16, 8))
   269  		testUint32x8ConvertToUint16(t, archsimd.Uint32x8.TruncToUint16, map1[uint32](toUint16))
   270  		testUint32x16ConvertToUint16(t, archsimd.Uint32x16.TruncToUint16, map1[uint32](toUint16))
   271  		testUint64x2ConvertToUint16(t, archsimd.Uint64x2.TruncToUint16, map1n[uint64](toUint16, 8))
   272  		testUint64x4ConvertToUint16(t, archsimd.Uint64x4.TruncToUint16, map1n[uint64](toUint16, 8))
   273  		testUint64x8ConvertToUint16(t, archsimd.Uint64x8.TruncToUint16, map1[uint64](toUint16))
   274  		testUint64x2ConvertToUint32(t, archsimd.Uint64x2.TruncToUint32, map1n[uint64](toUint32, 4))
   275  		testUint64x4ConvertToUint32(t, archsimd.Uint64x4.TruncToUint32, map1[uint64](toUint32))
   276  		testUint64x8ConvertToUint32(t, archsimd.Uint64x8.TruncToUint32, map1[uint64](toUint32))
   277  	}
   278  }
   279  
   280  func TestSaturate(t *testing.T) {
   281  	if archsimd.X86.AVX512() {
   282  		testInt16x8ConvertToInt8(t, archsimd.Int16x8.SaturateToInt8, map1n[int16](satToInt8, 16))
   283  		testInt16x16ConvertToInt8(t, archsimd.Int16x16.SaturateToInt8, map1[int16](satToInt8))
   284  		testInt16x32ConvertToInt8(t, archsimd.Int16x32.SaturateToInt8, map1[int16](satToInt8))
   285  		testInt32x4ConvertToInt8(t, archsimd.Int32x4.SaturateToInt8, map1n[int32](satToInt8, 16))
   286  		testInt32x8ConvertToInt8(t, archsimd.Int32x8.SaturateToInt8, map1n[int32](satToInt8, 16))
   287  		testInt32x16ConvertToInt8(t, archsimd.Int32x16.SaturateToInt8, map1[int32](satToInt8))
   288  		testInt64x2ConvertToInt8(t, archsimd.Int64x2.SaturateToInt8, map1n[int64](satToInt8, 16))
   289  		testInt64x4ConvertToInt8(t, archsimd.Int64x4.SaturateToInt8, map1n[int64](satToInt8, 16))
   290  		testInt64x8ConvertToInt8(t, archsimd.Int64x8.SaturateToInt8, map1n[int64](satToInt8, 16))
   291  		testInt32x4ConvertToInt16(t, archsimd.Int32x4.SaturateToInt16, map1n[int32](satToInt16, 8))
   292  		testInt32x8ConvertToInt16(t, archsimd.Int32x8.SaturateToInt16, map1[int32](satToInt16))
   293  		testInt32x16ConvertToInt16(t, archsimd.Int32x16.SaturateToInt16, map1[int32](satToInt16))
   294  		testInt64x2ConvertToInt16(t, archsimd.Int64x2.SaturateToInt16, map1n[int64](satToInt16, 8))
   295  		testInt64x4ConvertToInt16(t, archsimd.Int64x4.SaturateToInt16, map1n[int64](satToInt16, 8))
   296  		testInt64x8ConvertToInt16(t, archsimd.Int64x8.SaturateToInt16, map1[int64](satToInt16))
   297  		testInt64x2ConvertToInt32(t, archsimd.Int64x2.SaturateToInt32, map1n[int64](satToInt32, 4))
   298  		testInt64x4ConvertToInt32(t, archsimd.Int64x4.SaturateToInt32, map1[int64](satToInt32))
   299  		testInt64x8ConvertToInt32(t, archsimd.Int64x8.SaturateToInt32, map1[int64](satToInt32))
   300  
   301  		testUint16x8ConvertToUint8(t, archsimd.Uint16x8.SaturateToUint8, map1n[uint16](satToUint8, 16))
   302  		testUint16x16ConvertToUint8(t, archsimd.Uint16x16.SaturateToUint8, map1[uint16](satToUint8))
   303  		testUint16x32ConvertToUint8(t, archsimd.Uint16x32.SaturateToUint8, map1[uint16](satToUint8))
   304  		testUint32x4ConvertToUint8(t, archsimd.Uint32x4.SaturateToUint8, map1n[uint32](satToUint8, 16))
   305  		testUint32x8ConvertToUint8(t, archsimd.Uint32x8.SaturateToUint8, map1n[uint32](satToUint8, 16))
   306  		testUint32x16ConvertToUint8(t, archsimd.Uint32x16.SaturateToUint8, map1[uint32](satToUint8))
   307  		testUint64x2ConvertToUint8(t, archsimd.Uint64x2.SaturateToUint8, map1n[uint64](satToUint8, 16))
   308  		testUint64x4ConvertToUint8(t, archsimd.Uint64x4.SaturateToUint8, map1n[uint64](satToUint8, 16))
   309  		testUint64x8ConvertToUint8(t, archsimd.Uint64x8.SaturateToUint8, map1n[uint64](satToUint8, 16))
   310  		testUint32x4ConvertToUint16(t, archsimd.Uint32x4.SaturateToUint16, map1n[uint32](satToUint16, 8))
   311  		testUint32x8ConvertToUint16(t, archsimd.Uint32x8.SaturateToUint16, map1[uint32](satToUint16))
   312  		testUint32x16ConvertToUint16(t, archsimd.Uint32x16.SaturateToUint16, map1[uint32](satToUint16))
   313  		testUint64x2ConvertToUint16(t, archsimd.Uint64x2.SaturateToUint16, map1n[uint64](satToUint16, 8))
   314  		testUint64x4ConvertToUint16(t, archsimd.Uint64x4.SaturateToUint16, map1n[uint64](satToUint16, 8))
   315  		testUint64x8ConvertToUint16(t, archsimd.Uint64x8.SaturateToUint16, map1[uint64](satToUint16))
   316  		testUint64x2ConvertToUint32(t, archsimd.Uint64x2.SaturateToUint32, map1n[uint64](satToUint32, 4))
   317  		testUint64x4ConvertToUint32(t, archsimd.Uint64x4.SaturateToUint32, map1[uint64](satToUint32))
   318  		testUint64x8ConvertToUint32(t, archsimd.Uint64x8.SaturateToUint32, map1[uint64](satToUint32))
   319  	}
   320  }
   321  
   322  func TestOnesCountAMD64(t *testing.T) {
   323  	if archsimd.X86.AVX512BITALG() {
   324  		// 128-bit
   325  		testInt16x8Unary(t, archsimd.Int16x8.OnesCount, map1[int16](onesCount))
   326  		testUint16x8Unary(t, archsimd.Uint16x8.OnesCount, map1[uint16](onesCount))
   327  
   328  		// 256-bit
   329  		testInt8x32Unary(t, archsimd.Int8x32.OnesCount, map1[int8](onesCount))
   330  		testUint8x32Unary(t, archsimd.Uint8x32.OnesCount, map1[uint8](onesCount))
   331  		testInt16x16Unary(t, archsimd.Int16x16.OnesCount, map1[int16](onesCount))
   332  		testUint16x16Unary(t, archsimd.Uint16x16.OnesCount, map1[uint16](onesCount))
   333  
   334  		// 512-bit
   335  		testInt8x64Unary(t, archsimd.Int8x64.OnesCount, map1[int8](onesCount))
   336  		testUint8x64Unary(t, archsimd.Uint8x64.OnesCount, map1[uint8](onesCount))
   337  		testInt16x32Unary(t, archsimd.Int16x32.OnesCount, map1[int16](onesCount))
   338  		testUint16x32Unary(t, archsimd.Uint16x32.OnesCount, map1[uint16](onesCount))
   339  	}
   340  
   341  	if archsimd.X86.AVX512VPOPCNTDQ() {
   342  		// 128-bit
   343  		testInt32x4Unary(t, archsimd.Int32x4.OnesCount, map1[int32](onesCount))
   344  		testUint32x4Unary(t, archsimd.Uint32x4.OnesCount, map1[uint32](onesCount))
   345  		testInt64x2Unary(t, archsimd.Int64x2.OnesCount, map1[int64](onesCount))
   346  		testUint64x2Unary(t, archsimd.Uint64x2.OnesCount, map1[uint64](onesCount))
   347  
   348  		// 256-bit
   349  		testInt32x8Unary(t, archsimd.Int32x8.OnesCount, map1[int32](onesCount))
   350  		testUint32x8Unary(t, archsimd.Uint32x8.OnesCount, map1[uint32](onesCount))
   351  		testInt64x4Unary(t, archsimd.Int64x4.OnesCount, map1[int64](onesCount))
   352  		testUint64x4Unary(t, archsimd.Uint64x4.OnesCount, map1[uint64](onesCount))
   353  
   354  		// 512-bit
   355  		testInt32x16Unary(t, archsimd.Int32x16.OnesCount, map1[int32](onesCount))
   356  		testUint32x16Unary(t, archsimd.Uint32x16.OnesCount, map1[uint32](onesCount))
   357  		testInt64x8Unary(t, archsimd.Int64x8.OnesCount, map1[int64](onesCount))
   358  		testUint64x8Unary(t, archsimd.Uint64x8.OnesCount, map1[uint64](onesCount))
   359  	}
   360  }
   361  

View as plain text