Source file
src/simd/simd_emulated.go
1
2
3
4
5
6
7 package simd
8
9 import (
10 "fmt"
11 "math"
12 "math/bits"
13 )
14
15
// VectorBitSize reports the width of a vector in bits; this implementation
// always returns 128.
func VectorBitSize() int {
	const vectorBits = 128
	return vectorBits
}
19
20
// Emulated reports whether the vector operations are emulated; this
// implementation always returns true.
func Emulated() bool {
	const emulated = true
	return emulated
}
24
25
26
27
28
29
30
// HasHardwareCarrylessMultiply reports whether a hardware carry-less multiply
// is available; this implementation always returns false.
func HasHardwareCarrylessMultiply() bool {
	const available = false
	return available
}
34
35
36 func LoadInt8s(s []int8) Int8s {
37 var a, b uint64
38 for i := 0; i < 16; i++ {
39 val := uint64(uint8(s[i]))
40 if i < 8 {
41 a |= val << (8 * i)
42 } else {
43 b |= val << (8 * (i - 8))
44 }
45 }
46 return Int8s{a: a, b: b}
47 }
48
49
50 func LoadInt8sPart(s []int8) (Int8s, int) {
51 var a, b uint64
52 n := len(s)
53 if n > 16 {
54 n = 16
55 }
56 for i := 0; i < n; i++ {
57 val := uint64(uint8(s[i]))
58 if i < 8 {
59 a |= val << (8 * i)
60 } else {
61 b |= val << (8 * (i - 8))
62 }
63 }
64 return Int8s{a: a, b: b}, n
65 }
66
67 func (x Int8s) get(i int) int8 {
68 if i < 8 {
69 return int8(x.a >> (8 * i))
70 }
71 return int8(x.b >> (8 * (i - 8)))
72 }
73
74 func (x *Int8s) set(i int, v int8) {
75 val := uint64(uint8(v))
76 if i < 8 {
77 mask := uint64(0xff) << (8 * i)
78 x.a = (x.a &^ mask) | (val << (8 * i))
79 } else {
80 mask := uint64(0xff) << (8 * (i - 8))
81 x.b = (x.b &^ mask) | (val << (8 * (i - 8)))
82 }
83 }
84
85
86 func (x Int8s) Abs() Int8s {
87 var res Int8s
88 for i := 0; i < 16; i++ {
89 v := x.get(i)
90 if v < 0 {
91 res.set(i, -v)
92 } else {
93 res.set(i, v)
94 }
95 }
96 return res
97 }
98
99
100 func (x Int8s) Add(y Int8s) Int8s {
101 var res Int8s
102 for i := 0; i < 16; i++ {
103 res.set(i, x.get(i)+y.get(i))
104 }
105 return res
106 }
107
108
109 func (x Int8s) AddSaturated(y Int8s) Int8s {
110 var res Int8s
111 for i := 0; i < 16; i++ {
112 sum := int(x.get(i)) + int(y.get(i))
113 if sum > math.MaxInt8 {
114 res.set(i, math.MaxInt8)
115 } else if sum < math.MinInt8 {
116 res.set(i, math.MinInt8)
117 } else {
118 res.set(i, int8(sum))
119 }
120 }
121 return res
122 }
123
124
125 func (x Int8s) And(y Int8s) Int8s {
126 return Int8s{a: x.a & y.a, b: x.b & y.b}
127 }
128
129
130 func (x Int8s) AndNot(y Int8s) Int8s {
131 return Int8s{a: x.a &^ y.a, b: x.b &^ y.b}
132 }
133
134
135 func (x Int8s) Equal(y Int8s) Mask8s {
136 var res Mask8s
137 for i := 0; i < 16; i++ {
138 if x.get(i) == y.get(i) {
139 res.set(i, true)
140 }
141 }
142 return res
143 }
144
145
146 func (x Int8s) Greater(y Int8s) Mask8s {
147 var res Mask8s
148 for i := 0; i < 16; i++ {
149 if x.get(i) > y.get(i) {
150 res.set(i, true)
151 }
152 }
153 return res
154 }
155
156
157 func (x Int8s) GreaterEqual(y Int8s) Mask8s {
158 var res Mask8s
159 for i := 0; i < 16; i++ {
160 if x.get(i) >= y.get(i) {
161 res.set(i, true)
162 }
163 }
164 return res
165 }
166
167
168 func (x Int8s) Less(y Int8s) Mask8s {
169 var res Mask8s
170 for i := 0; i < 16; i++ {
171 if x.get(i) < y.get(i) {
172 res.set(i, true)
173 }
174 }
175 return res
176 }
177
178
179 func (x Int8s) LessEqual(y Int8s) Mask8s {
180 var res Mask8s
181 for i := 0; i < 16; i++ {
182 if x.get(i) <= y.get(i) {
183 res.set(i, true)
184 }
185 }
186 return res
187 }
188
189
190 func (x Int8s) NotEqual(y Int8s) Mask8s {
191 var res Mask8s
192 for i := 0; i < 16; i++ {
193 if x.get(i) != y.get(i) {
194 res.set(i, true)
195 }
196 }
197 return res
198 }
199
200
201 func (x Int8s) Len() int {
202 return 16
203 }
204
205
206 func (x Int8s) Masked(mask Mask8s) Int8s {
207 return Int8s{a: x.a & mask.a, b: x.b & mask.b}
208 }
209
210
211 func (x Int8s) Max(y Int8s) Int8s {
212 var res Int8s
213 for i := 0; i < 16; i++ {
214 vx := x.get(i)
215 vy := y.get(i)
216 if vx > vy {
217 res.set(i, vx)
218 } else {
219 res.set(i, vy)
220 }
221 }
222 return res
223 }
224
225
226 func (x Int8s) Mul(y Int8s) Int8s {
227 var res Int8s
228 for i := 0; i < 16; i++ {
229 res.set(i, x.get(i)*y.get(i))
230 }
231 return res
232 }
233
234
235 func (x Int8s) IfElse(mask Mask8s, y Int8s) Int8s {
236 return Int8s{
237 a: (x.a & mask.a) | (y.a &^ mask.a),
238 b: (x.b & mask.b) | (y.b &^ mask.b),
239 }
240 }
241
242
243 func (x Int8s) Min(y Int8s) Int8s {
244 var res Int8s
245 for i := 0; i < 16; i++ {
246 vx := x.get(i)
247 vy := y.get(i)
248 if vx < vy {
249 res.set(i, vx)
250 } else {
251 res.set(i, vy)
252 }
253 }
254 return res
255 }
256
257
258 func (x Int8s) Neg() Int8s {
259 var res Int8s
260 for i := 0; i < 16; i++ {
261 res.set(i, -x.get(i))
262 }
263 return res
264 }
265
266
267 func (x Int8s) Not() Int8s {
268 return Int8s{a: ^x.a, b: ^x.b}
269 }
270
271
272 func (x Int8s) Or(y Int8s) Int8s {
273 return Int8s{a: x.a | y.a, b: x.b | y.b}
274 }
275
276
277 func (x Int8s) Store(s []int8) {
278 for i := 0; i < 16 && i < len(s); i++ {
279 s[i] = x.get(i)
280 }
281 }
282
283
284 func (x Int8s) StorePart(s []int8) int {
285 x.Store(s)
286 return min(len(s), x.Len())
287 }
288
289
290 func (x Int8s) String() string {
291 var parts [16]int8
292 for i := 0; i < 16; i++ {
293 parts[i] = x.get(i)
294 }
295 return fmt.Sprint(parts)
296 }
297
298
299 func (x Int8s) Sub(y Int8s) Int8s {
300 var res Int8s
301 for i := 0; i < 16; i++ {
302 res.set(i, x.get(i)-y.get(i))
303 }
304 return res
305 }
306
307
308 func (x Int8s) SubSaturated(y Int8s) Int8s {
309 var res Int8s
310 for i := 0; i < 16; i++ {
311 diff := int(x.get(i)) - int(y.get(i))
312 if diff > math.MaxInt8 {
313 res.set(i, math.MaxInt8)
314 } else if diff < math.MinInt8 {
315 res.set(i, math.MinInt8)
316 } else {
317 res.set(i, int8(diff))
318 }
319 }
320 return res
321 }
322
323
324 func (x Int8s) ToMask() Mask8s {
325 var res Mask8s
326 for i := 0; i < 16; i++ {
327 if x.get(i) != 0 {
328 res.set(i, true)
329 }
330 }
331 return res
332 }
333
334
335 func (x Int8s) Xor(y Int8s) Int8s {
336 return Int8s{a: x.a ^ y.a, b: x.b ^ y.b}
337 }
338
339
340 func (x Int8s) ConvertToUint8() Uint8s {
341 return Uint8s{a: x.a, b: x.b}
342 }
343
344
345 func (x Int8s) ToBits() Uint8s {
346 return Uint8s{a: x.a, b: x.b}
347 }
348
349
350 func LoadInt16s(s []int16) Int16s {
351 var a, b uint64
352 for i := 0; i < 8; i++ {
353 val := uint64(uint16(s[i]))
354 if i < 4 {
355 a |= val << (16 * i)
356 } else {
357 b |= val << (16 * (i - 4))
358 }
359 }
360 return Int16s{a: a, b: b}
361 }
362
363
364 func LoadInt16sPart(s []int16) (Int16s, int) {
365 var a, b uint64
366 n := len(s)
367 if n > 8 {
368 n = 8
369 }
370 for i := 0; i < n; i++ {
371 val := uint64(uint16(s[i]))
372 if i < 4 {
373 a |= val << (16 * i)
374 } else {
375 b |= val << (16 * (i - 4))
376 }
377 }
378 return Int16s{a: a, b: b}, n
379 }
380
381 func (x Int16s) get(i int) int16 {
382 if i < 4 {
383 return int16(x.a >> (16 * i))
384 }
385 return int16(x.b >> (16 * (i - 4)))
386 }
387
388 func (x *Int16s) set(i int, v int16) {
389 val := uint64(uint16(v))
390 if i < 4 {
391 mask := uint64(0xffff) << (16 * i)
392 x.a = (x.a &^ mask) | (val << (16 * i))
393 } else {
394 mask := uint64(0xffff) << (16 * (i - 4))
395 x.b = (x.b &^ mask) | (val << (16 * (i - 4)))
396 }
397 }
398
399
400 func (x Int16s) Abs() Int16s {
401 var res Int16s
402 for i := 0; i < 8; i++ {
403 v := x.get(i)
404 if v < 0 {
405 res.set(i, -v)
406 } else {
407 res.set(i, v)
408 }
409 }
410 return res
411 }
412
413
414 func (x Int16s) Add(y Int16s) Int16s {
415 var res Int16s
416 for i := 0; i < 8; i++ {
417 res.set(i, x.get(i)+y.get(i))
418 }
419 return res
420 }
421
422
423 func (x Int16s) AddSaturated(y Int16s) Int16s {
424 var res Int16s
425 for i := 0; i < 8; i++ {
426 sum := int(x.get(i)) + int(y.get(i))
427 if sum > math.MaxInt16 {
428 res.set(i, math.MaxInt16)
429 } else if sum < math.MinInt16 {
430 res.set(i, math.MinInt16)
431 } else {
432 res.set(i, int16(sum))
433 }
434 }
435 return res
436 }
437
438
439 func (x Int16s) And(y Int16s) Int16s {
440 return Int16s{a: x.a & y.a, b: x.b & y.b}
441 }
442
443
444 func (x Int16s) AndNot(y Int16s) Int16s {
445 return Int16s{a: x.a &^ y.a, b: x.b &^ y.b}
446 }
447
448
449 func (x Int16s) Equal(y Int16s) Mask16s {
450 var res Mask16s
451 for i := 0; i < 8; i++ {
452 if x.get(i) == y.get(i) {
453 res.set(i, true)
454 }
455 }
456 return res
457 }
458
459
460 func (x Int16s) Greater(y Int16s) Mask16s {
461 var res Mask16s
462 for i := 0; i < 8; i++ {
463 if x.get(i) > y.get(i) {
464 res.set(i, true)
465 }
466 }
467 return res
468 }
469
470
471 func (x Int16s) GreaterEqual(y Int16s) Mask16s {
472 var res Mask16s
473 for i := 0; i < 8; i++ {
474 if x.get(i) >= y.get(i) {
475 res.set(i, true)
476 }
477 }
478 return res
479 }
480
481
482 func (x Int16s) Less(y Int16s) Mask16s {
483 var res Mask16s
484 for i := 0; i < 8; i++ {
485 if x.get(i) < y.get(i) {
486 res.set(i, true)
487 }
488 }
489 return res
490 }
491
492
493 func (x Int16s) LessEqual(y Int16s) Mask16s {
494 var res Mask16s
495 for i := 0; i < 8; i++ {
496 if x.get(i) <= y.get(i) {
497 res.set(i, true)
498 }
499 }
500 return res
501 }
502
503
504 func (x Int16s) NotEqual(y Int16s) Mask16s {
505 var res Mask16s
506 for i := 0; i < 8; i++ {
507 if x.get(i) != y.get(i) {
508 res.set(i, true)
509 }
510 }
511 return res
512 }
513
514
515 func (x Int16s) Len() int {
516 return 8
517 }
518
519
520 func (x Int16s) Masked(mask Mask16s) Int16s {
521 return Int16s{a: x.a & mask.a, b: x.b & mask.b}
522 }
523
524
525 func (x Int16s) Max(y Int16s) Int16s {
526 var res Int16s
527 for i := 0; i < 8; i++ {
528 vx := x.get(i)
529 vy := y.get(i)
530 if vx > vy {
531 res.set(i, vx)
532 } else {
533 res.set(i, vy)
534 }
535 }
536 return res
537 }
538
539
540 func (x Int16s) IfElse(mask Mask16s, y Int16s) Int16s {
541 return Int16s{
542 a: (x.a & mask.a) | (y.a &^ mask.a),
543 b: (x.b & mask.b) | (y.b &^ mask.b),
544 }
545 }
546
547
548 func (x Int16s) Min(y Int16s) Int16s {
549 var res Int16s
550 for i := 0; i < 8; i++ {
551 vx := x.get(i)
552 vy := y.get(i)
553 if vx < vy {
554 res.set(i, vx)
555 } else {
556 res.set(i, vy)
557 }
558 }
559 return res
560 }
561
562
563 func (x Int16s) Mul(y Int16s) Int16s {
564 var res Int16s
565 for i := 0; i < 8; i++ {
566 res.set(i, x.get(i)*y.get(i))
567 }
568 return res
569 }
570
571
572 func (x Int16s) Neg() Int16s {
573 var res Int16s
574 for i := 0; i < 8; i++ {
575 res.set(i, -x.get(i))
576 }
577 return res
578 }
579
580
581 func (x Int16s) Not() Int16s {
582 return Int16s{a: ^x.a, b: ^x.b}
583 }
584
585
586 func (x Int16s) Or(y Int16s) Int16s {
587 return Int16s{a: x.a | y.a, b: x.b | y.b}
588 }
589
590
591 func (x Int16s) ShiftAllLeft(y uint8) Int16s {
592 var res Int16s
593 for i := 0; i < 8; i++ {
594 res.set(i, x.get(i)<<y)
595 }
596 return res
597 }
598
599
600 func (x Int16s) ShiftAllRight(y uint8) Int16s {
601 var res Int16s
602 for i := 0; i < 8; i++ {
603 res.set(i, x.get(i)>>y)
604 }
605 return res
606 }
607
608
609 func (x Int16s) RotateAllLeft(dist uint64) Int16s {
610 var res Int16s
611 d := dist & 15
612 for i := 0; i < 8; i++ {
613 u := uint16(x.get(i))
614 r := (u << d) | (u >> ((16 - d) & 15))
615 res.set(i, int16(r))
616 }
617 return res
618 }
619
620
621 func (x Int16s) RotateAllRight(dist uint64) Int16s {
622 var res Int16s
623 d := dist & 15
624 for i := 0; i < 8; i++ {
625 u := uint16(x.get(i))
626 r := (u >> d) | (u << ((16 - d) & 15))
627 res.set(i, int16(r))
628 }
629 return res
630 }
631
632
633 func (x Int16s) Store(s []int16) {
634 for i := 0; i < 8 && i < len(s); i++ {
635 s[i] = x.get(i)
636 }
637 }
638
639
640 func (x Int16s) StorePart(s []int16) int {
641 x.Store(s)
642 return min(len(s), x.Len())
643 }
644
645
646 func (x Int16s) String() string {
647 var parts [8]int16
648 for i := 0; i < 8; i++ {
649 parts[i] = x.get(i)
650 }
651 return fmt.Sprint(parts)
652 }
653
654
655 func (x Int16s) Sub(y Int16s) Int16s {
656 var res Int16s
657 for i := 0; i < 8; i++ {
658 res.set(i, x.get(i)-y.get(i))
659 }
660 return res
661 }
662
663
664 func (x Int16s) SubSaturated(y Int16s) Int16s {
665 var res Int16s
666 for i := 0; i < 8; i++ {
667 diff := int(x.get(i)) - int(y.get(i))
668 if diff > math.MaxInt16 {
669 res.set(i, math.MaxInt16)
670 } else if diff < math.MinInt16 {
671 res.set(i, math.MinInt16)
672 } else {
673 res.set(i, int16(diff))
674 }
675 }
676 return res
677 }
678
679
680 func (x Int16s) ToMask() Mask16s {
681 var res Mask16s
682 for i := 0; i < 8; i++ {
683 if x.get(i) != 0 {
684 res.set(i, true)
685 }
686 }
687 return res
688 }
689
690
691 func (x Int16s) Xor(y Int16s) Int16s {
692 return Int16s{a: x.a ^ y.a, b: x.b ^ y.b}
693 }
694
695
696 func (x Int16s) ConvertToUint16() Uint16s {
697 return Uint16s{a: x.a, b: x.b}
698 }
699
700
701 func (x Int16s) ToBits() Uint16s {
702 return Uint16s{a: x.a, b: x.b}
703 }
704
705
706 func LoadInt32s(s []int32) Int32s {
707 var a, b uint64
708 for i := 0; i < 4; i++ {
709 val := uint64(uint32(s[i]))
710 if i < 2 {
711 a |= val << (32 * i)
712 } else {
713 b |= val << (32 * (i - 2))
714 }
715 }
716 return Int32s{a: a, b: b}
717 }
718
719
720 func LoadInt32sPart(s []int32) (Int32s, int) {
721 var a, b uint64
722 n := len(s)
723 if n > 4 {
724 n = 4
725 }
726 for i := 0; i < n; i++ {
727 val := uint64(uint32(s[i]))
728 if i < 2 {
729 a |= val << (32 * i)
730 } else {
731 b |= val << (32 * (i - 2))
732 }
733 }
734 return Int32s{a: a, b: b}, n
735 }
736
737 func (x Int32s) get(i int) int32 {
738 if i < 2 {
739 return int32(x.a >> (32 * i))
740 }
741 return int32(x.b >> (32 * (i - 2)))
742 }
743
744 func (x *Int32s) set(i int, v int32) {
745 val := uint64(uint32(v))
746 if i < 2 {
747 mask := uint64(0xffffffff) << (32 * i)
748 x.a = (x.a &^ mask) | (val << (32 * i))
749 } else {
750 mask := uint64(0xffffffff) << (32 * (i - 2))
751 x.b = (x.b &^ mask) | (val << (32 * (i - 2)))
752 }
753 }
754
755
756 func (x Int32s) Abs() Int32s {
757 var res Int32s
758 for i := 0; i < 4; i++ {
759 v := x.get(i)
760 if v < 0 {
761 res.set(i, -v)
762 } else {
763 res.set(i, v)
764 }
765 }
766 return res
767 }
768
769
770 func (x Int32s) Add(y Int32s) Int32s {
771 var res Int32s
772 for i := 0; i < 4; i++ {
773 res.set(i, x.get(i)+y.get(i))
774 }
775 return res
776 }
777
778
779 func (x Int32s) And(y Int32s) Int32s {
780 return Int32s{a: x.a & y.a, b: x.b & y.b}
781 }
782
783
784 func (x Int32s) AndNot(y Int32s) Int32s {
785 return Int32s{a: x.a &^ y.a, b: x.b &^ y.b}
786 }
787
788
789 func (x Int32s) ConvertToFloat32() Float32s {
790 var res Float32s
791 for i := 0; i < 4; i++ {
792 res.set(i, float32(x.get(i)))
793 }
794 return res
795 }
796
797
798 func (x Int32s) Equal(y Int32s) Mask32s {
799 var res Mask32s
800 for i := 0; i < 4; i++ {
801 if x.get(i) == y.get(i) {
802 res.set(i, true)
803 }
804 }
805 return res
806 }
807
808
809 func (x Int32s) Greater(y Int32s) Mask32s {
810 var res Mask32s
811 for i := 0; i < 4; i++ {
812 if x.get(i) > y.get(i) {
813 res.set(i, true)
814 }
815 }
816 return res
817 }
818
819
820 func (x Int32s) GreaterEqual(y Int32s) Mask32s {
821 var res Mask32s
822 for i := 0; i < 4; i++ {
823 if x.get(i) >= y.get(i) {
824 res.set(i, true)
825 }
826 }
827 return res
828 }
829
830
831 func (x Int32s) Less(y Int32s) Mask32s {
832 var res Mask32s
833 for i := 0; i < 4; i++ {
834 if x.get(i) < y.get(i) {
835 res.set(i, true)
836 }
837 }
838 return res
839 }
840
841
842 func (x Int32s) LessEqual(y Int32s) Mask32s {
843 var res Mask32s
844 for i := 0; i < 4; i++ {
845 if x.get(i) <= y.get(i) {
846 res.set(i, true)
847 }
848 }
849 return res
850 }
851
852
853 func (x Int32s) NotEqual(y Int32s) Mask32s {
854 var res Mask32s
855 for i := 0; i < 4; i++ {
856 if x.get(i) != y.get(i) {
857 res.set(i, true)
858 }
859 }
860 return res
861 }
862
863
864 func (x Int32s) Len() int {
865 return 4
866 }
867
868
869 func (x Int32s) Masked(mask Mask32s) Int32s {
870 return Int32s{a: x.a & mask.a, b: x.b & mask.b}
871 }
872
873
874 func (x Int32s) Max(y Int32s) Int32s {
875 var res Int32s
876 for i := 0; i < 4; i++ {
877 vx := x.get(i)
878 vy := y.get(i)
879 if vx > vy {
880 res.set(i, vx)
881 } else {
882 res.set(i, vy)
883 }
884 }
885 return res
886 }
887
888
889 func (x Int32s) IfElse(mask Mask32s, y Int32s) Int32s {
890 return Int32s{
891 a: (x.a & mask.a) | (y.a &^ mask.a),
892 b: (x.b & mask.b) | (y.b &^ mask.b),
893 }
894 }
895
896
897 func (x Int32s) Min(y Int32s) Int32s {
898 var res Int32s
899 for i := 0; i < 4; i++ {
900 vx := x.get(i)
901 vy := y.get(i)
902 if vx < vy {
903 res.set(i, vx)
904 } else {
905 res.set(i, vy)
906 }
907 }
908 return res
909 }
910
911
912 func (x Int32s) Mul(y Int32s) Int32s {
913 var res Int32s
914 for i := 0; i < 4; i++ {
915 res.set(i, x.get(i)*y.get(i))
916 }
917 return res
918 }
919
920
921 func (x Int32s) Neg() Int32s {
922 var res Int32s
923 for i := 0; i < 4; i++ {
924 res.set(i, -x.get(i))
925 }
926 return res
927 }
928
929
930 func (x Int32s) Not() Int32s {
931 return Int32s{a: ^x.a, b: ^x.b}
932 }
933
934
935 func (x Int32s) Or(y Int32s) Int32s {
936 return Int32s{a: x.a | y.a, b: x.b | y.b}
937 }
938
939
940 func (x Int32s) ShiftAllLeft(y uint8) Int32s {
941 var res Int32s
942 for i := 0; i < 4; i++ {
943 res.set(i, x.get(i)<<y)
944 }
945 return res
946 }
947
948
949 func (x Int32s) ShiftAllRight(y uint8) Int32s {
950 var res Int32s
951 for i := 0; i < 4; i++ {
952 res.set(i, x.get(i)>>y)
953 }
954 return res
955 }
956
957
958 func (x Int32s) RotateAllLeft(dist uint64) Int32s {
959 var res Int32s
960 d := dist & 31
961 for i := 0; i < 4; i++ {
962 u := uint32(x.get(i))
963 r := (u << d) | (u >> ((32 - d) & 31))
964 res.set(i, int32(r))
965 }
966 return res
967 }
968
969
970 func (x Int32s) RotateAllRight(dist uint64) Int32s {
971 var res Int32s
972 d := dist & 31
973 for i := 0; i < 4; i++ {
974 u := uint32(x.get(i))
975 r := (u >> d) | (u << ((32 - d) & 31))
976 res.set(i, int32(r))
977 }
978 return res
979 }
980
981
982 func (x Int32s) Store(s []int32) {
983 for i := 0; i < 4 && i < len(s); i++ {
984 s[i] = x.get(i)
985 }
986 }
987
988
989 func (x Int32s) StorePart(s []int32) int {
990 x.Store(s)
991 return min(len(s), x.Len())
992 }
993
994
995 func (x Int32s) String() string {
996 var parts [4]int32
997 for i := 0; i < 4; i++ {
998 parts[i] = x.get(i)
999 }
1000 return fmt.Sprint(parts)
1001 }
1002
1003
1004 func (x Int32s) Sub(y Int32s) Int32s {
1005 var res Int32s
1006 for i := 0; i < 4; i++ {
1007 res.set(i, x.get(i)-y.get(i))
1008 }
1009 return res
1010 }
1011
1012
1013 func (x Int32s) ToMask() Mask32s {
1014 var res Mask32s
1015 for i := 0; i < 4; i++ {
1016 if x.get(i) != 0 {
1017 res.set(i, true)
1018 }
1019 }
1020 return res
1021 }
1022
1023
1024 func (x Int32s) Xor(y Int32s) Int32s {
1025 return Int32s{a: x.a ^ y.a, b: x.b ^ y.b}
1026 }
1027
1028
1029 func (x Int32s) ConvertToUint32() Uint32s {
1030 return Uint32s{a: x.a, b: x.b}
1031 }
1032
1033
1034 func (x Int32s) ToBits() Uint32s {
1035 return Uint32s{a: x.a, b: x.b}
1036 }
1037
1038
1039 func LoadInt64s(s []int64) Int64s {
1040 var a, b uint64
1041 a = uint64(s[0])
1042 b = uint64(s[1])
1043 return Int64s{a: a, b: b}
1044 }
1045
1046
1047 func LoadInt64sPart(s []int64) (Int64s, int) {
1048 var a, b uint64
1049 if len(s) > 0 {
1050 a = uint64(s[0])
1051 }
1052 if len(s) > 1 {
1053 b = uint64(s[1])
1054 }
1055 return Int64s{a: a, b: b}, len(s)
1056 }
1057
1058 func (x Int64s) get(i int) int64 {
1059 if i == 0 {
1060 return int64(x.a)
1061 }
1062 return int64(x.b)
1063 }
1064
1065 func (x *Int64s) set(i int, v int64) {
1066 if i == 0 {
1067 x.a = uint64(v)
1068 } else {
1069 x.b = uint64(v)
1070 }
1071 }
1072
1073
1074 func (x Int64s) Add(y Int64s) Int64s {
1075 return Int64s{a: x.a + y.a, b: x.b + y.b}
1076 }
1077
1078
1079 func (x Int64s) And(y Int64s) Int64s {
1080 return Int64s{a: x.a & y.a, b: x.b & y.b}
1081 }
1082
1083
1084 func (x Int64s) AndNot(y Int64s) Int64s {
1085 return Int64s{a: x.a &^ y.a, b: x.b &^ y.b}
1086 }
1087
1088
1089 func (x Int64s) Equal(y Int64s) Mask64s {
1090 var res Mask64s
1091 if x.a == y.a {
1092 res.a = ^uint64(0)
1093 }
1094 if x.b == y.b {
1095 res.b = ^uint64(0)
1096 }
1097 return res
1098 }
1099
1100
1101 func (x Int64s) Greater(y Int64s) Mask64s {
1102 var res Mask64s
1103 if int64(x.a) > int64(y.a) {
1104 res.a = ^uint64(0)
1105 }
1106 if int64(x.b) > int64(y.b) {
1107 res.b = ^uint64(0)
1108 }
1109 return res
1110 }
1111
1112
1113 func (x Int64s) GreaterEqual(y Int64s) Mask64s {
1114 var res Mask64s
1115 if int64(x.a) >= int64(y.a) {
1116 res.a = ^uint64(0)
1117 }
1118 if int64(x.b) >= int64(y.b) {
1119 res.b = ^uint64(0)
1120 }
1121 return res
1122 }
1123
1124
1125 func (x Int64s) Less(y Int64s) Mask64s {
1126 var res Mask64s
1127 if int64(x.a) < int64(y.a) {
1128 res.a = ^uint64(0)
1129 }
1130 if int64(x.b) < int64(y.b) {
1131 res.b = ^uint64(0)
1132 }
1133 return res
1134 }
1135
1136
1137 func (x Int64s) LessEqual(y Int64s) Mask64s {
1138 var res Mask64s
1139 if int64(x.a) <= int64(y.a) {
1140 res.a = ^uint64(0)
1141 }
1142 if int64(x.b) <= int64(y.b) {
1143 res.b = ^uint64(0)
1144 }
1145 return res
1146 }
1147
1148
1149 func (x Int64s) NotEqual(y Int64s) Mask64s {
1150 var res Mask64s
1151 if x.a != y.a {
1152 res.a = ^uint64(0)
1153 }
1154 if x.b != y.b {
1155 res.b = ^uint64(0)
1156 }
1157 return res
1158 }
1159
1160
1161 func (x Int64s) Len() int {
1162 return 2
1163 }
1164
1165
1166 func (x Int64s) Masked(mask Mask64s) Int64s {
1167 return Int64s{a: x.a & mask.a, b: x.b & mask.b}
1168 }
1169
1170
1171 func (x Int64s) IfElse(mask Mask64s, y Int64s) Int64s {
1172 return Int64s{
1173 a: (x.a & mask.a) | (y.a &^ mask.a),
1174 b: (x.b & mask.b) | (y.b &^ mask.b),
1175 }
1176 }
1177
1178
1179 func (x Int64s) Neg() Int64s {
1180 return Int64s{a: uint64(-int64(x.a)), b: uint64(-int64(x.b))}
1181 }
1182
1183
1184 func (x Int64s) Not() Int64s {
1185 return Int64s{a: ^x.a, b: ^x.b}
1186 }
1187
1188
1189 func (x Int64s) Or(y Int64s) Int64s {
1190 return Int64s{a: x.a | y.a, b: x.b | y.b}
1191 }
1192
1193
1194 func (x Int64s) ShiftAllLeft(y uint8) Int64s {
1195 return Int64s{a: x.a << y, b: x.b << y}
1196 }
1197
1198
1199 func (x Int64s) RotateAllLeft(dist uint64) Int64s {
1200 d := dist & 63
1201 return Int64s{
1202 a: (x.a << d) | (x.a >> ((64 - d) & 63)),
1203 b: (x.b << d) | (x.b >> ((64 - d) & 63)),
1204 }
1205 }
1206
1207
1208 func (x Int64s) RotateAllRight(dist uint64) Int64s {
1209 d := dist & 63
1210 return Int64s{
1211 a: (x.a >> d) | (x.a << ((64 - d) & 63)),
1212 b: (x.b >> d) | (x.b << ((64 - d) & 63)),
1213 }
1214 }
1215
1216
1217 func (x Int64s) Store(s []int64) {
1218 if len(s) > 0 {
1219 s[0] = int64(x.a)
1220 }
1221 if len(s) > 1 {
1222 s[1] = int64(x.b)
1223 }
1224 }
1225
1226
1227 func (x Int64s) StorePart(s []int64) int {
1228 x.Store(s)
1229 return min(len(s), x.Len())
1230 }
1231
1232
1233 func (x Int64s) String() string {
1234 return fmt.Sprint([2]int64{int64(x.a), int64(x.b)})
1235 }
1236
1237
1238 func (x Int64s) Sub(y Int64s) Int64s {
1239 return Int64s{a: x.a - y.a, b: x.b - y.b}
1240 }
1241
1242
1243 func (x Int64s) ToMask() Mask64s {
1244 var res Mask64s
1245 if x.a != 0 {
1246 res.a = ^uint64(0)
1247 }
1248 if x.b != 0 {
1249 res.b = ^uint64(0)
1250 }
1251 return res
1252 }
1253
1254
1255 func (x Int64s) Xor(y Int64s) Int64s {
1256 return Int64s{a: x.a ^ y.a, b: x.b ^ y.b}
1257 }
1258
1259
1260 func (x Int64s) ConvertToUint64() Uint64s {
1261 return Uint64s{a: x.a, b: x.b}
1262 }
1263
1264
1265 func (x Int64s) ToBits() Uint64s {
1266 return Uint64s{a: x.a, b: x.b}
1267 }
1268
1269
1270 func LoadUint8s(s []uint8) Uint8s {
1271 var a, b uint64
1272 for i := 0; i < 16; i++ {
1273 val := uint64(s[i])
1274 if i < 8 {
1275 a |= val << (8 * i)
1276 } else {
1277 b |= val << (8 * (i - 8))
1278 }
1279 }
1280 return Uint8s{a: a, b: b}
1281 }
1282
1283
1284 func LoadUint8sPart(s []uint8) (Uint8s, int) {
1285 var a, b uint64
1286 n := len(s)
1287 if n > 16 {
1288 n = 16
1289 }
1290 for i := 0; i < n; i++ {
1291 val := uint64(s[i])
1292 if i < 8 {
1293 a |= val << (8 * i)
1294 } else {
1295 b |= val << (8 * (i - 8))
1296 }
1297 }
1298 return Uint8s{a: a, b: b}, n
1299 }
1300
1301 func (x Uint8s) get(i int) uint8 {
1302 if i < 8 {
1303 return uint8(x.a >> (8 * i))
1304 }
1305 return uint8(x.b >> (8 * (i - 8)))
1306 }
1307
1308 func (x *Uint8s) set(i int, v uint8) {
1309 val := uint64(v)
1310 if i < 8 {
1311 mask := uint64(0xff) << (8 * i)
1312 x.a = (x.a &^ mask) | (val << (8 * i))
1313 } else {
1314 mask := uint64(0xff) << (8 * (i - 8))
1315 x.b = (x.b &^ mask) | (val << (8 * (i - 8)))
1316 }
1317 }
1318
1319
1320 func (x Uint8s) Add(y Uint8s) Uint8s {
1321 var res Uint8s
1322 for i := 0; i < 16; i++ {
1323 res.set(i, x.get(i)+y.get(i))
1324 }
1325 return res
1326 }
1327
1328
1329 func (x Uint8s) AddSaturated(y Uint8s) Uint8s {
1330 var res Uint8s
1331 for i := 0; i < 16; i++ {
1332 sum := int(x.get(i)) + int(y.get(i))
1333 if sum > math.MaxUint8 {
1334 res.set(i, math.MaxUint8)
1335 } else {
1336 res.set(i, uint8(sum))
1337 }
1338 }
1339 return res
1340 }
1341
1342
1343 func (x Uint8s) And(y Uint8s) Uint8s {
1344 return Uint8s{a: x.a & y.a, b: x.b & y.b}
1345 }
1346
1347
1348 func (x Uint8s) AndNot(y Uint8s) Uint8s {
1349 return Uint8s{a: x.a &^ y.a, b: x.b &^ y.b}
1350 }
1351
1352
1353 func (x Uint8s) Average(y Uint8s) Uint8s {
1354 var res Uint8s
1355 for i := 0; i < 16; i++ {
1356 res.set(i, uint8((int(x.get(i))+int(y.get(i))+1)>>1))
1357 }
1358 return res
1359 }
1360
1361
1362 func (x Uint8s) Equal(y Uint8s) Mask8s {
1363 var res Mask8s
1364 for i := 0; i < 16; i++ {
1365 if x.get(i) == y.get(i) {
1366 res.set(i, true)
1367 }
1368 }
1369 return res
1370 }
1371
1372
1373 func (x Uint8s) NotEqual(y Uint8s) Mask8s {
1374 var res Mask8s
1375 for i := 0; i < 16; i++ {
1376 if x.get(i) != y.get(i) {
1377 res.set(i, true)
1378 }
1379 }
1380 return res
1381 }
1382
1383
1384 func (x Uint8s) Len() int {
1385 return 16
1386 }
1387
1388
1389 func (x Uint8s) Masked(mask Mask8s) Uint8s {
1390 return Uint8s{a: x.a & mask.a, b: x.b & mask.b}
1391 }
1392
1393
1394 func (x Uint8s) Max(y Uint8s) Uint8s {
1395 var res Uint8s
1396 for i := 0; i < 16; i++ {
1397 vx := x.get(i)
1398 vy := y.get(i)
1399 if vx > vy {
1400 res.set(i, vx)
1401 } else {
1402 res.set(i, vy)
1403 }
1404 }
1405 return res
1406 }
1407
1408
1409 func (x Uint8s) IfElse(mask Mask8s, y Uint8s) Uint8s {
1410 return Uint8s{
1411 a: (x.a & mask.a) | (y.a &^ mask.a),
1412 b: (x.b & mask.b) | (y.b &^ mask.b),
1413 }
1414 }
1415
1416
1417 func (x Uint8s) Min(y Uint8s) Uint8s {
1418 var res Uint8s
1419 for i := 0; i < 16; i++ {
1420 vx := x.get(i)
1421 vy := y.get(i)
1422 if vx < vy {
1423 res.set(i, vx)
1424 } else {
1425 res.set(i, vy)
1426 }
1427 }
1428 return res
1429 }
1430
1431
1432 func (x Uint8s) Mul(y Uint8s) Uint8s {
1433 var res Uint8s
1434 for i := 0; i < 16; i++ {
1435 res.set(i, x.get(i)*y.get(i))
1436 }
1437 return res
1438 }
1439
1440
1441 func (x Uint8s) Not() Uint8s {
1442 return Uint8s{a: ^x.a, b: ^x.b}
1443 }
1444
1445
1446 func (x Uint8s) Or(y Uint8s) Uint8s {
1447 return Uint8s{a: x.a | y.a, b: x.b | y.b}
1448 }
1449
1450
1451 func (x Uint8s) Store(s []uint8) {
1452 for i := 0; i < 16 && i < len(s); i++ {
1453 s[i] = x.get(i)
1454 }
1455 }
1456
1457
1458 func (x Uint8s) StorePart(s []uint8) int {
1459 x.Store(s)
1460 return min(len(s), x.Len())
1461 }
1462
1463
1464 func (x Uint8s) String() string {
1465 var parts [16]uint8
1466 for i := 0; i < 16; i++ {
1467 parts[i] = x.get(i)
1468 }
1469 return fmt.Sprint(parts)
1470 }
1471
1472
1473 func (x Uint8s) Sub(y Uint8s) Uint8s {
1474 var res Uint8s
1475 for i := 0; i < 16; i++ {
1476 res.set(i, x.get(i)-y.get(i))
1477 }
1478 return res
1479 }
1480
1481
1482 func (x Uint8s) SubSaturated(y Uint8s) Uint8s {
1483 var res Uint8s
1484 for i := 0; i < 16; i++ {
1485 vx := x.get(i)
1486 vy := y.get(i)
1487 if vx < vy {
1488 res.set(i, 0)
1489 } else {
1490 res.set(i, vx-vy)
1491 }
1492 }
1493 return res
1494 }
1495
1496
1497 func (x Uint8s) Xor(y Uint8s) Uint8s {
1498 return Uint8s{a: x.a ^ y.a, b: x.b ^ y.b}
1499 }
1500
1501
1502 func (x Uint8s) BitsToInt8() Int8s {
1503 return Int8s{a: x.a, b: x.b}
1504 }
1505
1506
1507 func (x Uint8s) ConvertToInt8() Int8s {
1508 return Int8s{a: x.a, b: x.b}
1509 }
1510
1511
1512 func (x Uint8s) ReshapeToUint16s() Uint16s {
1513 return Uint16s{a: x.a, b: x.b}
1514 }
1515
1516
1517 func (x Uint8s) ReshapeToUint32s() Uint32s {
1518 return Uint32s{a: x.a, b: x.b}
1519 }
1520
1521
1522 func (x Uint8s) ReshapeToUint64s() Uint64s {
1523 return Uint64s{a: x.a, b: x.b}
1524 }
1525
1526
1527 func LoadUint16s(s []uint16) Uint16s {
1528 var a, b uint64
1529 for i := 0; i < 8; i++ {
1530 val := uint64(s[i])
1531 if i < 4 {
1532 a |= val << (16 * i)
1533 } else {
1534 b |= val << (16 * (i - 4))
1535 }
1536 }
1537 return Uint16s{a: a, b: b}
1538 }
1539
1540
1541 func LoadUint16sPart(s []uint16) (Uint16s, int) {
1542 var a, b uint64
1543 n := len(s)
1544 if n > 8 {
1545 n = 8
1546 }
1547 for i := 0; i < n; i++ {
1548 val := uint64(s[i])
1549 if i < 4 {
1550 a |= val << (16 * i)
1551 } else {
1552 b |= val << (16 * (i - 4))
1553 }
1554 }
1555 return Uint16s{a: a, b: b}, n
1556 }
1557
1558 func (x Uint16s) get(i int) uint16 {
1559 if i < 4 {
1560 return uint16(x.a >> (16 * i))
1561 }
1562 return uint16(x.b >> (16 * (i - 4)))
1563 }
1564
1565 func (x *Uint16s) set(i int, v uint16) {
1566 val := uint64(v)
1567 if i < 4 {
1568 mask := uint64(0xffff) << (16 * i)
1569 x.a = (x.a &^ mask) | (val << (16 * i))
1570 } else {
1571 mask := uint64(0xffff) << (16 * (i - 4))
1572 x.b = (x.b &^ mask) | (val << (16 * (i - 4)))
1573 }
1574 }
1575
1576
1577 func (x Uint16s) Add(y Uint16s) Uint16s {
1578 var res Uint16s
1579 for i := 0; i < 8; i++ {
1580 res.set(i, x.get(i)+y.get(i))
1581 }
1582 return res
1583 }
1584
1585
1586 func (x Uint16s) AddSaturated(y Uint16s) Uint16s {
1587 var res Uint16s
1588 for i := 0; i < 8; i++ {
1589 sum := int(x.get(i)) + int(y.get(i))
1590 if sum > math.MaxUint16 {
1591 res.set(i, math.MaxUint16)
1592 } else {
1593 res.set(i, uint16(sum))
1594 }
1595 }
1596 return res
1597 }
1598
1599
1600 func (x Uint16s) And(y Uint16s) Uint16s {
1601 return Uint16s{a: x.a & y.a, b: x.b & y.b}
1602 }
1603
1604
1605 func (x Uint16s) AndNot(y Uint16s) Uint16s {
1606 return Uint16s{a: x.a &^ y.a, b: x.b &^ y.b}
1607 }
1608
1609
1610 func (x Uint16s) Average(y Uint16s) Uint16s {
1611 var res Uint16s
1612 for i := 0; i < 8; i++ {
1613 res.set(i, uint16((int(x.get(i))+int(y.get(i))+1)>>1))
1614 }
1615 return res
1616 }
1617
1618
1619 func (x Uint16s) Equal(y Uint16s) Mask16s {
1620 var res Mask16s
1621 for i := 0; i < 8; i++ {
1622 if x.get(i) == y.get(i) {
1623 res.set(i, true)
1624 }
1625 }
1626 return res
1627 }
1628
1629
1630 func (x Uint16s) Greater(y Uint16s) Mask16s {
1631 var res Mask16s
1632 for i := 0; i < 8; i++ {
1633 if x.get(i) > y.get(i) {
1634 res.set(i, true)
1635 }
1636 }
1637 return res
1638 }
1639
1640
1641 func (x Uint16s) GreaterEqual(y Uint16s) Mask16s {
1642 var res Mask16s
1643 for i := 0; i < 8; i++ {
1644 if x.get(i) >= y.get(i) {
1645 res.set(i, true)
1646 }
1647 }
1648 return res
1649 }
1650
1651
1652 func (x Uint16s) Less(y Uint16s) Mask16s {
1653 var res Mask16s
1654 for i := 0; i < 8; i++ {
1655 if x.get(i) < y.get(i) {
1656 res.set(i, true)
1657 }
1658 }
1659 return res
1660 }
1661
1662
1663 func (x Uint16s) LessEqual(y Uint16s) Mask16s {
1664 var res Mask16s
1665 for i := 0; i < 8; i++ {
1666 if x.get(i) <= y.get(i) {
1667 res.set(i, true)
1668 }
1669 }
1670 return res
1671 }
1672
1673
1674 func (x Uint16s) NotEqual(y Uint16s) Mask16s {
1675 var res Mask16s
1676 for i := 0; i < 8; i++ {
1677 if x.get(i) != y.get(i) {
1678 res.set(i, true)
1679 }
1680 }
1681 return res
1682 }
1683
1684
1685 func (x Uint16s) Len() int {
1686 return 8
1687 }
1688
1689
1690 func (x Uint16s) Masked(mask Mask16s) Uint16s {
1691 return Uint16s{a: x.a & mask.a, b: x.b & mask.b}
1692 }
1693
1694
1695 func (x Uint16s) Max(y Uint16s) Uint16s {
1696 var res Uint16s
1697 for i := 0; i < 8; i++ {
1698 vx := x.get(i)
1699 vy := y.get(i)
1700 if vx > vy {
1701 res.set(i, vx)
1702 } else {
1703 res.set(i, vy)
1704 }
1705 }
1706 return res
1707 }
1708
1709
1710 func (x Uint16s) IfElse(mask Mask16s, y Uint16s) Uint16s {
1711 return Uint16s{
1712 a: (x.a & mask.a) | (y.a &^ mask.a),
1713 b: (x.b & mask.b) | (y.b &^ mask.b),
1714 }
1715 }
1716
1717
1718 func (x Uint16s) Min(y Uint16s) Uint16s {
1719 var res Uint16s
1720 for i := 0; i < 8; i++ {
1721 vx := x.get(i)
1722 vy := y.get(i)
1723 if vx < vy {
1724 res.set(i, vx)
1725 } else {
1726 res.set(i, vy)
1727 }
1728 }
1729 return res
1730 }
1731
1732
1733 func (x Uint16s) Mul(y Uint16s) Uint16s {
1734 var res Uint16s
1735 for i := 0; i < 8; i++ {
1736 res.set(i, x.get(i)*y.get(i))
1737 }
1738 return res
1739 }
1740
1741
1742 func (x Uint16s) Not() Uint16s {
1743 return Uint16s{a: ^x.a, b: ^x.b}
1744 }
1745
1746
1747 func (x Uint16s) Or(y Uint16s) Uint16s {
1748 return Uint16s{a: x.a | y.a, b: x.b | y.b}
1749 }
1750
1751
1752 func (x Uint16s) ShiftAllLeft(y uint8) Uint16s {
1753 var res Uint16s
1754 for i := 0; i < 8; i++ {
1755 res.set(i, x.get(i)<<y)
1756 }
1757 return res
1758 }
1759
1760
1761 func (x Uint16s) ShiftAllRight(y uint8) Uint16s {
1762 var res Uint16s
1763 for i := 0; i < 8; i++ {
1764 res.set(i, x.get(i)>>y)
1765 }
1766 return res
1767 }
1768
1769
1770 func (x Uint16s) RotateAllLeft(dist uint64) Uint16s {
1771 var res Uint16s
1772 d := dist & 15
1773 for i := 0; i < 8; i++ {
1774 u := x.get(i)
1775 r := (u << d) | (u >> ((16 - d) & 15))
1776 res.set(i, r)
1777 }
1778 return res
1779 }
1780
1781
1782 func (x Uint16s) RotateAllRight(dist uint64) Uint16s {
1783 var res Uint16s
1784 d := dist & 15
1785 for i := 0; i < 8; i++ {
1786 u := x.get(i)
1787 r := (u >> d) | (u << ((16 - d) & 15))
1788 res.set(i, r)
1789 }
1790 return res
1791 }
1792
1793
1794 func (x Uint16s) Store(s []uint16) {
1795 for i := 0; i < 8 && i < len(s); i++ {
1796 s[i] = x.get(i)
1797 }
1798 }
1799
1800
1801 func (x Uint16s) StorePart(s []uint16) int {
1802 x.Store(s)
1803 return min(len(s), x.Len())
1804 }
1805
1806
1807 func (x Uint16s) String() string {
1808 var parts [8]uint16
1809 for i := 0; i < 8; i++ {
1810 parts[i] = x.get(i)
1811 }
1812 return fmt.Sprint(parts)
1813 }
1814
1815
1816 func (x Uint16s) Sub(y Uint16s) Uint16s {
1817 var res Uint16s
1818 for i := 0; i < 8; i++ {
1819 res.set(i, x.get(i)-y.get(i))
1820 }
1821 return res
1822 }
1823
1824
1825 func (x Uint16s) SubSaturated(y Uint16s) Uint16s {
1826 var res Uint16s
1827 for i := 0; i < 8; i++ {
1828 vx := x.get(i)
1829 vy := y.get(i)
1830 if vx < vy {
1831 res.set(i, 0)
1832 } else {
1833 res.set(i, vx-vy)
1834 }
1835 }
1836 return res
1837 }
1838
1839
1840 func (x Uint16s) Xor(y Uint16s) Uint16s {
1841 return Uint16s{a: x.a ^ y.a, b: x.b ^ y.b}
1842 }
1843
1844
1845 func (x Uint16s) BitsToInt16() Int16s {
1846 return Int16s{a: x.a, b: x.b}
1847 }
1848
1849
1850 func (x Uint16s) ConvertToInt16() Int16s {
1851 return Int16s{a: x.a, b: x.b}
1852 }
1853
1854
1855 func (x Uint16s) ReshapeToUint32s() Uint32s {
1856 return Uint32s{a: x.a, b: x.b}
1857 }
1858
1859
1860 func (x Uint16s) ReshapeToUint64s() Uint64s {
1861 return Uint64s{a: x.a, b: x.b}
1862 }
1863
1864
1865 func (x Uint16s) ReshapeToUint8s() Uint8s {
1866 return Uint8s{a: x.a, b: x.b}
1867 }
1868
1869
1870 func LoadUint32s(s []uint32) Uint32s {
1871 var a, b uint64
1872 for i := 0; i < 4; i++ {
1873 val := uint64(s[i])
1874 if i < 2 {
1875 a |= val << (32 * i)
1876 } else {
1877 b |= val << (32 * (i - 2))
1878 }
1879 }
1880 return Uint32s{a: a, b: b}
1881 }
1882
1883
1884 func LoadUint32sPart(s []uint32) (Uint32s, int) {
1885 var a, b uint64
1886 n := len(s)
1887 if n > 4 {
1888 n = 4
1889 }
1890 for i := 0; i < n; i++ {
1891 val := uint64(s[i])
1892 if i < 2 {
1893 a |= val << (32 * i)
1894 } else {
1895 b |= val << (32 * (i - 2))
1896 }
1897 }
1898 return Uint32s{a: a, b: b}, n
1899 }
1900
1901 func (x Uint32s) get(i int) uint32 {
1902 if i < 2 {
1903 return uint32(x.a >> (32 * i))
1904 }
1905 return uint32(x.b >> (32 * (i - 2)))
1906 }
1907
1908 func (x *Uint32s) set(i int, v uint32) {
1909 val := uint64(v)
1910 if i < 2 {
1911 mask := uint64(0xffffffff) << (32 * i)
1912 x.a = (x.a &^ mask) | (val << (32 * i))
1913 } else {
1914 mask := uint64(0xffffffff) << (32 * (i - 2))
1915 x.b = (x.b &^ mask) | (val << (32 * (i - 2)))
1916 }
1917 }
1918
1919
1920 func (x Uint32s) Add(y Uint32s) Uint32s {
1921 var res Uint32s
1922 for i := 0; i < 4; i++ {
1923 res.set(i, x.get(i)+y.get(i))
1924 }
1925 return res
1926 }
1927
1928
1929 func (x Uint32s) And(y Uint32s) Uint32s {
1930 return Uint32s{a: x.a & y.a, b: x.b & y.b}
1931 }
1932
1933
1934 func (x Uint32s) AndNot(y Uint32s) Uint32s {
1935 return Uint32s{a: x.a &^ y.a, b: x.b &^ y.b}
1936 }
1937
1938
1939 func (x Uint32s) Equal(y Uint32s) Mask32s {
1940 var res Mask32s
1941 for i := 0; i < 4; i++ {
1942 if x.get(i) == y.get(i) {
1943 res.set(i, true)
1944 }
1945 }
1946 return res
1947 }
1948
1949
1950 func (x Uint32s) Greater(y Uint32s) Mask32s {
1951 var res Mask32s
1952 for i := 0; i < 4; i++ {
1953 if x.get(i) > y.get(i) {
1954 res.set(i, true)
1955 }
1956 }
1957 return res
1958 }
1959
1960
1961 func (x Uint32s) GreaterEqual(y Uint32s) Mask32s {
1962 var res Mask32s
1963 for i := 0; i < 4; i++ {
1964 if x.get(i) >= y.get(i) {
1965 res.set(i, true)
1966 }
1967 }
1968 return res
1969 }
1970
1971
1972 func (x Uint32s) Less(y Uint32s) Mask32s {
1973 var res Mask32s
1974 for i := 0; i < 4; i++ {
1975 if x.get(i) < y.get(i) {
1976 res.set(i, true)
1977 }
1978 }
1979 return res
1980 }
1981
1982
1983 func (x Uint32s) LessEqual(y Uint32s) Mask32s {
1984 var res Mask32s
1985 for i := 0; i < 4; i++ {
1986 if x.get(i) <= y.get(i) {
1987 res.set(i, true)
1988 }
1989 }
1990 return res
1991 }
1992
1993
1994 func (x Uint32s) NotEqual(y Uint32s) Mask32s {
1995 var res Mask32s
1996 for i := 0; i < 4; i++ {
1997 if x.get(i) != y.get(i) {
1998 res.set(i, true)
1999 }
2000 }
2001 return res
2002 }
2003
2004
2005 func (x Uint32s) Len() int {
2006 return 4
2007 }
2008
2009
2010 func (x Uint32s) Masked(mask Mask32s) Uint32s {
2011 return Uint32s{a: x.a & mask.a, b: x.b & mask.b}
2012 }
2013
2014
2015 func (x Uint32s) Max(y Uint32s) Uint32s {
2016 var res Uint32s
2017 for i := 0; i < 4; i++ {
2018 vx := x.get(i)
2019 vy := y.get(i)
2020 if vx > vy {
2021 res.set(i, vx)
2022 } else {
2023 res.set(i, vy)
2024 }
2025 }
2026 return res
2027 }
2028
2029
2030 func (x Uint32s) IfElse(mask Mask32s, y Uint32s) Uint32s {
2031 return Uint32s{
2032 a: (x.a & mask.a) | (y.a &^ mask.a),
2033 b: (x.b & mask.b) | (y.b &^ mask.b),
2034 }
2035 }
2036
2037
2038 func (x Uint32s) Min(y Uint32s) Uint32s {
2039 var res Uint32s
2040 for i := 0; i < 4; i++ {
2041 vx := x.get(i)
2042 vy := y.get(i)
2043 if vx < vy {
2044 res.set(i, vx)
2045 } else {
2046 res.set(i, vy)
2047 }
2048 }
2049 return res
2050 }
2051
2052
2053 func (x Uint32s) Mul(y Uint32s) Uint32s {
2054 var res Uint32s
2055 for i := 0; i < 4; i++ {
2056 res.set(i, x.get(i)*y.get(i))
2057 }
2058 return res
2059 }
2060
2061
2062 func (x Uint32s) Not() Uint32s {
2063 return Uint32s{a: ^x.a, b: ^x.b}
2064 }
2065
2066
2067 func (x Uint32s) Or(y Uint32s) Uint32s {
2068 return Uint32s{a: x.a | y.a, b: x.b | y.b}
2069 }
2070
2071
2072 func (x Uint32s) ShiftAllLeft(y uint8) Uint32s {
2073 var res Uint32s
2074 for i := 0; i < 4; i++ {
2075 res.set(i, x.get(i)<<y)
2076 }
2077 return res
2078 }
2079
2080
2081 func (x Uint32s) ShiftAllRight(y uint8) Uint32s {
2082 var res Uint32s
2083 for i := 0; i < 4; i++ {
2084 res.set(i, x.get(i)>>y)
2085 }
2086 return res
2087 }
2088
2089
2090 func (x Uint32s) RotateAllLeft(dist uint64) Uint32s {
2091 var res Uint32s
2092 d := dist & 31
2093 for i := 0; i < 4; i++ {
2094 u := x.get(i)
2095 r := (u << d) | (u >> ((32 - d) & 31))
2096 res.set(i, r)
2097 }
2098 return res
2099 }
2100
2101
2102 func (x Uint32s) RotateAllRight(dist uint64) Uint32s {
2103 var res Uint32s
2104 d := dist & 31
2105 for i := 0; i < 4; i++ {
2106 u := x.get(i)
2107 r := (u >> d) | (u << ((32 - d) & 31))
2108 res.set(i, r)
2109 }
2110 return res
2111 }
2112
2113
2114 func (x Uint32s) Store(s []uint32) {
2115 for i := 0; i < 4 && i < len(s); i++ {
2116 s[i] = x.get(i)
2117 }
2118 }
2119
2120
2121 func (x Uint32s) StorePart(s []uint32) int {
2122 x.Store(s)
2123 return min(len(s), x.Len())
2124 }
2125
2126
2127 func (x Uint32s) String() string {
2128 var parts [4]uint32
2129 for i := 0; i < 4; i++ {
2130 parts[i] = x.get(i)
2131 }
2132 return fmt.Sprint(parts)
2133 }
2134
2135
2136 func (x Uint32s) Sub(y Uint32s) Uint32s {
2137 var res Uint32s
2138 for i := 0; i < 4; i++ {
2139 res.set(i, x.get(i)-y.get(i))
2140 }
2141 return res
2142 }
2143
2144
2145 func (x Uint32s) Xor(y Uint32s) Uint32s {
2146 return Uint32s{a: x.a ^ y.a, b: x.b ^ y.b}
2147 }
2148
2149
2150 func (x Uint32s) BitsToFloat32() Float32s {
2151 return Float32s{a: x.a, b: x.b}
2152 }
2153
2154
2155 func (x Uint32s) BitsToInt32() Int32s {
2156 return Int32s{a: x.a, b: x.b}
2157 }
2158
2159
2160 func (x Uint32s) ConvertToInt32() Int32s {
2161 return Int32s{a: x.a, b: x.b}
2162 }
2163
2164
2165 func (x Uint32s) ReshapeToUint16s() Uint16s {
2166 return Uint16s{a: x.a, b: x.b}
2167 }
2168
2169
2170 func (x Uint32s) ReshapeToUint64s() Uint64s {
2171 return Uint64s{a: x.a, b: x.b}
2172 }
2173
2174
2175 func (x Uint32s) ReshapeToUint8s() Uint8s {
2176 return Uint8s{a: x.a, b: x.b}
2177 }
2178
2179
2180 func LoadUint64s(s []uint64) Uint64s {
2181 var a, b uint64
2182 a = s[0]
2183 b = s[1]
2184 return Uint64s{a: a, b: b}
2185 }
2186
2187
2188 func LoadUint64sPart(s []uint64) (Uint64s, int) {
2189 n := len(s)
2190 var a, b uint64
2191 if n > 0 {
2192 a = s[0]
2193 }
2194 if n > 1 {
2195 b = s[1]
2196 }
2197 return Uint64s{a: a, b: b}, n
2198 }
2199
2200 func (x Uint64s) get(i int) uint64 {
2201 if i == 0 {
2202 return x.a
2203 }
2204 return x.b
2205 }
2206
2207 func (x *Uint64s) set(i int, v uint64) {
2208 if i == 0 {
2209 x.a = v
2210 } else {
2211 x.b = v
2212 }
2213 }
2214
2215
2216 func (x Uint64s) Add(y Uint64s) Uint64s {
2217 return Uint64s{a: x.a + y.a, b: x.b + y.b}
2218 }
2219
2220
2221 func (x Uint64s) And(y Uint64s) Uint64s {
2222 return Uint64s{a: x.a & y.a, b: x.b & y.b}
2223 }
2224
2225
2226 func (x Uint64s) AndNot(y Uint64s) Uint64s {
2227 return Uint64s{a: x.a &^ y.a, b: x.b &^ y.b}
2228 }
2229
2230
2231 func (x Uint64s) Equal(y Uint64s) Mask64s {
2232 var res Mask64s
2233 if x.a == y.a {
2234 res.a = ^uint64(0)
2235 }
2236 if x.b == y.b {
2237 res.b = ^uint64(0)
2238 }
2239 return res
2240 }
2241
2242
2243 func (x Uint64s) Greater(y Uint64s) Mask64s {
2244 var res Mask64s
2245 for i := 0; i < 2; i++ {
2246 if x.get(i) > y.get(i) {
2247 res.set(i, true)
2248 }
2249 }
2250 return res
2251 }
2252
2253
2254 func (x Uint64s) GreaterEqual(y Uint64s) Mask64s {
2255 var res Mask64s
2256 for i := 0; i < 2; i++ {
2257 if x.get(i) >= y.get(i) {
2258 res.set(i, true)
2259 }
2260 }
2261 return res
2262 }
2263
2264
2265 func (x Uint64s) Less(y Uint64s) Mask64s {
2266 var res Mask64s
2267 for i := 0; i < 2; i++ {
2268 if x.get(i) < y.get(i) {
2269 res.set(i, true)
2270 }
2271 }
2272 return res
2273 }
2274
2275
2276 func (x Uint64s) LessEqual(y Uint64s) Mask64s {
2277 var res Mask64s
2278 for i := 0; i < 2; i++ {
2279 if x.get(i) <= y.get(i) {
2280 res.set(i, true)
2281 }
2282 }
2283 return res
2284 }
2285
2286
2287 func (x Uint64s) NotEqual(y Uint64s) Mask64s {
2288 var res Mask64s
2289 if x.a != y.a {
2290 res.a = ^uint64(0)
2291 }
2292 if x.b != y.b {
2293 res.b = ^uint64(0)
2294 }
2295 return res
2296 }
2297
2298
2299 func (x Uint64s) Len() int {
2300 return 2
2301 }
2302
2303
2304 func (x Uint64s) Masked(mask Mask64s) Uint64s {
2305 return Uint64s{a: x.a & mask.a, b: x.b & mask.b}
2306 }
2307
2308
2309 func (x Uint64s) IfElse(mask Mask64s, y Uint64s) Uint64s {
2310 return Uint64s{
2311 a: (x.a & mask.a) | (y.a &^ mask.a),
2312 b: (x.b & mask.b) | (y.b &^ mask.b),
2313 }
2314 }
2315
2316
2317 func (x Uint64s) Not() Uint64s {
2318 return Uint64s{a: ^x.a, b: ^x.b}
2319 }
2320
2321
2322 func (x Uint64s) Or(y Uint64s) Uint64s {
2323 return Uint64s{a: x.a | y.a, b: x.b | y.b}
2324 }
2325
2326
2327 func (x Uint64s) ShiftAllLeft(y uint8) Uint64s {
2328 return Uint64s{a: x.a << y, b: x.b << y}
2329 }
2330
2331
2332 func (x Uint64s) ShiftAllRight(y uint8) Uint64s {
2333 return Uint64s{a: x.a >> y, b: x.b >> y}
2334 }
2335
2336
2337 func (x Uint64s) RotateAllLeft(dist uint64) Uint64s {
2338 d := dist & 63
2339 return Uint64s{
2340 a: (x.a << d) | (x.a >> ((64 - d) & 63)),
2341 b: (x.b << d) | (x.b >> ((64 - d) & 63)),
2342 }
2343 }
2344
2345
2346 func (x Uint64s) RotateAllRight(dist uint64) Uint64s {
2347 d := dist & 63
2348 return Uint64s{
2349 a: (x.a >> d) | (x.a << ((64 - d) & 63)),
2350 b: (x.b >> d) | (x.b << ((64 - d) & 63)),
2351 }
2352 }
2353
2354
2355 func (x Uint64s) Store(s []uint64) {
2356 if len(s) > 0 {
2357 s[0] = x.a
2358 }
2359 if len(s) > 1 {
2360 s[1] = x.b
2361 }
2362 }
2363
2364
2365 func (x Uint64s) StorePart(s []uint64) int {
2366 x.Store(s)
2367 return min(len(s), x.Len())
2368 }
2369
2370
2371 func (x Uint64s) String() string {
2372 return fmt.Sprint([2]uint64{x.a, x.b})
2373 }
2374
2375
2376 func (x Uint64s) Sub(y Uint64s) Uint64s {
2377 return Uint64s{a: x.a - y.a, b: x.b - y.b}
2378 }
2379
2380
2381 func (x Uint64s) Xor(y Uint64s) Uint64s {
2382 return Uint64s{a: x.a ^ y.a, b: x.b ^ y.b}
2383 }
2384
2385
2386 func (x Uint64s) BitsToFloat64() Float64s {
2387 return Float64s{a: x.a, b: x.b}
2388 }
2389
2390
2391 func (x Uint64s) BitsToInt64() Int64s {
2392 return Int64s{a: x.a, b: x.b}
2393 }
2394
2395
2396 func (x Uint64s) ConvertToInt64() Int64s {
2397 return Int64s{a: x.a, b: x.b}
2398 }
2399
2400
2401 func (x Uint64s) ReshapeToUint16s() Uint16s {
2402 return Uint16s{a: x.a, b: x.b}
2403 }
2404
2405
2406 func (x Uint64s) ReshapeToUint32s() Uint32s {
2407 return Uint32s{a: x.a, b: x.b}
2408 }
2409
2410
2411 func (x Uint64s) ReshapeToUint8s() Uint8s {
2412 return Uint8s{a: x.a, b: x.b}
2413 }
2414
2415
2416 func LoadFloat32s(s []float32) Float32s {
2417 var a, b uint64
2418 for i := 0; i < 4; i++ {
2419 val := uint64(math.Float32bits(s[i]))
2420 if i < 2 {
2421 a |= val << (32 * i)
2422 } else {
2423 b |= val << (32 * (i - 2))
2424 }
2425 }
2426 return Float32s{a: a, b: b}
2427 }
2428
2429
2430 func LoadFloat32sPart(s []float32) (Float32s, int) {
2431 var a, b uint64
2432 n := len(s)
2433 if n > 4 {
2434 n = 4
2435 }
2436 for i := 0; i < n; i++ {
2437 val := uint64(math.Float32bits(s[i]))
2438 if i < 2 {
2439 a |= val << (32 * i)
2440 } else {
2441 b |= val << (32 * (i - 2))
2442 }
2443 }
2444 return Float32s{a: a, b: b}, n
2445 }
2446
2447 func (x Float32s) get(i int) float32 {
2448 if i < 2 {
2449 return math.Float32frombits(uint32(x.a >> (32 * i)))
2450 }
2451 return math.Float32frombits(uint32(x.b >> (32 * (i - 2))))
2452 }
2453
2454 func (x *Float32s) set(i int, v float32) {
2455 val := uint64(math.Float32bits(v))
2456 if i < 2 {
2457 mask := uint64(0xffffffff) << (32 * i)
2458 x.a = (x.a &^ mask) | (val << (32 * i))
2459 } else {
2460 mask := uint64(0xffffffff) << (32 * (i - 2))
2461 x.b = (x.b &^ mask) | (val << (32 * (i - 2)))
2462 }
2463 }
2464
2465
2466 func (x Float32s) Abs() Float32s {
2467 var res Float32s
2468 for i := 0; i < 4; i++ {
2469 v := x.get(i)
2470 if v < 0 {
2471 res.set(i, -v)
2472 } else {
2473 res.set(i, v)
2474 }
2475 }
2476 return res
2477 }
2478
2479
2480 func (x Float32s) Add(y Float32s) Float32s {
2481 var res Float32s
2482 res.set(0, x.get(0)+y.get(0))
2483 res.set(1, x.get(1)+y.get(1))
2484 res.set(2, x.get(2)+y.get(2))
2485 res.set(3, x.get(3)+y.get(3))
2486 return res
2487 }
2488
2489
2490 func (x Float32s) ConvertToInt32() Int32s {
2491 var res Int32s
2492 for i := 0; i < 4; i++ {
2493 res.set(i, int32(x.get(i)))
2494 }
2495 return res
2496 }
2497
2498
2499 func (x Float32s) Div(y Float32s) Float32s {
2500 var res Float32s
2501 for i := 0; i < 4; i++ {
2502 res.set(i, x.get(i)/y.get(i))
2503 }
2504 return res
2505 }
2506
2507
2508 func (x Float32s) Equal(y Float32s) Mask32s {
2509 var res Mask32s
2510 for i := 0; i < 4; i++ {
2511 if x.get(i) == y.get(i) {
2512 res.set(i, true)
2513 }
2514 }
2515 return res
2516 }
2517
2518
2519 func (x Float32s) Greater(y Float32s) Mask32s {
2520 var res Mask32s
2521 for i := 0; i < 4; i++ {
2522 if x.get(i) > y.get(i) {
2523 res.set(i, true)
2524 }
2525 }
2526 return res
2527 }
2528
2529
2530 func (x Float32s) GreaterEqual(y Float32s) Mask32s {
2531 var res Mask32s
2532 for i := 0; i < 4; i++ {
2533 if x.get(i) >= y.get(i) {
2534 res.set(i, true)
2535 }
2536 }
2537 return res
2538 }
2539
2540
2541 func (x Float32s) Len() int {
2542 return 4
2543 }
2544
2545
2546 func (x Float32s) Less(y Float32s) Mask32s {
2547 var res Mask32s
2548 for i := 0; i < 4; i++ {
2549 if x.get(i) < y.get(i) {
2550 res.set(i, true)
2551 }
2552 }
2553 return res
2554 }
2555
2556
2557 func (x Float32s) LessEqual(y Float32s) Mask32s {
2558 var res Mask32s
2559 for i := 0; i < 4; i++ {
2560 if x.get(i) <= y.get(i) {
2561 res.set(i, true)
2562 }
2563 }
2564 return res
2565 }
2566
2567
2568 func (x Float32s) Masked(mask Mask32s) Float32s {
2569 return Float32s{a: x.a & mask.a, b: x.b & mask.b}
2570 }
2571
2572
2573 func (x Float32s) Max(y Float32s) Float32s {
2574 var res Float32s
2575 for i := 0; i < 4; i++ {
2576 vx := x.get(i)
2577 vy := y.get(i)
2578 if vx > vy {
2579 res.set(i, vx)
2580 } else {
2581 res.set(i, vy)
2582 }
2583 }
2584 return res
2585 }
2586
2587
2588 func (x Float32s) IfElse(mask Mask32s, y Float32s) Float32s {
2589 return Float32s{
2590 a: (x.a & mask.a) | (y.a &^ mask.a),
2591 b: (x.b & mask.b) | (y.b &^ mask.b),
2592 }
2593 }
2594
2595
2596 func (x Float32s) Min(y Float32s) Float32s {
2597 var res Float32s
2598 for i := 0; i < 4; i++ {
2599 vx := x.get(i)
2600 vy := y.get(i)
2601 if vx < vy {
2602 res.set(i, vx)
2603 } else {
2604 res.set(i, vy)
2605 }
2606 }
2607 return res
2608 }
2609
2610
2611 func (x Float32s) Mul(y Float32s) Float32s {
2612 var res Float32s
2613 res.set(0, x.get(0)*y.get(0))
2614 res.set(1, x.get(1)*y.get(1))
2615 res.set(2, x.get(2)*y.get(2))
2616 res.set(3, x.get(3)*y.get(3))
2617
2618 return res
2619 }
2620
2621
2622 func (x Float32s) MulAdd(y, z Float32s) Float32s {
2623 var res Float32s
2624
2625 res.set(0, x.get(0)*y.get(0)+z.get(0))
2626 res.set(1, x.get(1)*y.get(1)+z.get(1))
2627 res.set(2, x.get(2)*y.get(2)+z.get(2))
2628 res.set(3, x.get(3)*y.get(3)+z.get(3))
2629 return res
2630 }
2631
2632
2633 func (x Float32s) Neg() Float32s {
2634 var res Float32s
2635 for i := 0; i < 4; i++ {
2636 res.set(i, -(x.get(i)))
2637 }
2638 return res
2639 }
2640
2641
2642 func (x Float32s) NotEqual(y Float32s) Mask32s {
2643 var res Mask32s
2644 for i := 0; i < 4; i++ {
2645 if x.get(i) != y.get(i) {
2646 res.set(i, true)
2647 }
2648 }
2649 return res
2650 }
2651
2652
2653 func (x Float32s) Sqrt() Float32s {
2654 var res Float32s
2655 for i := 0; i < 4; i++ {
2656 res.set(i, float32(math.Sqrt(float64(x.get(i)))))
2657 }
2658 return res
2659 }
2660
2661
2662 func (x Float32s) Store(s []float32) {
2663 for i := 0; i < 4 && i < len(s); i++ {
2664 s[i] = x.get(i)
2665 }
2666 }
2667
2668
2669 func (x Float32s) StorePart(s []float32) int {
2670 x.Store(s)
2671 return min(len(s), x.Len())
2672 }
2673
2674
2675 func (x Float32s) String() string {
2676 var parts [4]float32
2677 for i := 0; i < 4; i++ {
2678 parts[i] = x.get(i)
2679 }
2680 return fmt.Sprint(parts)
2681 }
2682
2683
2684 func (x Float32s) Sub(y Float32s) Float32s {
2685 var res Float32s
2686 for i := 0; i < 4; i++ {
2687 res.set(i, x.get(i)-y.get(i))
2688 }
2689 return res
2690 }
2691
2692
2693 func (x Float32s) ToBits() Uint32s {
2694 return Uint32s{a: x.a, b: x.b}
2695 }
2696
2697
2698 func LoadFloat64s(s []float64) Float64s {
2699 var a, b uint64
2700 a = math.Float64bits(s[0])
2701 b = math.Float64bits(s[1])
2702 return Float64s{a: a, b: b}
2703 }
2704
2705
2706 func LoadFloat64sPart(s []float64) (Float64s, int) {
2707 n := len(s)
2708 var a, b uint64
2709 if n > 0 {
2710 a = math.Float64bits(s[0])
2711 }
2712 if n > 1 {
2713 b = math.Float64bits(s[1])
2714 }
2715 return Float64s{a: a, b: b}, n
2716 }
2717
2718 func (x Float64s) get(i int) float64 {
2719 if i == 0 {
2720 return math.Float64frombits(x.a)
2721 }
2722 return math.Float64frombits(x.b)
2723 }
2724
2725 func (x *Float64s) set(i int, v float64) {
2726 if i == 0 {
2727 x.a = math.Float64bits(v)
2728 } else {
2729 x.b = math.Float64bits(v)
2730 }
2731 }
2732
2733
2734 func (x Float64s) Abs() Float64s {
2735 var res Float64s
2736 for i := 0; i < 4; i++ {
2737 v := x.get(i)
2738 if v < 0 {
2739 res.set(i, -v)
2740 } else {
2741 res.set(i, v)
2742 }
2743 }
2744 return res
2745 }
2746
2747
2748 func (x Float64s) Add(y Float64s) Float64s {
2749 var res Float64s
2750 res.set(0, x.get(0)+y.get(0))
2751 res.set(1, x.get(1)+y.get(1))
2752 return res
2753 }
2754
2755
2756 func (x Float64s) Div(y Float64s) Float64s {
2757 var res Float64s
2758 res.set(0, x.get(0)/y.get(0))
2759 res.set(1, x.get(1)/y.get(1))
2760 return res
2761 }
2762
2763
2764 func (x Float64s) Equal(y Float64s) Mask64s {
2765 var res Mask64s
2766 if x.get(0) == y.get(0) {
2767 res.a = ^uint64(0)
2768 }
2769 if x.get(1) == y.get(1) {
2770 res.b = ^uint64(0)
2771 }
2772 return res
2773 }
2774
2775
2776 func (x Float64s) Greater(y Float64s) Mask64s {
2777 var res Mask64s
2778 if x.get(0) > y.get(0) {
2779 res.a = ^uint64(0)
2780 }
2781 if x.get(1) > y.get(1) {
2782 res.b = ^uint64(0)
2783 }
2784 return res
2785 }
2786
2787
2788 func (x Float64s) GreaterEqual(y Float64s) Mask64s {
2789 var res Mask64s
2790 if x.get(0) >= y.get(0) {
2791 res.a = ^uint64(0)
2792 }
2793 if x.get(1) >= y.get(1) {
2794 res.b = ^uint64(0)
2795 }
2796 return res
2797 }
2798
2799
2800 func (x Float64s) Len() int {
2801 return 2
2802 }
2803
2804
2805 func (x Float64s) Less(y Float64s) Mask64s {
2806 var res Mask64s
2807 if x.get(0) < y.get(0) {
2808 res.a = ^uint64(0)
2809 }
2810 if x.get(1) < y.get(1) {
2811 res.b = ^uint64(0)
2812 }
2813 return res
2814 }
2815
2816
2817 func (x Float64s) LessEqual(y Float64s) Mask64s {
2818 var res Mask64s
2819 if x.get(0) <= y.get(0) {
2820 res.a = ^uint64(0)
2821 }
2822 if x.get(1) <= y.get(1) {
2823 res.b = ^uint64(0)
2824 }
2825 return res
2826 }
2827
2828
2829 func (x Float64s) Masked(mask Mask64s) Float64s {
2830 return Float64s{a: x.a & mask.a, b: x.b & mask.b}
2831 }
2832
2833
2834 func (x Float64s) Max(y Float64s) Float64s {
2835 var res Float64s
2836 vx := x.get(0)
2837 vy := y.get(0)
2838 if vx > vy {
2839 res.set(0, vx)
2840 } else {
2841 res.set(0, vy)
2842 }
2843 vx = x.get(1)
2844 vy = y.get(1)
2845 if vx > vy {
2846 res.set(1, vx)
2847 } else {
2848 res.set(1, vy)
2849 }
2850 return res
2851 }
2852
2853
2854 func (x Float64s) IfElse(mask Mask64s, y Float64s) Float64s {
2855 return Float64s{
2856 a: (x.a & mask.a) | (y.a &^ mask.a),
2857 b: (x.b & mask.b) | (y.b &^ mask.b),
2858 }
2859 }
2860
2861
2862 func (x Float64s) Min(y Float64s) Float64s {
2863 var res Float64s
2864 vx := x.get(0)
2865 vy := y.get(0)
2866 if vx < vy {
2867 res.set(0, vx)
2868 } else {
2869 res.set(0, vy)
2870 }
2871 vx = x.get(1)
2872 vy = y.get(1)
2873 if vx < vy {
2874 res.set(1, vx)
2875 } else {
2876 res.set(1, vy)
2877 }
2878 return res
2879 }
2880
2881
2882 func (x Float64s) Mul(y Float64s) Float64s {
2883 var res Float64s
2884 res.set(0, x.get(0)*y.get(0))
2885 res.set(1, x.get(1)*y.get(1))
2886 return res
2887 }
2888
2889
2890 func (x Float64s) MulAdd(y, z Float64s) Float64s {
2891 var res Float64s
2892 res.set(0, x.get(0)*y.get(0)+z.get(0))
2893 res.set(1, x.get(1)*y.get(1)+z.get(1))
2894 return res
2895 }
2896
2897
2898 func (x Float64s) Neg() Float64s {
2899 var res Float64s
2900 for i := 0; i < 4; i++ {
2901 res.set(i, -(x.get(i)))
2902 }
2903 return res
2904 }
2905
2906
2907 func (x Float64s) NotEqual(y Float64s) Mask64s {
2908 var res Mask64s
2909 if x.get(0) != y.get(0) {
2910 res.a = ^uint64(0)
2911 }
2912 if x.get(1) != y.get(1) {
2913 res.b = ^uint64(0)
2914 }
2915 return res
2916 }
2917
2918
2919 func (x Float64s) Sqrt() Float64s {
2920 var res Float64s
2921 res.set(0, math.Sqrt(x.get(0)))
2922 res.set(1, math.Sqrt(x.get(1)))
2923 return res
2924 }
2925
2926
2927 func (x Float64s) Store(s []float64) {
2928 if len(s) > 0 {
2929 s[0] = x.get(0)
2930 }
2931 if len(s) > 1 {
2932 s[1] = x.get(1)
2933 }
2934 }
2935
2936
2937 func (x Float64s) StorePart(s []float64) int {
2938 x.Store(s)
2939 return min(len(s), x.Len())
2940 }
2941
2942
2943 func (x Float64s) String() string {
2944 return fmt.Sprint([2]float64{x.get(0), x.get(1)})
2945 }
2946
2947
2948 func (x Float64s) Sub(y Float64s) Float64s {
2949 var res Float64s
2950 res.set(0, x.get(0)-y.get(0))
2951 res.set(1, x.get(1)-y.get(1))
2952 return res
2953 }
2954
2955
2956 func (x Float64s) ToBits() Uint64s {
2957 return Uint64s{a: x.a, b: x.b}
2958 }
2959
2960 func (x *Mask8s) set(i int, v bool) {
2961 if v {
2962 if i < 8 {
2963 mask := uint64(0xff) << (8 * i)
2964 x.a |= mask
2965 } else {
2966 mask := uint64(0xff) << (8 * (i - 8))
2967 x.b |= mask
2968 }
2969 }
2970 }
2971
2972
2973 func (x Mask8s) And(y Mask8s) Mask8s {
2974 return Mask8s{a: x.a & y.a, b: x.b & y.b}
2975 }
2976
2977
2978 func (x Mask8s) Or(y Mask8s) Mask8s {
2979 return Mask8s{a: x.a | y.a, b: x.b | y.b}
2980 }
2981
2982
2983 func (x Mask8s) String() string {
2984 return fmt.Sprintf("{a:%#x, b:%#x}", x.a, x.b)
2985 }
2986
2987
2988 func (x Mask8s) ToInt8s() Int8s {
2989 return Int8s{a: x.a, b: x.b}
2990 }
2991
2992 func (x *Mask16s) set(i int, v bool) {
2993 if v {
2994 if i < 4 {
2995 mask := uint64(0xffff) << (16 * i)
2996 x.a |= mask
2997 } else {
2998 mask := uint64(0xffff) << (16 * (i - 4))
2999 x.b |= mask
3000 }
3001 }
3002 }
3003
3004
3005 func (x Mask16s) And(y Mask16s) Mask16s {
3006 return Mask16s{a: x.a & y.a, b: x.b & y.b}
3007 }
3008
3009
3010 func (x Mask16s) Or(y Mask16s) Mask16s {
3011 return Mask16s{a: x.a | y.a, b: x.b | y.b}
3012 }
3013
3014
3015 func (x Mask16s) String() string {
3016 return fmt.Sprintf("{a:%#x, b:%#x}", x.a, x.b)
3017 }
3018
3019
3020 func (x Mask16s) ToInt16s() Int16s {
3021 return Int16s{a: x.a, b: x.b}
3022 }
3023
3024 func (x *Mask32s) set(i int, v bool) {
3025 if v {
3026 if i < 2 {
3027 mask := uint64(0xffffffff) << (32 * i)
3028 x.a |= mask
3029 } else {
3030 mask := uint64(0xffffffff) << (32 * (i - 2))
3031 x.b |= mask
3032 }
3033 }
3034 }
3035
3036
3037 func (x Mask32s) And(y Mask32s) Mask32s {
3038 return Mask32s{a: x.a & y.a, b: x.b & y.b}
3039 }
3040
3041
3042 func (x Mask32s) Or(y Mask32s) Mask32s {
3043 return Mask32s{a: x.a | y.a, b: x.b | y.b}
3044 }
3045
3046
3047 func (x Mask32s) String() string {
3048 return fmt.Sprintf("{a:%#x, b:%#x}", x.a, x.b)
3049 }
3050
3051
3052 func (x Mask32s) ToInt32s() Int32s {
3053 return Int32s{a: x.a, b: x.b}
3054 }
3055
3056 func (x *Mask64s) set(i int, v bool) {
3057 if v {
3058 if i == 0 {
3059 x.a = ^uint64(0)
3060 } else {
3061 x.b = ^uint64(0)
3062 }
3063 }
3064 }
3065
3066
3067 func (x Mask64s) And(y Mask64s) Mask64s {
3068 return Mask64s{a: x.a & y.a, b: x.b & y.b}
3069 }
3070
3071
3072 func (x Mask64s) Or(y Mask64s) Mask64s {
3073 return Mask64s{a: x.a | y.a, b: x.b | y.b}
3074 }
3075
3076
3077 func (x Mask64s) String() string {
3078 return fmt.Sprintf("{a:%#x, b:%#x}", x.a, x.b)
3079 }
3080
3081
3082 func (x Mask64s) ToInt64s() Int64s {
3083 return Int64s{a: x.a, b: x.b}
3084 }
3085
3086 func newT(lo, hi uint64) Uint64s {
3087 return Uint64s{a: lo, b: hi}
3088 }
3089
3090
3091 func (x Uint64s) mwl(y Uint64s) Uint64s {
3092 hi, lo := bits.Mul64(x.a, y.a)
3093 return Uint64s{a: lo, b: hi}
3094 }
3095
3096 var (
3097
3098 m0 = newT(0x1084210842108421, 0x2108421084210842)
3099 m1 = newT(0x2108421084210842, 0x4210842108421084)
3100 m2 = newT(0x4210842108421084, 0x8421084210842108)
3101 m3 = newT(0x8421084210842108, 0x0842108421084210)
3102 m4 = newT(0x0842108421084210, 0x1084210842108421)
3103 )
3104
3105 func (x Uint64s) clmul(y Uint64s) Uint64s {
3106 x0 := x.And(m0)
3107 x1 := x.And(m1)
3108 x2 := x.And(m2)
3109 x3 := x.And(m3)
3110 x4 := x.And(m4)
3111
3112 y0 := y.And(m0)
3113 y1 := y.And(m1)
3114 y2 := y.And(m2)
3115 y3 := y.And(m3)
3116 y4 := y.And(m4)
3117
3118
3119 z := (x0.mwl(y0)).Xor(x1.mwl(y4)).Xor(x4.mwl(y1)).Xor(x2.mwl(y3)).Xor(x3.mwl(y2)).And(m0)
3120 z = (x3.mwl(y3)).Xor(x2.mwl(y4)).Xor(x4.mwl(y2)).Xor(x0.mwl(y1)).Xor(x1.mwl(y0)).And(m1).Or(z)
3121 z = (x1.mwl(y1)).Xor(x3.mwl(y4)).Xor(x4.mwl(y3)).Xor(x0.mwl(y2)).Xor(x2.mwl(y0)).And(m2).Or(z)
3122 z = (x4.mwl(y4)).Xor(x0.mwl(y3)).Xor(x3.mwl(y0)).Xor(x1.mwl(y2)).Xor(x2.mwl(y1)).And(m3).Or(z)
3123 z = (x2.mwl(y2)).Xor(x0.mwl(y4)).Xor(x4.mwl(y0)).Xor(x1.mwl(y3)).Xor(x3.mwl(y1)).And(m4).Or(z)
3124
3125 return z
3126 }
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141 func (x Uint64s) CarrylessMultiplyEven(y Uint64s) Uint64s {
3142 return x.clmul(y)
3143 }
3144
3145
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155
3156
3157
3158 func (x Uint64s) CarrylessMultiplyOdd(y Uint64s) Uint64s {
3159 x.a = x.b
3160 y.a = y.b
3161 return x.clmul(y)
3162 }
3163
// Replication multipliers: multiplying a zero-extended lane value by one of
// these copies it into every lane of a 64-bit word.
const (
	// by8 has the lowest bit of each of the eight bytes set
	// (0x0101010101010101).
	by8 = 0xffffffffffffffff / 0xff
	// by16 has the lowest bit of each of the four 16-bit lanes set
	// (0x0001000100010001).
	by16 = 0xffffffffffffffff / 0xffff
)
3168
3169
3170 func BroadcastInt8s(x int8) Int8s {
3171 v := (255 & uint64(x)) * by8
3172 return Int8s{a: v, b: v}
3173 }
3174
3175
3176 func BroadcastInt16s(x int16) Int16s {
3177 v := (65535 & uint64(x)) * by16
3178 return Int16s{a: v, b: v}
3179 }
3180
3181
3182 func BroadcastInt32s(x int32) Int32s {
3183 v := uint64(x) & 0xffffffff
3184 v = v<<32 | v
3185 return Int32s{a: v, b: v}
3186 }
3187
3188
3189 func BroadcastInt64s(x int64) Int64s {
3190 v := uint64(x)
3191 return Int64s{a: v, b: v}
3192 }
3193
3194
3195 func BroadcastUint8s(x uint8) Uint8s {
3196 v := uint64(x) * by8
3197 return Uint8s{a: v, b: v}
3198
3199 }
3200
3201
3202 func BroadcastUint16s(x uint16) Uint16s {
3203 v := uint64(x) * by16
3204 return Uint16s{a: v, b: v}
3205
3206 }
3207
3208
3209 func BroadcastUint32s(x uint32) Uint32s {
3210 v := uint64(x)
3211 v = v<<32 | v
3212 return Uint32s{a: v, b: v}
3213 }
3214
3215
3216 func BroadcastUint64s(x uint64) Uint64s {
3217 return Uint64s{a: x, b: x}
3218 }
3219
3220
3221 func BroadcastFloat32s(x float32) Float32s {
3222 v := uint64(math.Float32bits(x))
3223 v = v<<32 | v
3224 return Float32s{a: v, b: v}
3225 }
3226
3227
3228 func BroadcastFloat64s(x float64) Float64s {
3229 v := math.Float64bits(x)
3230 return Float64s{a: v, b: v}
3231 }
3232
View as plain text