Text file src/math/big/arith_arm.s

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build !math_big_pure_go
     6  
     7  #include "textflag.h"
     8  
     9  // This file provides fast assembly versions for the elementary
    10  // arithmetic operations on vectors implemented in arith.go.
    11  
    12  // func addVV(z, x, y []Word) (c Word)
        //
        // Adds x and y word by word with carry propagation: each z[i] receives
        // x[i] + y[i] + carry-in, and c receives the final carry (0 or 1).
        // The element count is taken from z_len only; x and y are read for that
        // many 4-byte words.
        //
        // Register use:
        //	R1 = write cursor into z, R4 = end address of z
        //	R2 = read cursor into x,  R3 = read cursor into y
        //	R5, R6 = words being added, R0 = scratch / result c
        // The running carry lives in the CPU carry flag between iterations; the
        // only flag-setting instructions in the loop are ADC.S and TEQ.
    13  TEXT ·addVV(SB),NOSPLIT,$0
    14  	ADD.S	$0, R0		// clear carry flag
    15  	MOVW	z+0(FP), R1
    16  	MOVW	z_len+4(FP), R4
    17  	MOVW	x+12(FP), R2
    18  	MOVW	y+24(FP), R3
    19  	ADD	R4<<2, R1, R4	// R4 = z + 4*len(z), the stop address
    20  	B E1	// enter at the loop test so len(z) == 0 runs zero iterations
    21  L1:
    22  	MOVW.P	4(R2), R5	// R5 = next word of x; R2 advances by 4
    23  	MOVW.P	4(R3), R6	// R6 = next word of y; R3 advances by 4
    24  	ADC.S	R6, R5	// R5 += R6 + carry; carry flag updated for the next word
    25  	MOVW.P	R5, 4(R1)	// store the sum into z; R1 advances by 4
    26  E1:
    27  	TEQ	R1, R4	// equality test on the cursor; the carry flag must survive it
    28  	BNE L1
    29  
    30  	MOVW	$0, R0
    31  	MOVW.CS	$1, R0	// c = 1 only if the last addition carried out
    32  	MOVW	R0, c+36(FP)
    33  	RET
    34  
    35  
    36  // func subVV(z, x, y []Word) (c Word)
    37  // (same as addVV except for SBC instead of ADC and label names)
        //
        // Subtracts y from x word by word with borrow propagation: each z[i]
        // receives x[i] - y[i] - borrow-in, and c receives the final borrow
        // (0 or 1). The element count is taken from z_len only.
        //
        // On ARM the borrow is the inverse of the carry flag: carry set means
        // "no borrow". That is why the flag is primed with SUB.S $0 and why the
        // result is produced with the CC (carry clear) condition.
        //
        // Register use:
        //	R1 = write cursor into z, R4 = end address of z
        //	R2 = read cursor into x,  R3 = read cursor into y
        //	R5, R6 = minuend / subtrahend words, R0 = scratch / result c
    38  TEXT ·subVV(SB),NOSPLIT,$0
    39  	SUB.S	$0, R0		// clear borrow flag
    40  	MOVW	z+0(FP), R1
    41  	MOVW	z_len+4(FP), R4
    42  	MOVW	x+12(FP), R2
    43  	MOVW	y+24(FP), R3
    44  	ADD	R4<<2, R1, R4	// R4 = z + 4*len(z), the stop address
    45  	B E2	// enter at the loop test so len(z) == 0 runs zero iterations
    46  L2:
    47  	MOVW.P	4(R2), R5	// R5 = next word of x; R2 advances by 4
    48  	MOVW.P	4(R3), R6	// R6 = next word of y; R3 advances by 4
    49  	SBC.S	R6, R5	// R5 = R5 - R6 - borrow; flags updated for the next word
    50  	MOVW.P	R5, 4(R1)	// store the difference into z; R1 advances by 4
    51  E2:
    52  	TEQ	R1, R4	// equality test on the cursor; the carry flag must survive it
    53  	BNE L2
    54  
    55  	MOVW	$0, R0
    56  	MOVW.CC	$1, R0	// carry clear == borrow out of the last word, so c = 1
    57  	MOVW	R0, c+36(FP)
    58  	RET
    59  
    60  
    61  // func addVW(z, x []Word, y Word) (c Word)
        //
        // Adds the single word y to the vector x: z[0] = x[0] + y, and the carry
        // is then rippled through the remaining words (z[i] = x[i] + carry).
        // c receives the final carry (0 or 1). When len(z) == 0 nothing is
        // written to z and c is set to y itself.
        //
        // Register use:
        //	R1 = write cursor into z, R4 = end address of z
        //	R2 = read cursor into x,  R3 = y
        //	R5 = current word,        R0 = scratch / result c
    62  TEXT ·addVW(SB),NOSPLIT,$0
    63  	MOVW	z+0(FP), R1
    64  	MOVW	z_len+4(FP), R4
    65  	MOVW	x+12(FP), R2
    66  	MOVW	y+24(FP), R3
    67  	ADD	R4<<2, R1, R4	// R4 = z + 4*len(z), the stop address
    68  	TEQ	R1, R4	// empty vector?
    69  	BNE L3a
    70  	MOVW	R3, c+28(FP)	// len(z) == 0: return y unchanged as the carry
    71  	RET
    72  L3a:
        	// First word: plain ADD.S establishes the carry flag, so no explicit
        	// flag clearing is needed in this routine.
    73  	MOVW.P	4(R2), R5
    74  	ADD.S	R3, R5	// R5 = x[0] + y, sets carry
    75  	MOVW.P	R5, 4(R1)
    76  	B	E3
    77  L3:
    78  	MOVW.P	4(R2), R5	// R5 = next word of x; R2 advances by 4
    79  	ADC.S	$0, R5	// add only the incoming carry; carry flag updated
    80  	MOVW.P	R5, 4(R1)	// store into z; R1 advances by 4
    81  E3:
    82  	TEQ	R1, R4	// equality test on the cursor; the carry flag must survive it
    83  	BNE	L3
    84  
    85  	MOVW	$0, R0
    86  	MOVW.CS	$1, R0	// c = 1 only if the last addition carried out
    87  	MOVW	R0, c+28(FP)
    88  	RET
    89  
    90  
    91  // func subVW(z, x []Word, y Word) (c Word)
        //
        // Subtracts the single word y from the vector x: z[0] = x[0] - y, and the
        // borrow is then rippled through the remaining words
        // (z[i] = x[i] - borrow). c receives the final borrow (0 or 1). When
        // len(z) == 0 nothing is written to z and c is set to y itself.
        //
        // On ARM, carry clear after a subtraction means a borrow occurred, hence
        // the CC condition when materialising c.
        //
        // Register use:
        //	R1 = write cursor into z, R4 = end address of z
        //	R2 = read cursor into x,  R3 = y
        //	R5 = current word,        R0 = scratch / result c
    92  TEXT ·subVW(SB),NOSPLIT,$0
    93  	MOVW	z+0(FP), R1
    94  	MOVW	z_len+4(FP), R4
    95  	MOVW	x+12(FP), R2
    96  	MOVW	y+24(FP), R3
    97  	ADD	R4<<2, R1, R4	// R4 = z + 4*len(z), the stop address
    98  	TEQ	R1, R4	// empty vector?
    99  	BNE L4a
   100  	MOVW	R3, c+28(FP)	// len(z) == 0: return y unchanged as the borrow
   101  	RET
   102  L4a:
        	// First word: plain SUB.S establishes the carry/borrow flag, so no
        	// explicit flag priming is needed in this routine.
   103  	MOVW.P	4(R2), R5
   104  	SUB.S	R3, R5	// R5 = x[0] - y, sets flags
   105  	MOVW.P	R5, 4(R1)
   106  	B	E4
   107  L4:
   108  	MOVW.P	4(R2), R5	// R5 = next word of x; R2 advances by 4
   109  	SBC.S	$0, R5	// subtract only the incoming borrow; flags updated
   110  	MOVW.P	R5, 4(R1)	// store into z; R1 advances by 4
   111  E4:
   112  	TEQ	R1, R4	// equality test on the cursor; the carry flag must survive it
   113  	BNE	L4
   114  
   115  	MOVW	$0, R0
   116  	MOVW.CC	$1, R0	// carry clear == borrow out of the last word, so c = 1
   117  	MOVW	R0, c+28(FP)
   118  	RET
   119  
   120  
   121  // func shlVU(z, x []Word, s uint) (c Word)
        //
        // Shifts the len(z)-word vector x left by s bits into z, working from the
        // highest-index word down to index 0. c receives the bits shifted out of
        // the top word, i.e. x[len-1] >> (32-s). For s != 0 each result word is
        // z[i] = x[i]<<s | x[i-1]>>(32-s), with z[0] = x[0]<<s.
        //
        // Special cases handled below:
        //	len(z) == 0: nothing is read or written, c = 0.
        //	s == 0:      x is copied to z word by word (top down), c = 0.
        // NOTE(review): the general path computes 32-s and so presumably relies
        // on callers passing s < 32 - confirm against the Go callers.
        //
        // Register use on the general path:
        //	R1 = z + 4 (stop address), R5 = write cursor, starts past the end of z
        //	R2 = read cursor, starts past the end of x
        //	R3 = s, R4 = 32 - s
        //	R6 = word just loaded, R7 = result word under construction
   122  TEXT ·shlVU(SB),NOSPLIT,$0
   123  	MOVW	z_len+4(FP), R5
   124  	TEQ	$0, R5
   125  	BEQ	X7	// empty vector: just return c = 0
   126  
   127  	MOVW	z+0(FP), R1
   128  	MOVW	x+12(FP), R2
   129  	ADD	R5<<2, R2, R2	// R2 = x + 4*len(z), one past the last word read
   130  	ADD	R5<<2, R1, R5	// R5 = z + 4*len(z), one past the last word written
   131  	MOVW	s+24(FP), R3
   132  	TEQ	$0, R3	// shift 0 is special
   133  	BEQ	Y7
   134  	ADD	$4, R1	// stop one word early
   135  	MOVW	$32, R4
   136  	SUB	R3, R4	// R4 = 32 - s
   137  	MOVW	$0, R7	// (R7 is reassigned below before it is read)
   138  
        	// top word: its high bits become c, its low bits start the first result
   139  	MOVW.W	-4(R2), R6	// pre-decrement R2, then load the top word of x
   140  	MOVW	R6<<R3, R7
   141  	MOVW	R6>>R4, R6
   142  	MOVW	R6, c+28(FP)
   143  	B E7
   144  
   145  L7:
   146  	MOVW.W	-4(R2), R6	// load the next lower word of x
   147  	ORR	R6>>R4, R7	// fill the low s bits from the top of that word
   148  	MOVW.W	R7, -4(R5)	// pre-decrement R5, then store the finished word
   149  	MOVW	R6<<R3, R7	// begin the next (lower) result word
   150  E7:
   151  	TEQ	R1, R5	// loop ends when only z[0] remains to be written
   152  	BNE	L7
   153  
   154  	MOVW	R7, -4(R5)	// z[0]: no lower word contributes bits; no writeback
   155  	RET
   156  
   157  Y7:	// copy loop, because shift 0 == shift 32
        	// Reached only with len(z) != 0, so running the body before the test is
        	// safe. R1 is still the unadjusted base of z on this path.
   158  	MOVW.W	-4(R2), R6
   159  	MOVW.W	R6, -4(R5)
   160  	TEQ	R1, R5
   161  	BNE Y7
   162  
        	// falls through: a zero shift also returns c = 0
   163  X7:
   164  	MOVW	$0, R1
   165  	MOVW	R1, c+28(FP)
   166  	RET
   167  
   168  
   169  // func shrVU(z, x []Word, s uint) (c Word)
        //
        // Shifts the len(z)-word vector x right by s bits into z, working from
        // index 0 up to the highest-index word. c receives the bits shifted out
        // of the bottom word, i.e. x[0] << (32-s). For s != 0 each result word is
        // z[i] = x[i]>>s | x[i+1]<<(32-s), with the top word z[len-1] = x[len-1]>>s.
        //
        // Special cases handled below:
        //	len(z) == 0: nothing is read or written, c = 0.
        //	s == 0:      x is copied to z word by word (bottom up), c = 0.
        // NOTE(review): the general path computes 32-s and so presumably relies
        // on callers passing s < 32 - confirm against the Go callers.
        //
        // Register use on the general path:
        //	R1 = write cursor into z, R5 = address of the last word of z (stop)
        //	R2 = read cursor into x
        //	R3 = s, R4 = 32 - s
        //	R6 = word just loaded, R7 = result word under construction
   170  TEXT ·shrVU(SB),NOSPLIT,$0
   171  	MOVW	z_len+4(FP), R5
   172  	TEQ	$0, R5
   173  	BEQ	X6	// empty vector: just return c = 0
   174  
   175  	MOVW	z+0(FP), R1
   176  	MOVW	x+12(FP), R2
   177  	ADD	R5<<2, R1, R5	// R5 = z + 4*len(z), one past the last word
   178  	MOVW	s+24(FP), R3
   179  	TEQ	$0, R3	// shift 0 is special
   180  	BEQ Y6
   181  	SUB	$4, R5	// stop one word early
   182  	MOVW	$32, R4
   183  	SUB	R3, R4	// R4 = 32 - s
   184  	MOVW	$0, R7	// (R7 is reassigned below before it is read)
   185  
   186  	// first word
        	// its low bits become c, its high bits start the first result word
   187  	MOVW.P	4(R2), R6	// load x[0]; R2 advances by 4
   188  	MOVW	R6>>R3, R7
   189  	MOVW	R6<<R4, R6
   190  	MOVW	R6, c+28(FP)
   191  	B E6
   192  
   193  	// word loop
   194  L6:
   195  	MOVW.P	4(R2), R6	// load the next higher word of x
   196  	ORR	R6<<R4, R7	// fill the high s bits from the bottom of that word
   197  	MOVW.P	R7, 4(R1)	// store the finished word; R1 advances by 4
   198  	MOVW	R6>>R3, R7	// begin the next (higher) result word
   199  E6:
   200  	TEQ	R1, R5	// loop ends when only the top word remains to be written
   201  	BNE	L6
   202  
   203  	MOVW	R7, 0(R1)	// top word: no higher word contributes bits
   204  	RET
   205  
   206  Y6:	// copy loop, because shift 0 == shift 32
        	// Reached only with len(z) != 0, so running the body before the test is
        	// safe. R5 is still the unadjusted end of z on this path.
   207  	MOVW.P	4(R2), R6
   208  	MOVW.P	R6, 4(R1)
   209  	TEQ R1, R5
   210  	BNE Y6
   211  
        	// falls through: a zero shift also returns c = 0
   212  X6:
   213  	MOVW	$0, R1
   214  	MOVW	R1, c+28(FP)
   215  	RET
   216  
   217  
   218  // func mulAddVWW(z, x []Word, y, r Word) (c Word)
        //
        // Multiplies the vector x by the single word y, adding the word r at the
        // bottom. Per word, the 64-bit value x[i]*y + carry-in is formed; its low
        // half is stored to z[i] and its high half becomes the carry-in of the
        // next word. The initial carry-in is r, and c receives the final one
        // (which is r itself when len(z) == 0).
        //
        // Register use:
        //	R0 = constant 0 (operand for ADC), R3 = y
        //	R1 = write cursor into z, R5 = end address of z
        //	R2 = read cursor into x
        //	R4 = carry word between iterations
        //	R6 = low half, R7 = high half of the current product
   219  TEXT ·mulAddVWW(SB),NOSPLIT,$0
   220  	MOVW	$0, R0
   221  	MOVW	z+0(FP), R1
   222  	MOVW	z_len+4(FP), R5
   223  	MOVW	x+12(FP), R2
   224  	MOVW	y+24(FP), R3
   225  	MOVW	r+28(FP), R4
   226  	ADD	R5<<2, R1, R5	// R5 = z + 4*len(z), the stop address
   227  	B E8	// enter at the loop test so len(z) == 0 runs zero iterations
   228  
   229  	// word loop
   230  L8:
   231  	MOVW.P	4(R2), R6	// R6 = next word of x; R2 advances by 4
   232  	MULLU	R6, R3, (R7, R6)	// 64-bit unsigned product: R7 = high, R6 = low
   233  	ADD.S	R4, R6	// low half += carry word, sets the carry flag
   234  	ADC	R0, R7	// fold that carry into the high half (cannot overflow)
   235  	MOVW.P	R6, 4(R1)	// store the low half into z; R1 advances by 4
   236  	MOVW	R7, R4	// high half is the carry word for the next iteration
   237  E8:
   238  	TEQ	R1, R5
   239  	BNE	L8
   240  
   241  	MOVW	R4, c+32(FP)
   242  	RET
   243  
   244  
   245  // func addMulVVW(z, x []Word, y Word) (c Word)
        //
        // Multiply-accumulate: adds x*y into the existing contents of z. Per
        // word, the 64-bit value x[i]*y + carry-in + z[i] is formed; its low half
        // is written back to z[i] and its high half becomes the carry-in of the
        // next word. The initial carry-in is 0 and c receives the final one.
        //
        // Register use:
        //	R0 = constant 0 (operand for ADC), R3 = y
        //	R1 = read/write cursor into z, R5 = end address of z
        //	R2 = read cursor into x
        //	R4 = carry word between iterations; briefly reused inside the loop
        //	     to hold the old z[i]
        //	R6 = low half, R7 = high half of the current sum
   246  TEXT ·addMulVVW(SB),NOSPLIT,$0
   247  	MOVW	$0, R0
   248  	MOVW	z+0(FP), R1
   249  	MOVW	z_len+4(FP), R5
   250  	MOVW	x+12(FP), R2
   251  	MOVW	y+24(FP), R3
   252  	ADD	R5<<2, R1, R5	// R5 = z + 4*len(z), the stop address
   253  	MOVW	$0, R4	// no carry into the first word
   254  	B E9	// enter at the loop test so len(z) == 0 runs zero iterations
   255  
   256  	// word loop
   257  L9:
   258  	MOVW.P	4(R2), R6	// R6 = next word of x; R2 advances by 4
   259  	MULLU	R6, R3, (R7, R6)	// 64-bit unsigned product: R7 = high, R6 = low
   260  	ADD.S	R4, R6	// low half += carry word, sets the carry flag
   261  	ADC	R0, R7	// fold that carry into the high half
   262  	MOVW	0(R1), R4	// carry word is consumed; reuse R4 for the old z[i]
   263  	ADD.S	R4, R6	// low half += old z[i], sets the carry flag
   264  	ADC	R0, R7	// fold that carry into the high half (cannot overflow)
   265  	MOVW.P	R6, 4(R1)	// write the low half back to z; R1 advances by 4
   266  	MOVW	R7, R4	// high half is the carry word for the next iteration
   267  E9:
   268  	TEQ	R1, R5
   269  	BNE	L9
   270  
   271  	MOVW	R4, c+28(FP)
   272  	RET
   273  

View as plain text