Text file src/math/atan2_s390x.s

     1  // Copyright 2017 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "textflag.h"
     6  
     7  #define PosInf		0x7FF0000000000000	// IEEE 754 binary64 bit pattern of +Inf
     8  #define NegInf		0xFFF0000000000000	// bit pattern of -Inf
     9  #define NegZero		0x8000000000000000	// bit pattern of -0 (only the sign bit set)
    10  #define Pi		0x400921FB54442D18	// Pi
    11  #define NegPi		0xC00921FB54442D18	// -Pi
    12  #define Pi3Div4		0x4002D97C7F3321D2	// 3Pi/4
    13  #define NegPi3Div4	0xC002D97C7F3321D2	// -3Pi/4
    14  #define PiDiv4		0x3FE921FB54442D18	// Pi/4
    15  #define NegPiDiv4	0xBFE921FB54442D18	// -Pi/4
    16  
    17  // Minimax polynomial coefficients and other constants: 20 float64 values at byte offsets 0..152, each loaded through R9 by atan2Asm.
    18  DATA ·atan2rodataL25<> + 0(SB)/8, $0.199999999999554423E+00
    19  DATA ·atan2rodataL25<> + 8(SB)/8, $-.333333333333330928E+00
    20  DATA ·atan2rodataL25<> + 16(SB)/8, $0.111111110136634272E+00
    21  DATA ·atan2rodataL25<> + 24(SB)/8, $-.142857142828026806E+00
    22  DATA ·atan2rodataL25<> + 32(SB)/8, $0.769228118888682505E-01
    23  DATA ·atan2rodataL25<> + 40(SB)/8, $0.588059263575587687E-01
    24  DATA ·atan2rodataL25<> + 48(SB)/8, $-.909090711945939878E-01
    25  DATA ·atan2rodataL25<> + 56(SB)/8, $-.666641501287528609E-01
    26  DATA ·atan2rodataL25<> + 64(SB)/8, $0.472329433805024762E-01
    27  DATA ·atan2rodataL25<> + 72(SB)/8, $-.525380587584426406E-01
    28  DATA ·atan2rodataL25<> + 80(SB)/8, $-.422172007412067035E-01
    29  DATA ·atan2rodataL25<> + 88(SB)/8, $0.366935664549587481E-01
    30  DATA ·atan2rodataL25<> + 96(SB)/8, $0.220852012160300086E-01
    31  DATA ·atan2rodataL25<> + 104(SB)/8, $-.299856214685512712E-01
    32  DATA ·atan2rodataL25<> + 112(SB)/8, $0.726338160757602439E-02
    33  DATA ·atan2rodataL25<> + 120(SB)/8, $0.134893651284712515E-04
    34  DATA ·atan2rodataL25<> + 128(SB)/8, $-.291935324869629616E-02
    35  DATA ·atan2rodataL25<> + 136(SB)/8, $-.154797890856877418E-03
    36  DATA ·atan2rodataL25<> + 144(SB)/8, $0.843488472994227321E-03
    37  DATA ·atan2rodataL25<> + 152(SB)/8, $-.139950258898989925E-01
    38  GLOBL ·atan2rodataL25<> + 0(SB), RODATA, $160	// file-local, read-only, 160 bytes (20 * 8)
    39  
    40  DATA ·atan2xpi2h<> + 0(SB)/8, $0x3ff330e4e4fa7b1b	// ~ +1.19944; multiplied by atan2xpim this is ~ +Pi/2
    41  DATA ·atan2xpi2h<> + 8(SB)/8, $0xbff330e4e4fa7b1b	// entry 0 with the sign bit set: ~ -Pi/2 after the multiply
    42  DATA ·atan2xpi2h<> + 16(SB)/8, $0x400330e4e4fa7b1b	// entry 0 with the exponent raised by one (doubled): ~ +Pi after the multiply
    43  DATA ·atan2xpi2h<> + 24(SB)/8, $0xc00330e4e4fa7b1b	// entry 2 with the sign bit set: ~ -Pi after the multiply
    44  GLOBL ·atan2xpi2h<> + 0(SB), RODATA, $32	// 4 float64 entries; one is selected by the byte offset in R2 at label L18 of atan2Asm
    45  DATA ·atan2xpim<> + 0(SB)/8, $0x3ff4f42b00000000	// ~ 1.30961 (low 32 mantissa bits zero); multiplier for the selected atan2xpi2h entry
    46  GLOBL ·atan2xpim<> + 0(SB), RODATA, $8
    47  
    48  // Atan2 returns the arc tangent of y/x, using
    49  // the signs of the two to determine the quadrant
    50  // of the return value.
    51  //
    52  // Special cases are (in order):
    53  //      Atan2(y, NaN) = NaN
    54  //      Atan2(NaN, x) = NaN
    55  //      Atan2(+0, x>=0) = +0
    56  //      Atan2(-0, x>=0) = -0
    57  //      Atan2(+0, x<=-0) = +Pi
    58  //      Atan2(-0, x<=-0) = -Pi
    59  //      Atan2(y>0, 0) = +Pi/2
    60  //      Atan2(y<0, 0) = -Pi/2
    61  //      Atan2(+Inf, +Inf) = +Pi/4
    62  //      Atan2(-Inf, +Inf) = -Pi/4
    63  //      Atan2(+Inf, -Inf) = 3Pi/4
    64  //      Atan2(-Inf, -Inf) = -3Pi/4
    65  //      Atan2(y, +Inf) = 0
    66  //      Atan2(y>0, -Inf) = +Pi
    67  //      Atan2(y<0, -Inf) = -Pi
    68  //      Atan2(+Inf, x) = +Pi/2
    69  //      Atan2(-Inf, x) = -Pi/2
    70  // The algorithm used is minimax polynomial approximation
    71  // with coefficients determined with a Remez exchange algorithm.
    72  // NOTE: the body below names the first argument x (x+0(FP)) and the second y (y+8(FP)), the reverse of the Atan2(y, x) notation in the table above.
    73  TEXT	·atan2Asm(SB), NOSPLIT, $0-24	// no local frame; 24 bytes of arguments plus result (ret+16(FP))
    74  	// Load the raw bit patterns of both arguments so the special cases can be tested with integer compares.
    75  	MOVD	x+0(FP), R1	// R1 = bits of x (first argument)
    76  	MOVD	y+8(FP), R2	// R2 = bits of y (second argument)
    77  
    78  	// special case Atan2(NaN, y) = NaN
    79  	MOVD	$~(1<<63), R5
    80  	AND	R1, R5		// R5 = bits of |x| (sign bit cleared); R1 itself is left intact
    81  	MOVD	$PosInf, R3
    82  	CMPUBLT	R3, R5, returnX	// unsigned: bits of |x| above those of +Inf means x is NaN; return x
    83  
    84  	// special case Atan2(x, NaN) = NaN
    85  	MOVD	$~(1<<63), R5	// reload the mask, the AND above overwrote R5
    86  	AND	R2, R5		// R5 = bits of |y|
    87  	CMPUBLT R3, R5, returnY	// R3 still holds PosInf; y is NaN, return y
    88  
    89  	MOVD	$NegZero, R3
    90  	CMPUBEQ	R3, R1, xIsNegZero	// x == -0
    91  
    92  	MOVD	$0, R3
    93  	CMPUBEQ	R3, R1, xIsPosZero	// x == +0
    94  
    95  	MOVD	$PosInf, R4
    96  	CMPUBEQ	R4, R2, yIsPosInf	// y == +Inf
    97  
    98  	MOVD	$NegInf, R4
    99  	CMPUBEQ	R4, R2, yIsNegInf	// y == -Inf
   100  	BR	Normal
   101  xIsNegZero:
   102  	// special case Atan2(-0, y>=0) = -0
   103  	MOVD	$0, R4
   104  	CMPBLE	R4, R2, returnX	// signed compare of the bits: sign bit of y clear, so return x (which is -0)
   105  
   106  	// special case Atan2(-0, y<=-0) = -Pi; only y == -0 is decided here, other negative y continue to Normal
   107  	MOVD	$NegZero, R4
   108  	CMPBGE	R4, R2, returnNegPi	// NegZero is the smallest signed 64-bit value, so a signed >= holds only when the bits of y equal it
   109  	BR	Normal
   110  xIsPosZero:
   111  	// special case Atan2(0, 0) = 0
   112  	MOVD	$0, R4
   113  	CMPUBEQ	R4, R2, returnX	// y == +0: return x (which is +0)
   114  
   115  	// special case Atan2(0, y<=-0) = Pi; as above, only y == -0 is decided here
   116  	MOVD	$NegZero, R4
   117  	CMPBGE	R4, R2, returnPi
   118  	BR Normal
   119  yIsNegInf:
   120  	// special case Atan2(+Inf, -Inf) = 3Pi/4
   121  	MOVD	$PosInf, R3
   122  	CMPUBEQ	R3, R1, posInfNegInf
   123  
   124  	// special case Atan2(-Inf, -Inf) = -3Pi/4
   125  	MOVD	$NegInf, R3
   126  	CMPUBEQ	R3, R1, negInfNegInf
   127  	BR Normal
   128  yIsPosInf:
   129  	// special case Atan2(+Inf, +Inf) = Pi/4
   130  	MOVD	$PosInf, R3
   131  	CMPUBEQ	R3, R1, posInfPosInf
   132  
   133  	// special case Atan2(-Inf, +Inf) = -Pi/4
   134  	MOVD	$NegInf, R3
   135  	CMPUBEQ	R3, R1, negInfPosInf
   136  
   137  	// special case Atan2(x, +Inf) = Copysign(0, x)
   138  	CMPBLT	R1, $0, returnNegZero	// bits of x negative as a signed integer: sign bit set
   139  	BR returnPosZero
   140  
   141  Normal:	// general case: polynomial in a quotient of the arguments, then an optional offset from atan2xpi2h
   142  	FMOVD	x+0(FP), F0	// F0 = x
   143  	FMOVD	y+8(FP), F2	// F2 = y
   144  	MOVD	$·atan2rodataL25<>+0(SB), R9	// R9 = base address of the coefficient table
   145  	LGDR	F0, R2	// R2 = bits of x
   146  	LGDR	F2, R1	// R1 = bits of y
   147  	RISBGNZ	$32, $63, $32, R2, R2	// low word of R2 = high (sign/exponent) word of x
   148  	RISBGNZ	$32, $63, $32, R1, R1	// low word of R1 = high word of y
   149  	WORD	$0xB9170032	//llgtr	%r3,%r2 : R3 = low 31 bits of R2, i.e. the high word of x without its sign
   150  	RISBGZ	$63, $63, $33, R2, R5	// R5 = 0 or 1; presumably the sign bit of x (bit 31 of R2) -- confirm against RISBG semantics
   151  	WORD	$0xB9170041	//llgtr	%r4,%r1 : R4 = high word of y without its sign
   152  	WFLCDB	V0, V20	// V20 = -x; copied at L22/L24 when x tests negative
   153  	MOVW	R4, R6
   154  	MOVW	R3, R7
   155  	CMPUBLT	R6, R7, L17	// high word of |y| below that of |x|: take the swapped-quotient path
   156  	WFDDB	V2, V0, V3	// V3 = quotient of the two arguments; presumably x/y (divisor written first) -- confirm operand order
   157  	ADDW	$2, R5, R2	// R2 = R5 + 2: selects atan2xpi2h entry 2 or 3 at L18
   158  	MOVW	R4, R6
   159  	MOVW	R3, R7
   160  	CMPUBLE	R6, R7, L20	// the < case branched above, so this is taken when the high words are equal
   161  L3:	// V3 = polynomial argument; evaluated with the coefficients at 0..152(R9)
   162  	WFMDB	V3, V3, V4	// V4 = V3*V3
   163  	VLEG	$0, 152(R9), V18
   164  	VLEG	$0, 144(R9), V16
   165  	FMOVD	136(R9), F1
   166  	FMOVD	128(R9), F5
   167  	FMOVD	120(R9), F6
   168  	WFMADB	V4, V16, V5, V16	// multiply-add; presumably last = first*second + third -- confirm operand order
   169  	WFMADB	V4, V6, V1, V6
   170  	FMOVD	112(R9), F7
   171  	WFMDB	V4, V4, V1	// V1 = V4*V4
   172  	WFMADB	V4, V7, V18, V7
   173  	VLEG	$0, 104(R9), V18
   174  	WFMADB	V1, V6, V16, V6
   175  	CMPWU	R4, R3	// condition code consumed by BLT/BGT after the polynomial; relies on the instructions in between leaving it unchanged
   176  	FMOVD	96(R9), F5
   177  	VLEG	$0, 88(R9), V16
   178  	WFMADB	V4, V5, V18, V5
   179  	VLEG	$0, 80(R9), V18
   180  	VLEG	$0, 72(R9), V22
   181  	WFMADB	V4, V16, V18, V16
   182  	VLEG	$0, 64(R9), V18
   183  	WFMADB	V1, V7, V5, V7
   184  	WFMADB	V4, V18, V22, V18
   185  	WFMDB	V1, V1, V5	// V5 = V1*V1
   186  	WFMADB	V1, V16, V18, V16
   187  	VLEG	$0, 56(R9), V18
   188  	WFMADB	V5, V6, V7, V6
   189  	VLEG	$0, 48(R9), V22
   190  	FMOVD	40(R9), F7
   191  	WFMADB	V4, V7, V18, V7
   192  	VLEG	$0, 32(R9), V18
   193  	WFMADB	V5, V6, V16, V6
   194  	WFMADB	V4, V18, V22, V18
   195  	VLEG	$0, 24(R9), V16
   196  	WFMADB	V1, V7, V18, V7
   197  	VLEG	$0, 16(R9), V18
   198  	VLEG	$0, 8(R9), V22
   199  	WFMADB	V4, V18, V16, V18
   200  	VLEG	$0, 0(R9), V16
   201  	WFMADB	V5, V6, V7, V6
   202  	WFMADB	V4, V16, V22, V16
   203  	FMUL	F3, F4	// F4 = V3*V3*V3 (F4 still held V3*V3)
   204  	WFMADB	V1, V18, V16, V1
   205  	FMADD	F6, F5, F1
   206  	WFMADB	V4, V1, V3, V4	// result so far in V4; presumably V4*V1 + V3 -- confirm operand order
   207  	BLT	L18	// from CMPWU: high word of |y| < that of |x|, always add the table offset
   208  	BGT	L7	// greater: add the offset only if y is negative
   209  	LTDBR	F2, F2	// high words equal: compare |y| and |x| in full
   210  	BLTU	L21
   211  L8:
   212  	LTDBR	F0, F0
   213  	BLTU	L22
   214  L9:
   215  	WFCHDBS	V2, V0, V0	// compare |y| (V2) with |x| (V0); presumably tests V2 > V0 -- confirm operand order
   216  	BNE	L18
   217  L7:
   218  	MOVW	R1, R6	// R6 = high word of y, sign-extended
   219  	CMPBGE	R6, $0, L1	// sign bit of y clear: no offset, return V4 as is
   220  L18:	// add atan2xpi2h[entry] * atan2xpim to the result
   221  	RISBGZ	$58, $60, $3, R2, R2	// R2 = byte offset of the selected entry; presumably (R2 & 7) * 8 -- confirm against RISBG semantics
   222  	MOVD	$·atan2xpi2h<>+0(SB), R1	// R1 = base address of atan2xpi2h
   223  	MOVD	·atan2xpim<>+0(SB), R3
   224  	LDGR	R3, F0	// F0 = atan2xpim
   225  	WORD	$0xED021000	//madb	%f4,%f0,0(%r2,%r1)
   226  	BYTE	$0x40	// last two bytes of the 6-byte madb encoding started by the WORD above
   227  	BYTE	$0x1E
   228  L1:
   229  	FMOVD	F4, ret+16(FP)	// store the result
   230  	RET
   231  
   232  L20:	// high words tie: build |y| in F6 and |x| in F4, then compare them
   233  	LTDBR	F2, F2
   234  	BLTU	L23
   235  	FMOVD	F2, F6	// y not negative: F6 = y
   236  L4:
   237  	LTDBR	F0, F0
   238  	BLTU	L24
   239  	FMOVD	F0, F4	// x not negative: F4 = x
   240  L5:
   241  	WFCHDBS	V6, V4, V4	// compare |y| (V6) with |x| (V4)
   242  	BEQ	L3	// keep the quotient already in V3 and the entry index R5 + 2
   243  L17:	// swapped-quotient path
   244  	WFDDB	V0, V2, V4	// V4 = the quotient with the operands exchanged relative to the one before L3
   245  	BYTE	$0x18	//lr	%r2,%r5 : R2 = R5, selects atan2xpi2h entry 0 or 1 at L18
   246  	BYTE	$0x25
   247  	WORD	$0xB3130034	//lcdbr	%f3,%f4 : F3 = -F4
   248  	BR	L3
   249  L23:
   250  	WORD	$0xB3130062	//lcdbr	%f6,%f2 : F6 = -y
   251  	BR	L4
   252  L22:
   253  	VLR	V20, V0	// V0 = V20 = -x
   254  	BR	L9
   255  L21:
   256  	WORD	$0xB3130022	//lcdbr	%f2,%f2 : F2 = -F2
   257  	BR	L8
   258  L24:
   259  	VLR	V20, V4	// V4 = V20 = -x
   260  	BR	L5
   261  returnX:	// the result is the same as the first argument
   262  	MOVD	R1, ret+16(FP)
   263  	RET
   264  returnY:	// the result is the same as the second argument
   265  	MOVD	R2, ret+16(FP)
   266  	RET
   267  returnPi:
   268  	MOVD	$Pi, R1
   269  	MOVD	R1, ret+16(FP)
   270  	RET
   271  returnNegPi:
   272  	MOVD	$NegPi, R1
   273  	MOVD	R1, ret+16(FP)
   274  	RET
   275  posInfNegInf:	// x = +Inf, y = -Inf
   276  	MOVD	$Pi3Div4, R1
   277  	MOVD	R1, ret+16(FP)
   278  	RET
   279  negInfNegInf:	// x = -Inf, y = -Inf
   280  	MOVD	$NegPi3Div4, R1
   281  	MOVD	R1, ret+16(FP)
   282  	RET
   283  posInfPosInf:	// x = +Inf, y = +Inf
   284  	MOVD	$PiDiv4, R1
   285  	MOVD	R1, ret+16(FP)
   286  	RET
   287  negInfPosInf:	// x = -Inf, y = +Inf
   288  	MOVD	$NegPiDiv4, R1
   289  	MOVD	R1, ret+16(FP)
   290  	RET
   291  returnNegZero:
   292  	MOVD	$NegZero, R1
   293  	MOVD	R1, ret+16(FP)
   294  	RET
   295  returnPosZero:
   296  	MOVD	$0, ret+16(FP)
   297  	RET
   298  

View as plain text