Text file
src/math/atan2_s390x.s
1 // Copyright 2017 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "textflag.h"
6
7 #define PosInf 0x7FF0000000000000 // IEEE 754 binary64 bit pattern of +Inf; compared against raw argument bits
8 #define NegInf 0xFFF0000000000000 // bit pattern of -Inf
9 #define NegZero 0x8000000000000000 // bit pattern of -0 (only the sign bit set)
10 #define Pi 0x400921FB54442D18 // bit pattern of Pi
11 #define NegPi 0xC00921FB54442D18 // bit pattern of -Pi
12 #define Pi3Div4 0x4002D97C7F3321D2 // bit pattern of 3Pi/4
13 #define NegPi3Div4 0xC002D97C7F3321D2 // bit pattern of -3Pi/4
14 #define PiDiv4 0x3FE921FB54442D18 // bit pattern of Pi/4
15 #define NegPiDiv4 0xBFE921FB54442D18 // bit pattern of -Pi/4
16
17 // Minimax polynomial coefficients and other constants: twenty 8-byte floating-point values that atan2Asm reads through R9 at offsets 0..152
18 DATA ·atan2rodataL25<> + 0(SB)/8, $0.199999999999554423E+00 // close to 1/5
19 DATA ·atan2rodataL25<> + 8(SB)/8, $-.333333333333330928E+00 // close to -1/3
20 DATA ·atan2rodataL25<> + 16(SB)/8, $0.111111110136634272E+00 // close to 1/9
21 DATA ·atan2rodataL25<> + 24(SB)/8, $-.142857142828026806E+00 // close to -1/7
22 DATA ·atan2rodataL25<> + 32(SB)/8, $0.769228118888682505E-01 // close to 1/13
23 DATA ·atan2rodataL25<> + 40(SB)/8, $0.588059263575587687E-01
24 DATA ·atan2rodataL25<> + 48(SB)/8, $-.909090711945939878E-01 // close to -1/11
25 DATA ·atan2rodataL25<> + 56(SB)/8, $-.666641501287528609E-01
26 DATA ·atan2rodataL25<> + 64(SB)/8, $0.472329433805024762E-01
27 DATA ·atan2rodataL25<> + 72(SB)/8, $-.525380587584426406E-01
28 DATA ·atan2rodataL25<> + 80(SB)/8, $-.422172007412067035E-01
29 DATA ·atan2rodataL25<> + 88(SB)/8, $0.366935664549587481E-01
30 DATA ·atan2rodataL25<> + 96(SB)/8, $0.220852012160300086E-01
31 DATA ·atan2rodataL25<> + 104(SB)/8, $-.299856214685512712E-01
32 DATA ·atan2rodataL25<> + 112(SB)/8, $0.726338160757602439E-02
33 DATA ·atan2rodataL25<> + 120(SB)/8, $0.134893651284712515E-04
34 DATA ·atan2rodataL25<> + 128(SB)/8, $-.291935324869629616E-02
35 DATA ·atan2rodataL25<> + 136(SB)/8, $-.154797890856877418E-03
36 DATA ·atan2rodataL25<> + 144(SB)/8, $0.843488472994227321E-03
37 DATA ·atan2rodataL25<> + 152(SB)/8, $-.139950258898989925E-01
38 GLOBL ·atan2rodataL25<> + 0(SB), RODATA, $160 // read-only symbol, 160 bytes = 20 entries of 8 bytes
39
40 DATA ·atan2xpi2h<> + 0(SB)/8, $0x3ff330e4e4fa7b1b // entry 0 of the correction table; times atan2xpim this is approximately +Pi/2
41 DATA ·atan2xpi2h<> + 8(SB)/8, $0xbff330e4e4fa7b1b // entry 1: same magnitude, sign bit set (approximately -Pi/2 after scaling)
42 DATA ·atan2xpi2h<> + 16(SB)/8, $0x400330e4e4fa7b1b // entry 2: twice entry 0 (approximately +Pi after scaling)
43 DATA ·atan2xpi2h<> + 24(SB)/8, $0xc00330e4e4fa7b1b // entry 3: twice entry 1 (approximately -Pi after scaling)
44 GLOBL ·atan2xpi2h<> + 0(SB), RODATA, $32 // read-only, 4 entries of 8 bytes; indexed by R2 at label L18 of atan2Asm
45 DATA ·atan2xpim<> + 0(SB)/8, $0x3ff4f42b00000000 // multiplier (about 1.3096) applied to the selected atan2xpi2h entry; low 32 bits are zero
46 GLOBL ·atan2xpim<> + 0(SB), RODATA, $8 // read-only, a single 8-byte value
47
48 // Atan2 returns the arc tangent of y/x, using
49 // the signs of the two to determine the quadrant
50 // of the return value.
51 //
52 // Special cases are (in order):
53 // Atan2(y, NaN) = NaN
54 // Atan2(NaN, x) = NaN
55 // Atan2(+0, x>=0) = +0
56 // Atan2(-0, x>=0) = -0
57 // Atan2(+0, x<=-0) = +Pi
58 // Atan2(-0, x<=-0) = -Pi
59 // Atan2(y>0, 0) = +Pi/2
60 // Atan2(y<0, 0) = -Pi/2
61 // Atan2(+Inf, +Inf) = +Pi/4
62 // Atan2(-Inf, +Inf) = -Pi/4
63 // Atan2(+Inf, -Inf) = 3Pi/4
64 // Atan2(-Inf, -Inf) = -3Pi/4
65 // Atan2(y, +Inf) = 0
66 // Atan2(y>0, -Inf) = +Pi
67 // Atan2(y<0, -Inf) = -Pi
68 // Atan2(+Inf, x) = +Pi/2
69 // Atan2(-Inf, x) = -Pi/2
70 // The algorithm used is minimax polynomial approximation
71 // with coefficients determined with a Remez exchange algorithm.
72
73 TEXT ·atan2Asm(SB), NOSPLIT, $0-24 // no local frame; 24 bytes of arguments and result: x+0(FP), y+8(FP), ret+16(FP). NOTE: the first argument is named x here, whereas the header comment above writes Atan2(y, x) with y first
74 // special cases: decided with integer compares on the raw 64-bit patterns of the two arguments
75 MOVD x+0(FP), R1 // R1 = bits of the first argument (x)
76 MOVD y+8(FP), R2 // R2 = bits of the second argument (y)
77
78 // special case Atan2(NaN, y) = NaN
79 MOVD $~(1<<63), R5 // mask with every bit set except the sign bit
80 AND R1, R5 // R5 = bits of |x| (R1 itself is left unchanged)
81 MOVD $PosInf, R3
82 CMPUBLT R3, R5, returnX // unsigned: +Inf bits < |x| bits, so x is NaN; return x
83
84 // special case Atan2(x, NaN) = NaN
85 MOVD $~(1<<63), R5
86 AND R2, R5 // R5 = bits of |y|
87 CMPUBLT R3, R5, returnY // R3 still holds PosInf: y is NaN; return y
88
89 MOVD $NegZero, R3
90 CMPUBEQ R3, R1, xIsNegZero // x is -0
91
92 MOVD $0, R3
93 CMPUBEQ R3, R1, xIsPosZero // x is +0
94
95 MOVD $PosInf, R4
96 CMPUBEQ R4, R2, yIsPosInf // y is +Inf
97
98 MOVD $NegInf, R4
99 CMPUBEQ R4, R2, yIsNegInf // y is -Inf
100 BR Normal // none of the early special cases matched
101 xIsNegZero:
102 // special case Atan2(-0, y>=0) = -0
103 MOVD $0, R4
104 CMPBLE R4, R2, returnX // signed 0 <= y bits, i.e. sign bit of y clear: return x, which is -0
105
106 //special case Atan2(-0, y<=-0) = -Pi
107 MOVD $NegZero, R4
108 CMPBGE R4, R2, returnNegPi // signed compare against the most negative 64-bit value, so this appears to match only y == -0; other negative y continues to Normal - TODO confirm
109 BR Normal
110 xIsPosZero:
111 //special case Atan2(0, 0) = 0
112 MOVD $0, R4
113 CMPUBEQ R4, R2, returnX // y is +0 as well: return x, which is +0
114
115 //special case Atan2(0, y<=-0) = Pi
116 MOVD $NegZero, R4
117 CMPBGE R4, R2, returnPi // same signed compare as in xIsNegZero: appears to match only y == -0 - TODO confirm
118 BR Normal
119 yIsNegInf:
120 //special case Atan2(+Inf, -Inf) = 3Pi/4
121 MOVD $PosInf, R3
122 CMPUBEQ R3, R1, posInfNegInf
123
124 //special case Atan2(-Inf, -Inf) = -3Pi/4
125 MOVD $NegInf, R3
126 CMPUBEQ R3, R1, negInfNegInf
127 BR Normal // finite x with y = -Inf is left to the general path
128 yIsPosInf:
129 //special case Atan2(+Inf, +Inf) = Pi/4
130 MOVD $PosInf, R3
131 CMPUBEQ R3, R1, posInfPosInf
132
133 //special case Atan2(-Inf, +Inf) = -Pi/4
134 MOVD $NegInf, R3
135 CMPUBEQ R3, R1, negInfPosInf
136
137 //special case Atan2(x, +Inf) = Copysign(0, x)
138 CMPBLT R1, $0, returnNegZero // x bits negative as a signed integer, i.e. sign bit set: return -0
139 BR returnPosZero
140
141 Normal: // general path: form a ratio of the arguments, evaluate the polynomial, then add a quadrant correction if needed
142 FMOVD x+0(FP), F0 // F0 = x
143 FMOVD y+8(FP), F2 // F2 = y
144 MOVD $·atan2rodataL25<>+0(SB), R9 // R9 = base address of the coefficient table
145 LGDR F0, R2 // R2 = bits of x
146 LGDR F2, R1 // R1 = bits of y
147 RISBGNZ $32, $63, $32, R2, R2 // presumably R2 = high 32 bits of x, zero-extended (rotate by 32, keep bits 32-63) - confirm against RISBG semantics
148 RISBGNZ $32, $63, $32, R1, R1 // likewise R1 = high 32 bits of y
149 WORD $0xB9170032 //llgtr %r3,%r2 (hand-encoded): R3 = low 31 bits of R2, i.e. the high word of x without its sign bit
150 RISBGZ $63, $63, $33, R2, R5 // presumably R5 = sign bit of x as 0 or 1 - confirm against RISBG semantics
151 WORD $0xB9170041 //llgtr %r4,%r1 (hand-encoded): R4 = high word of y without its sign bit
152 WFLCDB V0, V20 // V20 = -x (load complement); copied back at L22 and L24 when x is negative
153 MOVW R4, R6
154 MOVW R3, R7
155 CMPUBLT R6, R7, L17 // magnitude high word of y < that of x: use the swapped quotient at L17
156 WFDDB V2, V0, V3 // V3 = quotient of x and y; presumably x/y - confirm operand order
157 ADDW $2, R5, R2 // R2 = R5 + 2: selects entry 2 or 3 of atan2xpi2h if a correction is applied at L18
158 MOVW R4, R6
159 MOVW R3, R7
160 CMPUBLE R6, R7, L20 // the "<" case already branched above, so this is taken when the high words are equal: L20 compares the full magnitudes
161 L3: // polynomial evaluation; on entry V3 holds the ratio and R2 the atan2xpi2h index
162 WFMDB V3, V3, V4 // V4 = V3*V3 (square of the ratio)
163 VLEG $0, 152(R9), V18 // VLEG/FMOVD below load coefficients from the table; the fused multiply-adds form several independent partial sums that are combined at the end
164 VLEG $0, 144(R9), V16
165 FMOVD 136(R9), F1
166 FMOVD 128(R9), F5
167 FMOVD 120(R9), F6
168 WFMADB V4, V16, V5, V16
169 WFMADB V4, V6, V1, V6
170 FMOVD 112(R9), F7
171 WFMDB V4, V4, V1 // V1 = V4*V4 (fourth power of the ratio)
172 WFMADB V4, V7, V18, V7
173 VLEG $0, 104(R9), V18
174 WFMADB V1, V6, V16, V6
175 CMPWU R4, R3 // unsigned 32-bit compare of the two magnitude high words; its condition code is consumed by BLT L18 / BGT L7 below, which relies on the intervening floating-point and vector instructions leaving the condition code unchanged - TODO confirm
176 FMOVD 96(R9), F5
177 VLEG $0, 88(R9), V16
178 WFMADB V4, V5, V18, V5
179 VLEG $0, 80(R9), V18
180 VLEG $0, 72(R9), V22
181 WFMADB V4, V16, V18, V16
182 VLEG $0, 64(R9), V18
183 WFMADB V1, V7, V5, V7
184 WFMADB V4, V18, V22, V18
185 WFMDB V1, V1, V5 // V5 = V1*V1 (eighth power of the ratio)
186 WFMADB V1, V16, V18, V16
187 VLEG $0, 56(R9), V18
188 WFMADB V5, V6, V7, V6
189 VLEG $0, 48(R9), V22
190 FMOVD 40(R9), F7
191 WFMADB V4, V7, V18, V7
192 VLEG $0, 32(R9), V18
193 WFMADB V5, V6, V16, V6
194 WFMADB V4, V18, V22, V18
195 VLEG $0, 24(R9), V16
196 WFMADB V1, V7, V18, V7
197 VLEG $0, 16(R9), V18
198 VLEG $0, 8(R9), V22
199 WFMADB V4, V18, V16, V18
200 VLEG $0, 0(R9), V16
201 WFMADB V5, V6, V7, V6
202 WFMADB V4, V16, V22, V16
203 FMUL F3, F4 // F4 = F4*F3, i.e. the cube of the ratio
204 WFMADB V1, V18, V16, V1
205 FMADD F6, F5, F1 // presumably F1 = F6*F5 + F1, merging the high-order and low-order partial sums - confirm operand order
206 WFMADB V4, V1, V3, V4 // presumably V4 = V4*V1 + V3: ratio plus cube-of-ratio times polynomial; F4 is the value stored at L1 - confirm operand order
207 BLT L18 // from CMPWU: high word of |y| below that of |x|: always apply the correction
208 BGT L7 // high word of |y| above that of |x|: correction depends only on the sign of y
209 LTDBR F2, F2 // high words equal: load-and-test y to set the condition code
210 BLTU L21 // y negative (or unordered): L21 negates F2
211 L8:
212 LTDBR F0, F0
213 BLTU L22 // x negative (or unordered): L22 replaces V0 with V20, which holds -x
214 L9:
215 WFCHDBS V2, V0, V0 // compares the two magnitudes and sets the condition code; V0 is overwritten with the compare result
216 BNE L18
217 L7:
218 MOVW R1, R6 // R1 = high word of y; presumably sign-extended so that R6 is negative exactly when y has its sign bit set
219 CMPBGE R6, $0, L1 // sign bit of y clear: no correction, the polynomial result is returned as is
220 L18: // add the quadrant correction: F4 += atan2xpim * atan2xpi2h[R2]
221 RISBGZ $58, $60, $3, R2, R2 // presumably R2 = (R2 << 3) restricted to bits 58-60, the byte offset of an 8-byte table entry - confirm against RISBG semantics
222 MOVD $·atan2xpi2h<>+0(SB), R1 // R1 = base address of the correction table
223 MOVD ·atan2xpim<>+0(SB), R3 // R3 = bits of the atan2xpim multiplier
224 LDGR R3, F0 // F0 = atan2xpim
225 WORD $0xED021000 //madb %f4,%f0,0(%r2,%r1) (hand-encoded, first 4 of 6 bytes): F4 = F4 + F0 * (8 bytes at R1+R2)
226 BYTE $0x40 // fifth byte of the madb encoding
227 BYTE $0x1E // sixth byte of the madb encoding
228 L1:
229 FMOVD F4, ret+16(FP) // store the result
230 RET
231
232 L20: // high words of |x| and |y| are equal: compare the full magnitudes
233 LTDBR F2, F2
234 BLTU L23 // y negative (or unordered): L23 puts -y in F6
235 FMOVD F2, F6 // F6 = y, already non-negative
236 L4:
237 LTDBR F0, F0
238 BLTU L24 // x negative (or unordered): L24 puts -x (V20) in V4
239 FMOVD F0, F4 // F4 = x, already non-negative
240 L5:
241 WFCHDBS V6, V4, V4 // compares the magnitudes in V6 and V4 and sets the condition code
242 BEQ L3 // one compare outcome keeps the quotient already in V3, the other falls into L17 - exact sense of the condition to be confirmed
243 L17: // swapped-quotient path
244 WFDDB V0, V2, V4 // V4 = the other quotient (operands swapped relative to the division before L3); presumably y/x
245 BYTE $0x18 //lr %r2,%r5 (two-byte instruction emitted as two BYTEs): R2 = R5, selecting entry 0 or 1 of atan2xpi2h
246 BYTE $0x25
247 WORD $0xB3130034 //lcdbr %f3,%f4 (hand-encoded): F3 = -F4, the negated quotient used as the ratio
248 BR L3
249 L23:
250 WORD $0xB3130062 //lcdbr %f6,%f2 (hand-encoded): F6 = -F2
251 BR L4
252 L22:
253 VLR V20, V0 // V0 = V20, i.e. -x
254 BR L9
255 L21:
256 WORD $0xB3130022 //lcdbr %f2,%f2 (hand-encoded): F2 = -F2
257 BR L8
258 L24:
259 VLR V20, V4 // V4 = V20, i.e. -x
260 BR L5
261 returnX: //the result is same as the first argument
262 MOVD R1, ret+16(FP) // R1 still holds the bits of x loaded at entry
263 RET
264 returnY: //the result is same as the second argument
265 MOVD R2, ret+16(FP) // R2 still holds the bits of y loaded at entry
266 RET
267 returnPi: // result = Pi
268 MOVD $Pi, R1
269 MOVD R1, ret+16(FP)
270 RET
271 returnNegPi: // result = -Pi
272 MOVD $NegPi, R1
273 MOVD R1, ret+16(FP)
274 RET
275 posInfNegInf: // result = 3Pi/4
276 MOVD $Pi3Div4, R1
277 MOVD R1, ret+16(FP)
278 RET
279 negInfNegInf: // result = -3Pi/4
280 MOVD $NegPi3Div4, R1
281 MOVD R1, ret+16(FP)
282 RET
283 posInfPosInf: // result = Pi/4
284 MOVD $PiDiv4, R1
285 MOVD R1, ret+16(FP)
286 RET
287 negInfPosInf: // result = -Pi/4
288 MOVD $NegPiDiv4, R1
289 MOVD R1, ret+16(FP)
290 RET
291 returnNegZero: // result = -0
292 MOVD $NegZero, R1
293 MOVD R1, ret+16(FP)
294 RET
295 returnPosZero: // result = +0
296 MOVD $0, ret+16(FP)
297 RET
298
View as plain text