// go/src/math/atan_s390x.s
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "textflag.h"
// Minimax polynomial coefficients and other constants.
// atanrodataL8 is a table of twenty float64 values (160 bytes) that atanAsm
// reads at fixed byte offsets from R5.
DATA ·atanrodataL8<> + 0(SB)/8, $0.199999999999554423E+00
DATA ·atanrodataL8<> + 8(SB)/8, $0.111111110136634272E+00
DATA ·atanrodataL8<> + 16(SB)/8, $-.142857142828026806E+00
DATA ·atanrodataL8<> + 24(SB)/8, $-.333333333333330928E+00
DATA ·atanrodataL8<> + 32(SB)/8, $0.769228118888682505E-01
DATA ·atanrodataL8<> + 40(SB)/8, $0.588059263575587687E-01
DATA ·atanrodataL8<> + 48(SB)/8, $-.666641501287528609E-01
DATA ·atanrodataL8<> + 56(SB)/8, $-.909090711945939878E-01
DATA ·atanrodataL8<> + 64(SB)/8, $0.472329433805024762E-01
DATA ·atanrodataL8<> + 72(SB)/8, $0.366935664549587481E-01
DATA ·atanrodataL8<> + 80(SB)/8, $-.422172007412067035E-01
DATA ·atanrodataL8<> + 88(SB)/8, $-.299856214685512712E-01
DATA ·atanrodataL8<> + 96(SB)/8, $0.220852012160300086E-01
DATA ·atanrodataL8<> + 104(SB)/8, $0.726338160757602439E-02
DATA ·atanrodataL8<> + 112(SB)/8, $0.843488472994227321E-03
DATA ·atanrodataL8<> + 120(SB)/8, $0.134893651284712515E-04
DATA ·atanrodataL8<> + 128(SB)/8, $-.525380587584426406E-01
DATA ·atanrodataL8<> + 136(SB)/8, $-.139950258898989925E-01
DATA ·atanrodataL8<> + 144(SB)/8, $-.291935324869629616E-02
DATA ·atanrodataL8<> + 152(SB)/8, $-.154797890856877418E-03
GLOBL ·atanrodataL8<> + 0(SB), RODATA, $160

// atanxpi2h holds four float64 bit patterns arranged as two +/- pairs.
// atanAsm indexes the table with the sign bit of x only, so it reads just
// the first pair (offsets 0 and 8). The product of that pair with atanxpim
// is approximately ±Pi/2. Every entry is a full 8 bytes wide.
DATA ·atanxpi2h<> + 0(SB)/8, $0x3ff330e4e4fa7b1b
DATA ·atanxpi2h<> + 8(SB)/8, $0xbff330e4e4fa7b1b
DATA ·atanxpi2h<> + 16(SB)/8, $0x400330e4e4fa7b1b
DATA ·atanxpi2h<> + 24(SB)/8, $0xc00330e4e4fa7b1b
GLOBL ·atanxpi2h<> + 0(SB), RODATA, $32

// atanxpim is the multiplier applied to the atanxpi2h entry in the final
// fused multiply-add of the |x| >= 1 path.
DATA ·atanxpim<> + 0(SB)/8, $0x3ff4f42b00000000
GLOBL ·atanxpim<> + 0(SB), RODATA, $8

// atanxmone is the constant -1.0, the numerator of the -1/x argument reduction.
DATA ·atanxmone<> + 0(SB)/8, $-1.0
GLOBL ·atanxmone<> + 0(SB), RODATA, $8

// Atan returns the arctangent, in radians, of the argument.
//
// Special cases are:
//	Atan(±0) = ±0
//	Atan(±Inf) = ±Pi/2
// The algorithm used is minimax polynomial approximation
// with coefficients determined with a Remez exchange algorithm.
//
// Frame layout: one 8-byte argument x at 0(FP) and one 8-byte result at
// 8(FP); no local stack is used.
//
// Outline:
//	1. x == 0 is returned unchanged, which keeps the sign of zero.
//	2. If the exponent field of x is above 0x3FE (|x| >= 1, including
//	   Inf and NaN), x is replaced by -1/x and F8 is loaded with the
//	   atanxpi2h entry selected by the sign of the original x.
//	3. The result is formed as x + x^3 * P(x^2), with P assembled from the
//	   atanrodataL8 coefficients by fused multiply-adds on x^2, x^4, x^8.
//	4. On the reduced path F8 * atanxpim is added to the result.
TEXT	·atanAsm(SB), NOSPLIT, $0-16
	FMOVD	x+0(FP), F0
	// Special case Atan(±0) = ±0: the input is stored back as the result.
	FMOVD	$(0.0), F1
	FCMPU	F0, F1
	BEQ	atanIsZero

	MOVD	$·atanrodataL8<>+0(SB), R5	// R5 = base of the coefficient table
	MOVH	$0x3FE0, R3			// threshold: exponent field 0x3FE, shifted left by 4
	LGDR	F0, R1				// R1 = raw bit pattern of x
	RISBGNZ	$32, $63, $32, R1, R1		// bring the high 32 bits of x into the low word
	RLL	$16, R1, R2
	ANDW	$0x7FF0, R2			// R2 = biased exponent of x, shifted left by 4 (sign dropped)
	MOVW	R2, R6
	MOVW	R3, R7
	CMPUBLE	R6, R7, L6			// exponent <= 0x3FE: use x as is

	// Argument reduction for exponent > 0x3FE.
	MOVD	$·atanxmone<>+0(SB), R3
	FMOVD	0(R3), F2			// F2 = -1.0
	WFDDB	V0, V2, V0			// V0 = V2 / V0 = -1/x (divisor is the first operand)
	RISBGZ	$63, $63, $33, R1, R1		// R1 = sign bit of the original x (0 or 1)
	MOVD	$·atanxpi2h<>+0(SB), R3
	MOVWZ	R1, R1
	SLD	$3, R1, R1			// byte offset into atanxpi2h: 0 or 8
	WORD	$0x68813000	//ld %f8,0(%r1,%r3)

L6:
	// V2 = x^2, V6 = x^4, V1 = x^8 (x being the possibly reduced argument).
	// Coefficient loads are interleaved with the multiply-adds; the
	// statement order is significant because registers are reused.
	WFMDB	V0, V0, V2
	FMOVD	152(R5), F6
	FMOVD	144(R5), F1
	FMOVD	136(R5), F7
	VLEG	$0, 128(R5), V16
	FMOVD	120(R5), F4
	FMOVD	112(R5), F5
	WFMADB	V2, V4, V6, V4
	WFMADB	V2, V5, V1, V5
	WFMDB	V2, V2, V6
	FMOVD	104(R5), F3
	FMOVD	96(R5), F1
	WFMADB	V2, V3, V7, V3
	MOVH	$0x3FE0, R1			// reload the threshold for the test after the polynomial
	FMOVD	88(R5), F7
	WFMADB	V2, V1, V7, V1
	FMOVD	80(R5), F7
	WFMADB	V6, V3, V1, V3
	WFMADB	V6, V4, V5, V4
	WFMDB	V6, V6, V1
	FMOVD	72(R5), F5
	WFMADB	V2, V5, V7, V5
	FMOVD	64(R5), F7
	WFMADB	V2, V7, V16, V7
	VLEG	$0, 56(R5), V16
	WFMADB	V6, V5, V7, V5
	WFMADB	V1, V4, V3, V4
	FMOVD	48(R5), F7
	FMOVD	40(R5), F3
	WFMADB	V2, V3, V7, V3
	FMOVD	32(R5), F7
	WFMADB	V2, V7, V16, V7
	VLEG	$0, 24(R5), V16
	WFMADB	V1, V4, V5, V4
	FMOVD	16(R5), F5
	WFMADB	V6, V3, V7, V3
	FMOVD	8(R5), F7
	WFMADB	V2, V7, V5, V7
	FMOVD	0(R5), F5
	WFMADB	V2, V5, V16, V5
	WFMADB	V1, V4, V3, V4
	WFMADB	V6, V7, V5, V6
	FMUL	F0, F2				// F2 = x^2 * x = x^3
	FMADD	F4, F1, F6			// F6 += F4 * x^8
	FMADD	F6, F2, F0			// F0 = x + x^3 * F6

	// Same exponent test as before the reduction (R2 is still live).
	MOVW	R2, R6
	MOVW	R1, R7
	CMPUBLE	R6, R7, L1
	// Reduced path: F0 += F8 * atanxpim.
	MOVD	$·atanxpim<>+0(SB), R1
	WORD	$0xED801000	//madb %f0,%f8,0(%r1)
	BYTE	$0x00
	BYTE	$0x1E
L1:
atanIsZero:
	FMOVD	F0, ret+8(FP)
	RET