koichi12 commited on
Commit
a18205e
·
verified ·
1 Parent(s): 211e5eb

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/__pycache__/function_docs.cpython-311.pyc +3 -0
  3. tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/functions/__init__.py +14 -0
  4. tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/functions/bessel.py +1108 -0
  5. tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/functions/factorials.py +187 -0
  6. tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/functions/functions.py +645 -0
  7. tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/functions/hypergeometric.py +1413 -0
  8. tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/functions/theta.py +1049 -0
  9. tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/functions/zetazeros.py +1018 -0
  10. tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/tests/__init__.py +0 -0
  11. tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/tests/test_basic_ops.py +451 -0
  12. tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/tests/test_bitwise.py +188 -0
  13. tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/tests/test_convert.py +233 -0
  14. tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/tests/test_eigen.py +179 -0
  15. tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/tests/test_fp.py +1671 -0
  16. tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/tests/test_functions.py +920 -0
  17. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_runtime/include/__init__.py +0 -0
  18. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_runtime/include/cudaGL.h +605 -0
  19. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_runtime/include/cuda_device_runtime_api.h +268 -0
  20. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_runtime/include/cuda_surface_types.h +103 -0
  21. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_runtime/include/cudart_platform.h +57 -0
  22. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_runtime/include/device_atomic_functions.h +211 -0
  23. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_runtime/include/sm_32_atomic_functions.h +131 -0
  24. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_runtime/include/sm_32_intrinsics.h +510 -0
  25. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_runtime/include/sm_32_intrinsics.hpp +588 -0
  26. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_runtime/include/sm_60_atomic_functions.h +539 -0
  27. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/__pycache__/__init__.cpython-311.pyc +0 -0
  28. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvToolsExt.h +1561 -0
  29. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvToolsExtCudaRt.h +140 -0
  30. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvToolsExtSync.h +406 -0
  31. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvtx3/nvToolsExt.h +1499 -0
  32. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvtx3/nvToolsExtCuda.h +170 -0
  33. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvtx3/nvToolsExtSync.h +411 -0
  34. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvtx3/nvtxDetail/nvtxInit.h +343 -0
  35. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvtx3/nvtxDetail/nvtxInitDecls.h +73 -0
  36. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvtx3/nvtxDetail/nvtxLinkOnce.h +75 -0
  37. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/pygments/cmdline.py +668 -0
  38. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/pygments/filters/__init__.py +940 -0
  39. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/pygments/filters/__pycache__/__init__.cpython-311.pyc +0 -0
  40. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/pygments/formatters/rtf.py +349 -0
  41. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/pygments/lexers/__pycache__/__init__.cpython-311.pyc +0 -0
  42. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/pygments/scanner.py +104 -0
  43. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/pygments/style.py +203 -0
  44. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/pygments/styles/__init__.py +61 -0
  45. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/pygments/styles/__pycache__/__init__.cpython-311.pyc +0 -0
  46. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/pygments/styles/__pycache__/_mapping.cpython-311.pyc +0 -0
  47. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/pygments/token.py +214 -0
  48. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/qat/__init__.py +1 -0
  49. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/qat/__pycache__/__init__.cpython-311.pyc +0 -0
  50. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/quantized/modules/__init__.py +17 -0
.gitattributes CHANGED
@@ -64,3 +64,4 @@ tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_C.cpyt
64
  tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/Nodes.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
65
  tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/__pycache__/model.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
66
  tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/tests/__pycache__/test_fp.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
 
 
64
  tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/Nodes.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
65
  tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/__pycache__/model.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
66
  tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/tests/__pycache__/test_fp.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
67
+ tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/__pycache__/function_docs.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/__pycache__/function_docs.cpython-311.pyc ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49a68f636d901e68fe4418b90d031c3589fe1aa4e9fdef65a20221a4e53a5962
3
+ size 284965
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/functions/__init__.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from . import functions
2
+ # Hack to update methods
3
+ from . import factorials
4
+ from . import hypergeometric
5
+ from . import expintegrals
6
+ from . import bessel
7
+ from . import orthogonal
8
+ from . import theta
9
+ from . import elliptic
10
+ from . import signals
11
+ from . import zeta
12
+ from . import rszeta
13
+ from . import zetazeros
14
+ from . import qfunctions
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/functions/bessel.py ADDED
@@ -0,0 +1,1108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .functions import defun, defun_wrapped
2
+
3
+ @defun
4
+ def j0(ctx, x):
5
+ """Computes the Bessel function `J_0(x)`. See :func:`~mpmath.besselj`."""
6
+ return ctx.besselj(0, x)
7
+
8
+ @defun
9
+ def j1(ctx, x):
10
+ """Computes the Bessel function `J_1(x)`. See :func:`~mpmath.besselj`."""
11
+ return ctx.besselj(1, x)
12
+
13
+ @defun
14
+ def besselj(ctx, n, z, derivative=0, **kwargs):
15
+ if type(n) is int:
16
+ n_isint = True
17
+ else:
18
+ n = ctx.convert(n)
19
+ n_isint = ctx.isint(n)
20
+ if n_isint:
21
+ n = int(ctx._re(n))
22
+ if n_isint and n < 0:
23
+ return (-1)**n * ctx.besselj(-n, z, derivative, **kwargs)
24
+ z = ctx.convert(z)
25
+ M = ctx.mag(z)
26
+ if derivative:
27
+ d = ctx.convert(derivative)
28
+ # TODO: the integer special-casing shouldn't be necessary.
29
+ # However, the hypergeometric series gets inaccurate for large d
30
+ # because of inaccurate pole cancellation at a pole far from
31
+ # zero (needs to be fixed in hypercomb or hypsum)
32
+ if ctx.isint(d) and d >= 0:
33
+ d = int(d)
34
+ orig = ctx.prec
35
+ try:
36
+ ctx.prec += 15
37
+ v = ctx.fsum((-1)**k * ctx.binomial(d,k) * ctx.besselj(2*k+n-d,z)
38
+ for k in range(d+1))
39
+ finally:
40
+ ctx.prec = orig
41
+ v *= ctx.mpf(2)**(-d)
42
+ else:
43
+ def h(n,d):
44
+ r = ctx.fmul(ctx.fmul(z, z, prec=ctx.prec+M), -0.25, exact=True)
45
+ B = [0.5*(n-d+1), 0.5*(n-d+2)]
46
+ T = [([2,ctx.pi,z],[d-2*n,0.5,n-d],[],B,[(n+1)*0.5,(n+2)*0.5],B+[n+1],r)]
47
+ return T
48
+ v = ctx.hypercomb(h, [n,d], **kwargs)
49
+ else:
50
+ # Fast case: J_n(x), n int, appropriate magnitude for fixed-point calculation
51
+ if (not derivative) and n_isint and abs(M) < 10 and abs(n) < 20:
52
+ try:
53
+ return ctx._besselj(n, z)
54
+ except NotImplementedError:
55
+ pass
56
+ if not z:
57
+ if not n:
58
+ v = ctx.one + n+z
59
+ elif ctx.re(n) > 0:
60
+ v = n*z
61
+ else:
62
+ v = ctx.inf + z + n
63
+ else:
64
+ #v = 0
65
+ orig = ctx.prec
66
+ try:
67
+ # XXX: workaround for accuracy in low level hypergeometric series
68
+ # when alternating, large arguments
69
+ ctx.prec += min(3*abs(M), ctx.prec)
70
+ w = ctx.fmul(z, 0.5, exact=True)
71
+ def h(n):
72
+ r = ctx.fneg(ctx.fmul(w, w, prec=max(0,ctx.prec+M)), exact=True)
73
+ return [([w], [n], [], [n+1], [], [n+1], r)]
74
+ v = ctx.hypercomb(h, [n], **kwargs)
75
+ finally:
76
+ ctx.prec = orig
77
+ v = +v
78
+ return v
79
+
80
+ @defun
81
+ def besseli(ctx, n, z, derivative=0, **kwargs):
82
+ n = ctx.convert(n)
83
+ z = ctx.convert(z)
84
+ if not z:
85
+ if derivative:
86
+ raise ValueError
87
+ if not n:
88
+ # I(0,0) = 1
89
+ return 1+n+z
90
+ if ctx.isint(n):
91
+ return 0*(n+z)
92
+ r = ctx.re(n)
93
+ if r == 0:
94
+ return ctx.nan*(n+z)
95
+ elif r > 0:
96
+ return 0*(n+z)
97
+ else:
98
+ return ctx.inf+(n+z)
99
+ M = ctx.mag(z)
100
+ if derivative:
101
+ d = ctx.convert(derivative)
102
+ def h(n,d):
103
+ r = ctx.fmul(ctx.fmul(z, z, prec=ctx.prec+M), 0.25, exact=True)
104
+ B = [0.5*(n-d+1), 0.5*(n-d+2), n+1]
105
+ T = [([2,ctx.pi,z],[d-2*n,0.5,n-d],[n+1],B,[(n+1)*0.5,(n+2)*0.5],B,r)]
106
+ return T
107
+ v = ctx.hypercomb(h, [n,d], **kwargs)
108
+ else:
109
+ def h(n):
110
+ w = ctx.fmul(z, 0.5, exact=True)
111
+ r = ctx.fmul(w, w, prec=max(0,ctx.prec+M))
112
+ return [([w], [n], [], [n+1], [], [n+1], r)]
113
+ v = ctx.hypercomb(h, [n], **kwargs)
114
+ return v
115
+
116
+ @defun_wrapped
117
+ def bessely(ctx, n, z, derivative=0, **kwargs):
118
+ if not z:
119
+ if derivative:
120
+ # Not implemented
121
+ raise ValueError
122
+ if not n:
123
+ # ~ log(z/2)
124
+ return -ctx.inf + (n+z)
125
+ if ctx.im(n):
126
+ return ctx.nan * (n+z)
127
+ r = ctx.re(n)
128
+ q = n+0.5
129
+ if ctx.isint(q):
130
+ if n > 0:
131
+ return -ctx.inf + (n+z)
132
+ else:
133
+ return 0 * (n+z)
134
+ if r < 0 and int(ctx.floor(q)) % 2:
135
+ return ctx.inf + (n+z)
136
+ else:
137
+ return ctx.ninf + (n+z)
138
+ # XXX: use hypercomb
139
+ ctx.prec += 10
140
+ m, d = ctx.nint_distance(n)
141
+ if d < -ctx.prec:
142
+ h = +ctx.eps
143
+ ctx.prec *= 2
144
+ n += h
145
+ elif d < 0:
146
+ ctx.prec -= d
147
+ # TODO: avoid cancellation for imaginary arguments
148
+ cos, sin = ctx.cospi_sinpi(n)
149
+ return (ctx.besselj(n,z,derivative,**kwargs)*cos - \
150
+ ctx.besselj(-n,z,derivative,**kwargs))/sin
151
+
152
+ @defun_wrapped
153
+ def besselk(ctx, n, z, **kwargs):
154
+ if not z:
155
+ return ctx.inf
156
+ M = ctx.mag(z)
157
+ if M < 1:
158
+ # Represent as limit definition
159
+ def h(n):
160
+ r = (z/2)**2
161
+ T1 = [z, 2], [-n, n-1], [n], [], [], [1-n], r
162
+ T2 = [z, 2], [n, -n-1], [-n], [], [], [1+n], r
163
+ return T1, T2
164
+ # We could use the limit definition always, but it leads
165
+ # to very bad cancellation (of exponentially large terms)
166
+ # for large real z
167
+ # Instead represent in terms of 2F0
168
+ else:
169
+ ctx.prec += M
170
+ def h(n):
171
+ return [([ctx.pi/2, z, ctx.exp(-z)], [0.5,-0.5,1], [], [], \
172
+ [n+0.5, 0.5-n], [], -1/(2*z))]
173
+ return ctx.hypercomb(h, [n], **kwargs)
174
+
175
+ @defun_wrapped
176
+ def hankel1(ctx,n,x,**kwargs):
177
+ return ctx.besselj(n,x,**kwargs) + ctx.j*ctx.bessely(n,x,**kwargs)
178
+
179
+ @defun_wrapped
180
+ def hankel2(ctx,n,x,**kwargs):
181
+ return ctx.besselj(n,x,**kwargs) - ctx.j*ctx.bessely(n,x,**kwargs)
182
+
183
+ @defun_wrapped
184
+ def whitm(ctx,k,m,z,**kwargs):
185
+ if z == 0:
186
+ # M(k,m,z) = 0^(1/2+m)
187
+ if ctx.re(m) > -0.5:
188
+ return z
189
+ elif ctx.re(m) < -0.5:
190
+ return ctx.inf + z
191
+ else:
192
+ return ctx.nan * z
193
+ x = ctx.fmul(-0.5, z, exact=True)
194
+ y = 0.5+m
195
+ return ctx.exp(x) * z**y * ctx.hyp1f1(y-k, 1+2*m, z, **kwargs)
196
+
197
+ @defun_wrapped
198
+ def whitw(ctx,k,m,z,**kwargs):
199
+ if z == 0:
200
+ g = abs(ctx.re(m))
201
+ if g < 0.5:
202
+ return z
203
+ elif g > 0.5:
204
+ return ctx.inf + z
205
+ else:
206
+ return ctx.nan * z
207
+ x = ctx.fmul(-0.5, z, exact=True)
208
+ y = 0.5+m
209
+ return ctx.exp(x) * z**y * ctx.hyperu(y-k, 1+2*m, z, **kwargs)
210
+
211
+ @defun
212
+ def hyperu(ctx, a, b, z, **kwargs):
213
+ a, atype = ctx._convert_param(a)
214
+ b, btype = ctx._convert_param(b)
215
+ z = ctx.convert(z)
216
+ if not z:
217
+ if ctx.re(b) <= 1:
218
+ return ctx.gammaprod([1-b],[a-b+1])
219
+ else:
220
+ return ctx.inf + z
221
+ bb = 1+a-b
222
+ bb, bbtype = ctx._convert_param(bb)
223
+ try:
224
+ orig = ctx.prec
225
+ try:
226
+ ctx.prec += 10
227
+ v = ctx.hypsum(2, 0, (atype, bbtype), [a, bb], -1/z, maxterms=ctx.prec)
228
+ return v / z**a
229
+ finally:
230
+ ctx.prec = orig
231
+ except ctx.NoConvergence:
232
+ pass
233
+ def h(a,b):
234
+ w = ctx.sinpi(b)
235
+ T1 = ([ctx.pi,w],[1,-1],[],[a-b+1,b],[a],[b],z)
236
+ T2 = ([-ctx.pi,w,z],[1,-1,1-b],[],[a,2-b],[a-b+1],[2-b],z)
237
+ return T1, T2
238
+ return ctx.hypercomb(h, [a,b], **kwargs)
239
+
240
+ @defun
241
+ def struveh(ctx,n,z, **kwargs):
242
+ n = ctx.convert(n)
243
+ z = ctx.convert(z)
244
+ # http://functions.wolfram.com/Bessel-TypeFunctions/StruveH/26/01/02/
245
+ def h(n):
246
+ return [([z/2, 0.5*ctx.sqrt(ctx.pi)], [n+1, -1], [], [n+1.5], [1], [1.5, n+1.5], -(z/2)**2)]
247
+ return ctx.hypercomb(h, [n], **kwargs)
248
+
249
+ @defun
250
+ def struvel(ctx,n,z, **kwargs):
251
+ n = ctx.convert(n)
252
+ z = ctx.convert(z)
253
+ # http://functions.wolfram.com/Bessel-TypeFunctions/StruveL/26/01/02/
254
+ def h(n):
255
+ return [([z/2, 0.5*ctx.sqrt(ctx.pi)], [n+1, -1], [], [n+1.5], [1], [1.5, n+1.5], (z/2)**2)]
256
+ return ctx.hypercomb(h, [n], **kwargs)
257
+
258
+ def _anger(ctx,which,v,z,**kwargs):
259
+ v = ctx._convert_param(v)[0]
260
+ z = ctx.convert(z)
261
+ def h(v):
262
+ b = ctx.mpq_1_2
263
+ u = v*b
264
+ m = b*3
265
+ a1,a2,b1,b2 = m-u, m+u, 1-u, 1+u
266
+ c, s = ctx.cospi_sinpi(u)
267
+ if which == 0:
268
+ A, B = [b*z, s], [c]
269
+ if which == 1:
270
+ A, B = [b*z, -c], [s]
271
+ w = ctx.square_exp_arg(z, mult=-0.25)
272
+ T1 = A, [1, 1], [], [a1,a2], [1], [a1,a2], w
273
+ T2 = B, [1], [], [b1,b2], [1], [b1,b2], w
274
+ return T1, T2
275
+ return ctx.hypercomb(h, [v], **kwargs)
276
+
277
+ @defun
278
+ def angerj(ctx, v, z, **kwargs):
279
+ return _anger(ctx, 0, v, z, **kwargs)
280
+
281
+ @defun
282
+ def webere(ctx, v, z, **kwargs):
283
+ return _anger(ctx, 1, v, z, **kwargs)
284
+
285
+ @defun
286
+ def lommels1(ctx, u, v, z, **kwargs):
287
+ u = ctx._convert_param(u)[0]
288
+ v = ctx._convert_param(v)[0]
289
+ z = ctx.convert(z)
290
+ def h(u,v):
291
+ b = ctx.mpq_1_2
292
+ w = ctx.square_exp_arg(z, mult=-0.25)
293
+ return ([u-v+1, u+v+1, z], [-1, -1, u+1], [], [], [1], \
294
+ [b*(u-v+3),b*(u+v+3)], w),
295
+ return ctx.hypercomb(h, [u,v], **kwargs)
296
+
297
+ @defun
298
+ def lommels2(ctx, u, v, z, **kwargs):
299
+ u = ctx._convert_param(u)[0]
300
+ v = ctx._convert_param(v)[0]
301
+ z = ctx.convert(z)
302
+ # Asymptotic expansion (GR p. 947) -- need to be careful
303
+ # not to use for small arguments
304
+ # def h(u,v):
305
+ # b = ctx.mpq_1_2
306
+ # w = -(z/2)**(-2)
307
+ # return ([z], [u-1], [], [], [b*(1-u+v)], [b*(1-u-v)], w),
308
+ def h(u,v):
309
+ b = ctx.mpq_1_2
310
+ w = ctx.square_exp_arg(z, mult=-0.25)
311
+ T1 = [u-v+1, u+v+1, z], [-1, -1, u+1], [], [], [1], [b*(u-v+3),b*(u+v+3)], w
312
+ T2 = [2, z], [u+v-1, -v], [v, b*(u+v+1)], [b*(v-u+1)], [], [1-v], w
313
+ T3 = [2, z], [u-v-1, v], [-v, b*(u-v+1)], [b*(1-u-v)], [], [1+v], w
314
+ #c1 = ctx.cospi((u-v)*b)
315
+ #c2 = ctx.cospi((u+v)*b)
316
+ #s = ctx.sinpi(v)
317
+ #r1 = (u-v+1)*b
318
+ #r2 = (u+v+1)*b
319
+ #T2 = [c1, s, z, 2], [1, -1, -v, v], [], [-v+1], [], [-v+1], w
320
+ #T3 = [-c2, s, z, 2], [1, -1, v, -v], [], [v+1], [], [v+1], w
321
+ #T2 = [c1, s, z, 2], [1, -1, -v, v+u-1], [r1, r2], [-v+1], [], [-v+1], w
322
+ #T3 = [-c2, s, z, 2], [1, -1, v, -v+u-1], [r1, r2], [v+1], [], [v+1], w
323
+ return T1, T2, T3
324
+ return ctx.hypercomb(h, [u,v], **kwargs)
325
+
326
+ @defun
327
+ def ber(ctx, n, z, **kwargs):
328
+ n = ctx.convert(n)
329
+ z = ctx.convert(z)
330
+ # http://functions.wolfram.com/Bessel-TypeFunctions/KelvinBer2/26/01/02/0001/
331
+ def h(n):
332
+ r = -(z/4)**4
333
+ cos, sin = ctx.cospi_sinpi(-0.75*n)
334
+ T1 = [cos, z/2], [1, n], [], [n+1], [], [0.5, 0.5*(n+1), 0.5*n+1], r
335
+ T2 = [sin, z/2], [1, n+2], [], [n+2], [], [1.5, 0.5*(n+3), 0.5*n+1], r
336
+ return T1, T2
337
+ return ctx.hypercomb(h, [n], **kwargs)
338
+
339
+ @defun
340
+ def bei(ctx, n, z, **kwargs):
341
+ n = ctx.convert(n)
342
+ z = ctx.convert(z)
343
+ # http://functions.wolfram.com/Bessel-TypeFunctions/KelvinBei2/26/01/02/0001/
344
+ def h(n):
345
+ r = -(z/4)**4
346
+ cos, sin = ctx.cospi_sinpi(0.75*n)
347
+ T1 = [cos, z/2], [1, n+2], [], [n+2], [], [1.5, 0.5*(n+3), 0.5*n+1], r
348
+ T2 = [sin, z/2], [1, n], [], [n+1], [], [0.5, 0.5*(n+1), 0.5*n+1], r
349
+ return T1, T2
350
+ return ctx.hypercomb(h, [n], **kwargs)
351
+
352
+ @defun
353
+ def ker(ctx, n, z, **kwargs):
354
+ n = ctx.convert(n)
355
+ z = ctx.convert(z)
356
+ # http://functions.wolfram.com/Bessel-TypeFunctions/KelvinKer2/26/01/02/0001/
357
+ def h(n):
358
+ r = -(z/4)**4
359
+ cos1, sin1 = ctx.cospi_sinpi(0.25*n)
360
+ cos2, sin2 = ctx.cospi_sinpi(0.75*n)
361
+ T1 = [2, z, 4*cos1], [-n-3, n, 1], [-n], [], [], [0.5, 0.5*(1+n), 0.5*(n+2)], r
362
+ T2 = [2, z, -sin1], [-n-3, 2+n, 1], [-n-1], [], [], [1.5, 0.5*(3+n), 0.5*(n+2)], r
363
+ T3 = [2, z, 4*cos2], [n-3, -n, 1], [n], [], [], [0.5, 0.5*(1-n), 1-0.5*n], r
364
+ T4 = [2, z, -sin2], [n-3, 2-n, 1], [n-1], [], [], [1.5, 0.5*(3-n), 1-0.5*n], r
365
+ return T1, T2, T3, T4
366
+ return ctx.hypercomb(h, [n], **kwargs)
367
+
368
+ @defun
369
+ def kei(ctx, n, z, **kwargs):
370
+ n = ctx.convert(n)
371
+ z = ctx.convert(z)
372
+ # http://functions.wolfram.com/Bessel-TypeFunctions/KelvinKei2/26/01/02/0001/
373
+ def h(n):
374
+ r = -(z/4)**4
375
+ cos1, sin1 = ctx.cospi_sinpi(0.75*n)
376
+ cos2, sin2 = ctx.cospi_sinpi(0.25*n)
377
+ T1 = [-cos1, 2, z], [1, n-3, 2-n], [n-1], [], [], [1.5, 0.5*(3-n), 1-0.5*n], r
378
+ T2 = [-sin1, 2, z], [1, n-1, -n], [n], [], [], [0.5, 0.5*(1-n), 1-0.5*n], r
379
+ T3 = [-sin2, 2, z], [1, -n-1, n], [-n], [], [], [0.5, 0.5*(n+1), 0.5*(n+2)], r
380
+ T4 = [-cos2, 2, z], [1, -n-3, n+2], [-n-1], [], [], [1.5, 0.5*(n+3), 0.5*(n+2)], r
381
+ return T1, T2, T3, T4
382
+ return ctx.hypercomb(h, [n], **kwargs)
383
+
384
+ # TODO: do this more generically?
385
+ def c_memo(f):
386
+ name = f.__name__
387
+ def f_wrapped(ctx):
388
+ cache = ctx._misc_const_cache
389
+ prec = ctx.prec
390
+ p,v = cache.get(name, (-1,0))
391
+ if p >= prec:
392
+ return +v
393
+ else:
394
+ cache[name] = (prec, f(ctx))
395
+ return cache[name][1]
396
+ return f_wrapped
397
+
398
+ @c_memo
399
+ def _airyai_C1(ctx):
400
+ return 1 / (ctx.cbrt(9) * ctx.gamma(ctx.mpf(2)/3))
401
+
402
+ @c_memo
403
+ def _airyai_C2(ctx):
404
+ return -1 / (ctx.cbrt(3) * ctx.gamma(ctx.mpf(1)/3))
405
+
406
+ @c_memo
407
+ def _airybi_C1(ctx):
408
+ return 1 / (ctx.nthroot(3,6) * ctx.gamma(ctx.mpf(2)/3))
409
+
410
+ @c_memo
411
+ def _airybi_C2(ctx):
412
+ return ctx.nthroot(3,6) / ctx.gamma(ctx.mpf(1)/3)
413
+
414
+ def _airybi_n2_inf(ctx):
415
+ prec = ctx.prec
416
+ try:
417
+ v = ctx.power(3,'2/3')*ctx.gamma('2/3')/(2*ctx.pi)
418
+ finally:
419
+ ctx.prec = prec
420
+ return +v
421
+
422
+ # Derivatives at z = 0
423
+ # TODO: could be expressed more elegantly using triple factorials
424
+ def _airyderiv_0(ctx, z, n, ntype, which):
425
+ if ntype == 'Z':
426
+ if n < 0:
427
+ return z
428
+ r = ctx.mpq_1_3
429
+ prec = ctx.prec
430
+ try:
431
+ ctx.prec += 10
432
+ v = ctx.gamma((n+1)*r) * ctx.power(3,n*r) / ctx.pi
433
+ if which == 0:
434
+ v *= ctx.sinpi(2*(n+1)*r)
435
+ v /= ctx.power(3,'2/3')
436
+ else:
437
+ v *= abs(ctx.sinpi(2*(n+1)*r))
438
+ v /= ctx.power(3,'1/6')
439
+ finally:
440
+ ctx.prec = prec
441
+ return +v + z
442
+ else:
443
+ # singular (does the limit exist?)
444
+ raise NotImplementedError
445
+
446
+ @defun
447
+ def airyai(ctx, z, derivative=0, **kwargs):
448
+ z = ctx.convert(z)
449
+ if derivative:
450
+ n, ntype = ctx._convert_param(derivative)
451
+ else:
452
+ n = 0
453
+ # Values at infinities
454
+ if not ctx.isnormal(z) and z:
455
+ if n and ntype == 'Z':
456
+ if n == -1:
457
+ if z == ctx.inf:
458
+ return ctx.mpf(1)/3 + 1/z
459
+ if z == ctx.ninf:
460
+ return ctx.mpf(-2)/3 + 1/z
461
+ if n < -1:
462
+ if z == ctx.inf:
463
+ return z
464
+ if z == ctx.ninf:
465
+ return (-1)**n * (-z)
466
+ if (not n) and z == ctx.inf or z == ctx.ninf:
467
+ return 1/z
468
+ # TODO: limits
469
+ raise ValueError("essential singularity of Ai(z)")
470
+ # Account for exponential scaling
471
+ if z:
472
+ extraprec = max(0, int(1.5*ctx.mag(z)))
473
+ else:
474
+ extraprec = 0
475
+ if n:
476
+ if n == 1:
477
+ def h():
478
+ # http://functions.wolfram.com/03.07.06.0005.01
479
+ if ctx._re(z) > 4:
480
+ ctx.prec += extraprec
481
+ w = z**1.5; r = -0.75/w; u = -2*w/3
482
+ ctx.prec -= extraprec
483
+ C = -ctx.exp(u)/(2*ctx.sqrt(ctx.pi))*ctx.nthroot(z,4)
484
+ return ([C],[1],[],[],[(-1,6),(7,6)],[],r),
485
+ # http://functions.wolfram.com/03.07.26.0001.01
486
+ else:
487
+ ctx.prec += extraprec
488
+ w = z**3 / 9
489
+ ctx.prec -= extraprec
490
+ C1 = _airyai_C1(ctx) * 0.5
491
+ C2 = _airyai_C2(ctx)
492
+ T1 = [C1,z],[1,2],[],[],[],[ctx.mpq_5_3],w
493
+ T2 = [C2],[1],[],[],[],[ctx.mpq_1_3],w
494
+ return T1, T2
495
+ return ctx.hypercomb(h, [], **kwargs)
496
+ else:
497
+ if z == 0:
498
+ return _airyderiv_0(ctx, z, n, ntype, 0)
499
+ # http://functions.wolfram.com/03.05.20.0004.01
500
+ def h(n):
501
+ ctx.prec += extraprec
502
+ w = z**3/9
503
+ ctx.prec -= extraprec
504
+ q13,q23,q43 = ctx.mpq_1_3, ctx.mpq_2_3, ctx.mpq_4_3
505
+ a1=q13; a2=1; b1=(1-n)*q13; b2=(2-n)*q13; b3=1-n*q13
506
+ T1 = [3, z], [n-q23, -n], [a1], [b1,b2,b3], \
507
+ [a1,a2], [b1,b2,b3], w
508
+ a1=q23; b1=(2-n)*q13; b2=1-n*q13; b3=(4-n)*q13
509
+ T2 = [3, z, -z], [n-q43, -n, 1], [a1], [b1,b2,b3], \
510
+ [a1,a2], [b1,b2,b3], w
511
+ return T1, T2
512
+ v = ctx.hypercomb(h, [n], **kwargs)
513
+ if ctx._is_real_type(z) and ctx.isint(n):
514
+ v = ctx._re(v)
515
+ return v
516
+ else:
517
+ def h():
518
+ if ctx._re(z) > 4:
519
+ # We could use 1F1, but it results in huge cancellation;
520
+ # the following expansion is better.
521
+ # TODO: asymptotic series for derivatives
522
+ ctx.prec += extraprec
523
+ w = z**1.5; r = -0.75/w; u = -2*w/3
524
+ ctx.prec -= extraprec
525
+ C = ctx.exp(u)/(2*ctx.sqrt(ctx.pi)*ctx.nthroot(z,4))
526
+ return ([C],[1],[],[],[(1,6),(5,6)],[],r),
527
+ else:
528
+ ctx.prec += extraprec
529
+ w = z**3 / 9
530
+ ctx.prec -= extraprec
531
+ C1 = _airyai_C1(ctx)
532
+ C2 = _airyai_C2(ctx)
533
+ T1 = [C1],[1],[],[],[],[ctx.mpq_2_3],w
534
+ T2 = [z*C2],[1],[],[],[],[ctx.mpq_4_3],w
535
+ return T1, T2
536
+ return ctx.hypercomb(h, [], **kwargs)
537
+
538
+ @defun
539
+ def airybi(ctx, z, derivative=0, **kwargs):
540
+ z = ctx.convert(z)
541
+ if derivative:
542
+ n, ntype = ctx._convert_param(derivative)
543
+ else:
544
+ n = 0
545
+ # Values at infinities
546
+ if not ctx.isnormal(z) and z:
547
+ if n and ntype == 'Z':
548
+ if z == ctx.inf:
549
+ return z
550
+ if z == ctx.ninf:
551
+ if n == -1:
552
+ return 1/z
553
+ if n == -2:
554
+ return _airybi_n2_inf(ctx)
555
+ if n < -2:
556
+ return (-1)**n * (-z)
557
+ if not n:
558
+ if z == ctx.inf:
559
+ return z
560
+ if z == ctx.ninf:
561
+ return 1/z
562
+ # TODO: limits
563
+ raise ValueError("essential singularity of Bi(z)")
564
+ if z:
565
+ extraprec = max(0, int(1.5*ctx.mag(z)))
566
+ else:
567
+ extraprec = 0
568
+ if n:
569
+ if n == 1:
570
+ # http://functions.wolfram.com/03.08.26.0001.01
571
+ def h():
572
+ ctx.prec += extraprec
573
+ w = z**3 / 9
574
+ ctx.prec -= extraprec
575
+ C1 = _airybi_C1(ctx)*0.5
576
+ C2 = _airybi_C2(ctx)
577
+ T1 = [C1,z],[1,2],[],[],[],[ctx.mpq_5_3],w
578
+ T2 = [C2],[1],[],[],[],[ctx.mpq_1_3],w
579
+ return T1, T2
580
+ return ctx.hypercomb(h, [], **kwargs)
581
+ else:
582
+ if z == 0:
583
+ return _airyderiv_0(ctx, z, n, ntype, 1)
584
+ def h(n):
585
+ ctx.prec += extraprec
586
+ w = z**3/9
587
+ ctx.prec -= extraprec
588
+ q13,q23,q43 = ctx.mpq_1_3, ctx.mpq_2_3, ctx.mpq_4_3
589
+ q16 = ctx.mpq_1_6
590
+ q56 = ctx.mpq_5_6
591
+ a1=q13; a2=1; b1=(1-n)*q13; b2=(2-n)*q13; b3=1-n*q13
592
+ T1 = [3, z], [n-q16, -n], [a1], [b1,b2,b3], \
593
+ [a1,a2], [b1,b2,b3], w
594
+ a1=q23; b1=(2-n)*q13; b2=1-n*q13; b3=(4-n)*q13
595
+ T2 = [3, z], [n-q56, 1-n], [a1], [b1,b2,b3], \
596
+ [a1,a2], [b1,b2,b3], w
597
+ return T1, T2
598
+ v = ctx.hypercomb(h, [n], **kwargs)
599
+ if ctx._is_real_type(z) and ctx.isint(n):
600
+ v = ctx._re(v)
601
+ return v
602
+ else:
603
+ def h():
604
+ ctx.prec += extraprec
605
+ w = z**3 / 9
606
+ ctx.prec -= extraprec
607
+ C1 = _airybi_C1(ctx)
608
+ C2 = _airybi_C2(ctx)
609
+ T1 = [C1],[1],[],[],[],[ctx.mpq_2_3],w
610
+ T2 = [z*C2],[1],[],[],[],[ctx.mpq_4_3],w
611
+ return T1, T2
612
+ return ctx.hypercomb(h, [], **kwargs)
613
+
614
+ def _airy_zero(ctx, which, k, derivative, complex=False):
615
+ # Asymptotic formulas are given in DLMF section 9.9
616
+ def U(t): return t**(2/3.)*(1-7/(t**2*48))
617
+ def T(t): return t**(2/3.)*(1+5/(t**2*48))
618
+ k = int(k)
619
+ if k < 1:
620
+ raise ValueError("k cannot be less than 1")
621
+ if not derivative in (0,1):
622
+ raise ValueError("Derivative should lie between 0 and 1")
623
+ if which == 0:
624
+ if derivative:
625
+ return ctx.findroot(lambda z: ctx.airyai(z,1),
626
+ -U(3*ctx.pi*(4*k-3)/8))
627
+ return ctx.findroot(ctx.airyai, -T(3*ctx.pi*(4*k-1)/8))
628
+ if which == 1 and complex == False:
629
+ if derivative:
630
+ return ctx.findroot(lambda z: ctx.airybi(z,1),
631
+ -U(3*ctx.pi*(4*k-1)/8))
632
+ return ctx.findroot(ctx.airybi, -T(3*ctx.pi*(4*k-3)/8))
633
+ if which == 1 and complex == True:
634
+ if derivative:
635
+ t = 3*ctx.pi*(4*k-3)/8 + 0.75j*ctx.ln2
636
+ s = ctx.expjpi(ctx.mpf(1)/3) * T(t)
637
+ return ctx.findroot(lambda z: ctx.airybi(z,1), s)
638
+ t = 3*ctx.pi*(4*k-1)/8 + 0.75j*ctx.ln2
639
+ s = ctx.expjpi(ctx.mpf(1)/3) * U(t)
640
+ return ctx.findroot(ctx.airybi, s)
641
+
642
+ @defun
643
+ def airyaizero(ctx, k, derivative=0):
644
+ return _airy_zero(ctx, 0, k, derivative, False)
645
+
646
+ @defun
647
+ def airybizero(ctx, k, derivative=0, complex=False):
648
+ return _airy_zero(ctx, 1, k, derivative, complex)
649
+
650
def _scorer(ctx, z, which, kwargs):
    """
    Shared backend for the Scorer functions: Gi(z) for which=0 and
    Hi(z) for which=1.  Far from the origin a direct asymptotic series
    is tried first (to avoid exponentially large cancellation); otherwise
    the functions are assembled from Airy Bi via a hypercomb expansion.
    """
    z = ctx.convert(z)
    if ctx.isinf(z):
        # Limits on the real axis: Gi -> 0 at both infinities,
        # Hi -> +inf at +inf and 0 at -inf.
        if z == ctx.inf:
            if which == 0: return 1/z
            if which == 1: return z
        if z == ctx.ninf:
            return 1/z
        # Any other infinity (complex/directed) hits the essential singularity
        raise ValueError("essential singularity")
    if z:
        # Extra working precision proportional to the magnitude of z
        extraprec = max(0, int(1.5*ctx.mag(z)))
    else:
        extraprec = 0
    if kwargs.get('derivative'):
        raise NotImplementedError
    # Direct asymptotic expansions, to avoid
    # exponentially large cancellation
    try:
        if ctx.mag(z) > 3:
            if which == 0 and abs(ctx.arg(z)) < ctx.pi/3 * 0.999:
                def h():
                    return (([ctx.pi,z],[-1,-1],[],[],[(1,3),(2,3),1],[],9/z**3),)
                return ctx.hypercomb(h, [], maxterms=ctx.prec, force_series=True)
            if which == 1 and abs(ctx.arg(-z)) < 2*ctx.pi/3 * 0.999:
                def h():
                    return (([-ctx.pi,z],[-1,-1],[],[],[(1,3),(2,3),1],[],9/z**3),)
                return ctx.hypercomb(h, [], maxterms=ctx.prec, force_series=True)
    except ctx.NoConvergence:
        # Fall through to the Bi-based representation below
        pass
    def h():
        # Gi = Bi/3 - hyp-term; Hi = 2*Bi/3 + hyp-term (sign/scale below)
        A = ctx.airybi(z, **kwargs)/3
        B = -2*ctx.pi
        if which == 1:
            A *= 2
            B *= -1
        # Compute z**3/9 at elevated precision, then restore
        ctx.prec += extraprec
        w = z**3/9
        ctx.prec -= extraprec
        T1 = [A], [1], [], [], [], [], 0
        T2 = [B,z], [-1,2], [], [], [1], [ctx.mpq_4_3,ctx.mpq_5_3], w
        return T1, T2
    return ctx.hypercomb(h, [], **kwargs)
692
+
693
@defun
def scorergi(ctx, z, **kwargs):
    """Scorer function Gi(z), the particular solution of y'' - z*y = -1/pi."""
    which = 0  # select Gi in the shared backend
    return _scorer(ctx, z, which, kwargs)
696
+
697
@defun
def scorerhi(ctx, z, **kwargs):
    """Scorer function Hi(z), the particular solution of y'' - z*y = 1/pi."""
    which = 1  # select Hi in the shared backend
    return _scorer(ctx, z, which, kwargs)
700
+
701
@defun_wrapped
def coulombc(ctx, l, eta, _cache={}):
    """
    Normalization constant C_l(eta) for the Coulomb wave functions,
    computed via log-gammas to avoid premature overflow.  Results are
    memoized per (l, eta) together with the precision at which they
    were computed.
    """
    # Reuse a cached value only if it was computed at >= current precision
    if (l, eta) in _cache and _cache[l,eta][0] >= ctx.prec:
        # Unary + rounds the cached value to the current precision
        return +_cache[l,eta][1]
    G3 = ctx.loggamma(2*l+2)
    G1 = ctx.loggamma(1+l+ctx.j*eta)
    G2 = ctx.loggamma(1+l-ctx.j*eta)
    v = 2**l * ctx.exp((-ctx.pi*eta+G1+G2)/2 - G3)
    # For real l and eta the constant is real; drop the spurious imaginary part
    if not (ctx.im(l) or ctx.im(eta)):
        v = ctx.re(v)
    _cache[l,eta] = (ctx.prec, v)
    return v
713
+
714
@defun_wrapped
def coulombf(ctx, l, eta, z, w=1, chop=True, **kwargs):
    """
    Regular Coulomb wave function F_l(eta, z), evaluated through its
    confluent-hypergeometric representation via hypercomb.
    """
    # Regular Coulomb wave function
    # Note: w can be either 1 or -1; the other may be better in some cases
    # TODO: check that chop=True chops when and only when it should
    #ctx.prec += 10
    def h(l, eta):
        try:
            jw = ctx.j*w
            # Exact products so no rounding enters the series argument
            jwz = ctx.fmul(jw, z, exact=True)
            jwz2 = ctx.fmul(jwz, -2, exact=True)
            C = ctx.coulombc(l, eta)
            T1 = [C, z, ctx.exp(jwz)], [1, l+1, 1], [], [], [1+l+jw*eta], \
                [2*l+2], jwz2
        except ValueError:
            # Degenerate parameters (gamma pole): the term vanishes
            T1 = [0], [-1], [], [], [], [], 0
        return (T1,)
    v = ctx.hypercomb(h, [l,eta], **kwargs)
    # For real parameters and z on the right half-line, the value is real
    if chop and (not ctx.im(l)) and (not ctx.im(eta)) and (not ctx.im(z)) and \
        (ctx.re(z) >= 0):
        v = ctx.re(v)
    return v
736
+
737
@defun_wrapped
def _coulomb_chi(ctx, l, eta, _cache={}):
    """
    Phase-like auxiliary quantity chi(l, eta) used by :func:`coulombg`,
    accumulated with ctx.sum_accurately to control cancellation between
    the four log-gamma terms.  Memoized per (l, eta) with the precision
    at which the value was computed.
    """
    if (l, eta) in _cache and _cache[l,eta][0] >= ctx.prec:
        return _cache[l,eta][1]
    def terms():
        l2 = -l-1
        jeta = ctx.j*eta
        return [ctx.loggamma(1+l+jeta) * (-0.5j),
            ctx.loggamma(1+l-jeta) * (0.5j),
            ctx.loggamma(1+l2+jeta) * (0.5j),
            ctx.loggamma(1+l2-jeta) * (-0.5j),
            -(l+0.5)*ctx.pi]
    v = ctx.sum_accurately(terms, 1)
    _cache[l,eta] = (ctx.prec, v)
    return v
752
+
753
@defun_wrapped
def coulombg(ctx, l, eta, z, w=1, chop=True, **kwargs):
    """
    Irregular Coulomb wave function G_l(eta, z), built as a combination
    of two regular-type solutions with orders l and -l-1.  Integer and
    half-integer l are handled by hypercomb's parameter perturbation.
    """
    # Irregular Coulomb wave function
    # Note: w can be either 1 or -1; the other may be better in some cases
    # TODO: check that chop=True chops when and only when it should
    if not ctx._im(l):
        l = ctx._re(l) # XXX: for isint
    def h(l, eta):
        # Force perturbation for integers and half-integers
        if ctx.isint(l*2):
            T1 = [0], [-1], [], [], [], [], 0
            return (T1,)
        l2 = -l-1
        try:
            chi = ctx._coulomb_chi(l, eta)
            jw = ctx.j*w
            s = ctx.sin(chi); c = ctx.cos(chi)
            C1 = ctx.coulombc(l,eta)
            C2 = ctx.coulombc(l2,eta)
            u = ctx.exp(jw*z)
            x = -2*jw*z
            T1 = [s, C1, z, u, c], [-1, 1, l+1, 1, 1], [], [], \
                [1+l+jw*eta], [2*l+2], x
            T2 = [-s, C2, z, u], [-1, 1, l2+1, 1], [], [], \
                [1+l2+jw*eta], [2*l2+2], x
            return T1, T2
        except ValueError:
            # Degenerate parameters: contribute nothing
            T1 = [0], [-1], [], [], [], [], 0
            return (T1,)
    v = ctx.hypercomb(h, [l,eta], **kwargs)
    # For real parameters and z on the right half-line, the value is real
    if chop and (not ctx._im(l)) and (not ctx._im(eta)) and (not ctx._im(z)) and \
        (ctx._re(z) >= 0):
        v = ctx._re(v)
    return v
787
+
788
def mcmahon(ctx, kind, prime, v, m):
    """
    Computes an estimate for the location of the Bessel function zero
    j_{v,m}, y_{v,m}, j'_{v,m} or y'_{v,m} using McMahon's asymptotic
    expansion (Abramowitz & Stegun 9.5.12-13, DLMF 10.21(vi)).

    Returns (r,err) where r is the estimated location of the root
    and err is a positive number estimating the error of the
    asymptotic expansion.
    """
    u = 4*v**2
    # Offset in the leading term: depends on kind (J vs Y) and on whether
    # a zero of the function or of its derivative is requested.
    if prime:
        c = 3 if kind == 1 else 1
    else:
        c = 1 if kind == 1 else 3
    b = (4*m+2*v-c)*ctx.pi/4
    if prime:
        terms = [b,
                 -(u+3)/(8*b),
                 -4*(7*u**2+82*u-9)/(3*(8*b)**3),
                 -32*(83*u**3+2075*u**2-3039*u+3537)/(15*(8*b)**5),
                 -64*(6949*u**4+296492*u**3-1248002*u**2+7414380*u-5853627)/(105*(8*b)**7)]
    else:
        terms = [b,
                 -(u-1)/(8*b),
                 -4*(u-1)*(7*u-31)/(3*(8*b)**3),
                 -32*(u-1)*(83*u**2-982*u+3779)/(15*(8*b)**5),
                 -64*(u-1)*(6949*u**3-153855*u**2+1585743*u-6277237)/(105*(8*b)**7)]
    # Sum the asymptotic series only while terms keep decreasing; the
    # first non-decreasing term gives the error estimate.
    s = terms[0]
    err = 0.0
    for i in range(1, len(terms)):
        if abs(terms[i]) < abs(terms[i-1]):
            s += terms[i]
        else:
            err = abs(terms[i])
        if i == len(terms)-1:
            # The magnitude of the last term always bounds the error
            err = abs(terms[-1])
    return s, err
826
+
827
def generalized_bisection(ctx, f, a, b, n):
    """
    Given f known to have exactly n simple roots within [a,b],
    return a list of n intervals isolating the roots
    and having opposite signs at the endpoints.

    The grid is refined by doubling until exactly n sign changes
    are observed.  TODO: this can be optimized, e.g. by reusing
    evaluation points.
    """
    if n < 1:
        raise ValueError("n cannot be less than 1")
    num_points = n + 1
    while True:
        grid = ctx.linspace(a, b, num_points)
        grid_signs = [ctx.sign(f(t)) for t in grid]
        brackets = [(grid[i], grid[i+1])
                    for i in range(num_points - 1)
                    if grid_signs[i]*grid_signs[i+1] == -1]
        if len(brackets) == n:
            return brackets
        num_points *= 2
848
+
849
def find_in_interval(ctx, f, ab):
    """Refine a sign-change bracket ab=(a,b) to a root of f.  The Illinois
    solver is used, and verification is skipped since the bracket already
    guarantees a sign change."""
    solver_opts = {'solver': 'illinois', 'verify': False}
    return ctx.findroot(f, ab, **solver_opts)
851
+
852
def bessel_zero(ctx, kind, prime, v, m, isoltol=0.01, _interval_cache={}):
    """
    Compute the m-th zero of J_v (kind=1) or Y_v (kind=2), or of the
    corresponding derivative when prime=1.

    Strategy: use McMahon's asymptotic estimate directly when its error
    bound is below *isoltol*; otherwise isolate the first n zeros by
    generalized bisection between a point left of the first zero and a
    bound obtained from the asymptotics, caching the isolating intervals
    in _interval_cache for later calls.
    """
    prec = ctx.prec
    # Work at elevated precision covering the magnitudes of v and m
    workprec = max(prec, ctx.mag(v), ctx.mag(m))+10
    try:
        ctx.prec = workprec
        v = ctx.mpf(v)
        m = int(m)
        prime = int(prime)
        if v < 0:
            raise ValueError("v cannot be negative")
        if m < 1:
            raise ValueError("m cannot be less than 1")
        if not prime in (0,1):
            raise ValueError("prime should lie between 0 and 1")
        if kind == 1:
            if prime: f = lambda x: ctx.besselj(v,x,derivative=1)
            else: f = lambda x: ctx.besselj(v,x)
        if kind == 2:
            if prime: f = lambda x: ctx.bessely(v,x,derivative=1)
            else: f = lambda x: ctx.bessely(v,x)
        # The first root of J' is very close to 0 for small
        # orders, and this needs to be special-cased
        if kind == 1 and prime and m == 1:
            if v == 0:
                return ctx.zero
            if v <= 1:
                # TODO: use v <= j'_{v,1} < y_{v,1}?
                r = 2*ctx.sqrt(v*(1+v)/(v+2))
                return find_in_interval(ctx, f, (r/10, 2*r))
        if (kind,prime,v,m) in _interval_cache:
            return find_in_interval(ctx, f, _interval_cache[kind,prime,v,m])
        r, err = mcmahon(ctx, kind, prime, v, m)
        if err < isoltol:
            # The asymptotic estimate already isolates the root
            return find_in_interval(ctx, f, (r-isoltol, r+isoltol))
        # An x such that 0 < x < r_{v,1}
        if kind == 1 and not prime: low = 2.4
        if kind == 1 and prime: low = 1.8
        if kind == 2 and not prime: low = 0.8
        if kind == 2 and prime: low = 2.0
        n = m+1
        while 1:
            # Find an index n for which the asymptotics are reliable,
            # then bisect to isolate all zeros up to n at once
            r1, err = mcmahon(ctx, kind, prime, v, n)
            if err < isoltol:
                r2, err2 = mcmahon(ctx, kind, prime, v, n+1)
                intervals = generalized_bisection(ctx, f, low, 0.5*(r1+r2), n)
                for k, ab in enumerate(intervals):
                    _interval_cache[kind,prime,v,k+1] = ab
                return find_in_interval(ctx, f, intervals[m-1])
            else:
                n = n*2
    finally:
        ctx.prec = prec
904
+
905
@defun
def besseljzero(ctx, v, m, derivative=0):
    r"""
    For a real order `\nu \ge 0` and a positive integer `m`, returns
    `j_{\nu,m}`, the `m`-th positive zero of the Bessel function of the
    first kind `J_{\nu}(z)` (see :func:`~mpmath.besselj`). Alternatively,
    with *derivative=1*, gives the `m`-th nonnegative simple zero
    `j'_{\nu,m}` of `J'_{\nu}(z)`.

    The indexing convention is that used by Abramowitz & Stegun
    and the DLMF. Note the special case `j'_{0,1} = 0`, while all other
    zeros are positive. In effect, only simple zeros are counted
    (all zeros of Bessel functions are simple except possibly `z = 0`)
    and `j_{\nu,m}` becomes a monotonic function of both `\nu`
    and `m`.

    The zeros are interlaced according to the inequalities

    .. math ::

        j'_{\nu,k} < j_{\nu,k} < j'_{\nu,k+1}

        j_{\nu,1} < j_{\nu+1,1} < j_{\nu,2} < j_{\nu+1,2} < j_{\nu,3} < \cdots

    **Examples**

    Initial zeros of the Bessel functions `J_0(z), J_1(z), J_2(z)`::

        >>> from mpmath import *
        >>> mp.dps = 25; mp.pretty = True
        >>> besseljzero(0,1); besseljzero(0,2); besseljzero(0,3)
        2.404825557695772768621632
        5.520078110286310649596604
        8.653727912911012216954199
        >>> besseljzero(1,1); besseljzero(1,2); besseljzero(1,3)
        3.831705970207512315614436
        7.01558666981561875353705
        10.17346813506272207718571
        >>> besseljzero(2,1); besseljzero(2,2); besseljzero(2,3)
        5.135622301840682556301402
        8.417244140399864857783614
        11.61984117214905942709415

    Initial zeros of `J'_0(z), J'_1(z), J'_2(z)`::

        >>> besseljzero(0,1,1); besseljzero(0,2,1); besseljzero(0,3,1)
        0.0
        3.831705970207512315614436
        7.01558666981561875353705
        >>> besseljzero(1,1,1); besseljzero(1,2,1); besseljzero(1,3,1)
        1.84118378134065930264363
        5.331442773525032636884016
        8.536316366346285834358961
        >>> besseljzero(2,1,1); besseljzero(2,2,1); besseljzero(2,3,1)
        3.054236928227140322755932
        6.706133194158459146634394
        9.969467823087595793179143

    Zeros with large index::

        >>> besseljzero(0,100); besseljzero(0,1000); besseljzero(0,10000)
        313.3742660775278447196902
        3140.807295225078628895545
        31415.14114171350798533666
        >>> besseljzero(5,100); besseljzero(5,1000); besseljzero(5,10000)
        321.1893195676003157339222
        3148.657306813047523500494
        31422.9947255486291798943
        >>> besseljzero(0,100,1); besseljzero(0,1000,1); besseljzero(0,10000,1)
        311.8018681873704508125112
        3139.236339643802482833973
        31413.57032947022399485808

    Zeros of functions with large order::

        >>> besseljzero(50,1)
        57.11689916011917411936228
        >>> besseljzero(50,2)
        62.80769876483536093435393
        >>> besseljzero(50,100)
        388.6936600656058834640981
        >>> besseljzero(50,1,1)
        52.99764038731665010944037
        >>> besseljzero(50,2,1)
        60.02631933279942589882363
        >>> besseljzero(50,100,1)
        387.1083151608726181086283

    Zeros of functions with fractional order::

        >>> besseljzero(0.5,1); besseljzero(1.5,1); besseljzero(2.25,4)
        3.141592653589793238462643
        4.493409457909064175307881
        15.15657692957458622921634

    Both `J_{\nu}(z)` and `J'_{\nu}(z)` can be expressed as infinite
    products over their zeros::

        >>> v,z = 2, mpf(1)
        >>> (z/2)**v/gamma(v+1) * \
        ...     nprod(lambda k: 1-(z/besseljzero(v,k))**2, [1,inf])
        ...
        0.1149034849319004804696469
        >>> besselj(v,z)
        0.1149034849319004804696469
        >>> (z/2)**(v-1)/2/gamma(v) * \
        ...     nprod(lambda k: 1-(z/besseljzero(v,k,1))**2, [1,inf])
        ...
        0.2102436158811325550203884
        >>> besselj(v,z,1)
        0.2102436158811325550203884

    """
    # The unary + rounds the internally computed zero to the working precision
    return +bessel_zero(ctx, 1, derivative, v, m)
1018
+
1019
@defun
def besselyzero(ctx, v, m, derivative=0):
    r"""
    For a real order `\nu \ge 0` and a positive integer `m`, returns
    `y_{\nu,m}`, the `m`-th positive zero of the Bessel function of the
    second kind `Y_{\nu}(z)` (see :func:`~mpmath.bessely`). Alternatively,
    with *derivative=1*, gives the `m`-th positive zero `y'_{\nu,m}` of
    `Y'_{\nu}(z)`.

    The zeros are interlaced according to the inequalities

    .. math ::

        y_{\nu,k} < y'_{\nu,k} < y_{\nu,k+1}

        y_{\nu,1} < y_{\nu+1,1} < y_{\nu,2} < y_{\nu+1,2} < y_{\nu,3} < \cdots

    **Examples**

    Initial zeros of the Bessel functions `Y_0(z), Y_1(z), Y_2(z)`::

        >>> from mpmath import *
        >>> mp.dps = 25; mp.pretty = True
        >>> besselyzero(0,1); besselyzero(0,2); besselyzero(0,3)
        0.8935769662791675215848871
        3.957678419314857868375677
        7.086051060301772697623625
        >>> besselyzero(1,1); besselyzero(1,2); besselyzero(1,3)
        2.197141326031017035149034
        5.429681040794135132772005
        8.596005868331168926429606
        >>> besselyzero(2,1); besselyzero(2,2); besselyzero(2,3)
        3.384241767149593472701426
        6.793807513268267538291167
        10.02347797936003797850539

    Initial zeros of `Y'_0(z), Y'_1(z), Y'_2(z)`::

        >>> besselyzero(0,1,1); besselyzero(0,2,1); besselyzero(0,3,1)
        2.197141326031017035149034
        5.429681040794135132772005
        8.596005868331168926429606
        >>> besselyzero(1,1,1); besselyzero(1,2,1); besselyzero(1,3,1)
        3.683022856585177699898967
        6.941499953654175655751944
        10.12340465543661307978775
        >>> besselyzero(2,1,1); besselyzero(2,2,1); besselyzero(2,3,1)
        5.002582931446063945200176
        8.350724701413079526349714
        11.57419546521764654624265

    Zeros with large index::

        >>> besselyzero(0,100); besselyzero(0,1000); besselyzero(0,10000)
        311.8034717601871549333419
        3139.236498918198006794026
        31413.57034538691205229188
        >>> besselyzero(5,100); besselyzero(5,1000); besselyzero(5,10000)
        319.6183338562782156235062
        3147.086508524556404473186
        31421.42392920214673402828
        >>> besselyzero(0,100,1); besselyzero(0,1000,1); besselyzero(0,10000,1)
        313.3726705426359345050449
        3140.807136030340213610065
        31415.14112579761578220175

    Zeros of functions with large order::

        >>> besselyzero(50,1)
        53.50285882040036394680237
        >>> besselyzero(50,2)
        60.11244442774058114686022
        >>> besselyzero(50,100)
        387.1096509824943957706835
        >>> besselyzero(50,1,1)
        56.96290427516751320063605
        >>> besselyzero(50,2,1)
        62.74888166945933944036623
        >>> besselyzero(50,100,1)
        388.6923300548309258355475

    Zeros of functions with fractional order::

        >>> besselyzero(0.5,1); besselyzero(1.5,1); besselyzero(2.25,4)
        1.570796326794896619231322
        2.798386045783887136720249
        13.56721208770735123376018

    """
    # The unary + rounds the internally computed zero to the working precision
    return +bessel_zero(ctx, 2, derivative, v, m)
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/functions/factorials.py ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from ..libmp.backend import xrange
2
+ from .functions import defun, defun_wrapped
3
+
4
@defun
def gammaprod(ctx, a, b, _infsign=False):
    """
    Compute prod(gamma(x) for x in a) / prod(gamma(x) for x in b),
    taking limits where gamma has poles (at nonpositive integers):
    surplus poles in the numerator give inf (signed if _infsign=True),
    surplus poles in the denominator give 0, and matching poles cancel
    pairwise via lim gamma(i)/gamma(j) = (-1)**(i+j)*gamma(1-j)/gamma(1-i).
    """
    a = [ctx.convert(x) for x in a]
    b = [ctx.convert(x) for x in b]
    poles_num = []
    poles_den = []
    regular_num = []
    regular_den = []
    # isnpint(x) is true exactly at the gamma poles (nonpositive integers)
    for x in a: [regular_num, poles_num][ctx.isnpint(x)].append(x)
    for x in b: [regular_den, poles_den][ctx.isnpint(x)].append(x)
    # One more pole in numerator or denominator gives 0 or inf
    if len(poles_num) < len(poles_den): return ctx.zero
    if len(poles_num) > len(poles_den):
        # Get correct sign of infinity for x+h, h -> 0 from above
        # XXX: hack, this should be done properly
        if _infsign:
            a = [x and x*(1+ctx.eps) or x+ctx.eps for x in poles_num]
            b = [x and x*(1+ctx.eps) or x+ctx.eps for x in poles_den]
            return ctx.sign(ctx.gammaprod(a+regular_num,b+regular_den)) * ctx.inf
        else:
            return ctx.inf
    # All poles cancel
    # lim G(i)/G(j) = (-1)**(i+j) * gamma(1-j) / gamma(1-i)
    p = ctx.one
    orig = ctx.prec
    try:
        # Guard digits for the product of possibly many gamma factors
        ctx.prec = orig + 15
        while poles_num:
            i = poles_num.pop()
            j = poles_den.pop()
            p *= (-1)**(i+j) * ctx.gamma(1-j) / ctx.gamma(1-i)
        for x in regular_num: p *= ctx.gamma(x)
        for x in regular_den: p /= ctx.gamma(x)
    finally:
        ctx.prec = orig
    # Round the result to the restored precision
    return +p
40
+
41
@defun
def beta(ctx, x, y):
    """
    Euler beta function B(x,y) = gamma(x)*gamma(y)/gamma(x+y), with
    explicit handling of infinite arguments.
    """
    x = ctx.convert(x)
    y = ctx.convert(y)
    if ctx.isinf(y):
        # Normalize so that any infinity ends up in x
        x, y = y, x
    if ctx.isinf(x):
        if x == ctx.inf and not ctx._im(y):
            if y == ctx.ninf:
                return ctx.nan
            if y > 0:
                return ctx.zero
            if ctx.isint(y):
                return ctx.nan
            if y < 0:
                # Sign of the limit follows the sign of gamma(y)
                return ctx.sign(ctx.gamma(y)) * ctx.inf
        return ctx.nan
    # Compute x+y with guard precision before forming the gamma quotient
    xy = ctx.fadd(x, y, prec=2*ctx.prec)
    return ctx.gammaprod([x, y], [xy])
60
+
61
@defun
def binomial(ctx, n, k):
    """Generalized binomial coefficient C(n,k) = gamma(n+1) /
    (gamma(k+1)*gamma(n-k+1)); the shifts are formed with guard
    precision before the gamma quotient is taken."""
    wp = 2*ctx.prec
    num = ctx.fadd(n, 1, prec=wp)
    den_a = ctx.fadd(k, 1, prec=wp)
    den_b = ctx.fsub(num, k, prec=wp)
    return ctx.gammaprod([num], [den_a, den_b])
67
+
68
@defun
def rf(ctx, x, n):
    """Rising factorial (Pochhammer symbol): x^(n) = gamma(x+n)/gamma(x)."""
    shifted = ctx.fadd(x, n, prec=2*ctx.prec)
    return ctx.gammaprod([shifted], [x])
72
+
73
@defun
def ff(ctx, x, n):
    """Falling factorial: x_(n) = gamma(x+1)/gamma(x-n+1)."""
    wp = 2*ctx.prec
    top = ctx.fadd(x, 1, prec=wp)
    bottom = ctx.fadd(ctx.fsub(x, n, prec=wp), 1, prec=wp)
    return ctx.gammaprod([top], [bottom])
78
+
79
@defun_wrapped
def fac2(ctx, x):
    """Double factorial x!!, generalized to arbitrary arguments through
    the gamma function."""
    if ctx.isinf(x):
        # +inf!! = +inf; -inf has no limit
        return x if x == ctx.inf else ctx.nan
    return 2**(x/2)*(ctx.pi/2)**((ctx.cospi(x)-1)/4)*ctx.gamma(x/2+1)
86
+
87
@defun_wrapped
def barnesg(ctx, z):
    """
    Barnes G-function G(z).  Zero at the nonpositive integers; computed
    by a reflection formula far in the left half-plane and otherwise by
    shifting z right with gamma factors and summing an asymptotic series.
    """
    if ctx.isinf(z):
        if z == ctx.inf:
            return z
        return ctx.nan
    if ctx.isnan(z):
        return z
    # G vanishes at the nonpositive integers (z*0 preserves the type)
    if (not ctx._im(z)) and ctx._re(z) <= 0 and ctx.isint(ctx._re(z)):
        return z*0
    # Account for size (would not be needed if computing log(G))
    if abs(z) > 5:
        ctx.dps += 2*ctx.log(abs(z),2)
    # Reflection formula
    if ctx.re(z) < -ctx.dps:
        w = 1-z
        pi2 = 2*ctx.pi
        u = ctx.expjpi(2*w)
        v = ctx.j*ctx.pi/12 - ctx.j*ctx.pi*w**2/2 + w*ctx.ln(1-u) - \
            ctx.j*ctx.polylog(2, u)/pi2
        v = ctx.barnesg(2-z)*ctx.exp(v)/pi2**w
        if ctx._is_real_type(z):
            v = ctx._re(v)
        return v
    # Estimate terms for asymptotic expansion
    # TODO: fixme, obviously
    N = ctx.dps // 2 + 5
    G = 1
    # Shift z to the right using G(z+1) = gamma(z)*G(z) until the
    # asymptotic series is applicable
    while abs(z) < N or ctx.re(z) < 1:
        G /= ctx.gamma(z)
        z += 1
    z -= 1
    s = ctx.mpf(1)/12
    s -= ctx.log(ctx.glaisher)
    s += z*ctx.log(2*ctx.pi)/2
    s += (z**2/2-ctx.mpf(1)/12)*ctx.log(z)
    s -= 3*z**2/4
    z2k = z2 = z**2
    for k in xrange(1, N+1):
        t = ctx.bernoulli(2*k+2) / (4*k*(k+1)*z2k)
        if abs(t) < ctx.eps:
            #print k, N # check how many terms were needed
            break
        z2k *= z2
        s += t
    #if k == N:
    #    print "warning: series for barnesg failed to converge", ctx.dps
    return G*ctx.exp(s)
135
+
136
@defun
def superfac(ctx, z):
    """Superfactorial sf(z) = prod_{k=1..z} k!, expressed through the
    Barnes G-function as G(z+2)."""
    shifted = z + 2
    return ctx.barnesg(shifted)
139
+
140
@defun_wrapped
def hyperfac(ctx, z):
    """
    Hyperfactorial H(z) = prod_{k=1..z} k**k, generalized as
    exp(z*loggamma(z+1))/G(z+1) with the Barnes G-function.
    Negative integers are handled by a reflection-style recursion.
    """
    # XXX: estimate needed extra bits accurately
    if z == ctx.inf:
        return z
    if abs(z) > 5:
        extra = 4*int(ctx.log(abs(z),2))
    else:
        extra = 0
    ctx.prec += extra
    if not ctx._im(z) and ctx._re(z) < 0 and ctx.isint(ctx._re(z)):
        n = int(ctx.re(z))
        # Recurse on the mirrored positive argument; fix the sign
        h = ctx.hyperfac(-n-1)
        if ((n+1)//2) & 1:
            h = -h
        if ctx._is_complex_type(z):
            return h + 0j
        return h
    zp1 = z+1
    # Wrong branch cut
    #v = ctx.gamma(zp1)**z
    #ctx.prec -= extra
    #return v / ctx.barnesg(zp1)
    v = ctx.exp(z*ctx.loggamma(zp1))
    ctx.prec -= extra
    return v / ctx.barnesg(zp1)
166
+
167
+ '''
168
+ @defun
169
+ def psi0(ctx, z):
170
+ """Shortcut for psi(0,z) (the digamma function)"""
171
+ return ctx.psi(0, z)
172
+
173
+ @defun
174
+ def psi1(ctx, z):
175
+ """Shortcut for psi(1,z) (the trigamma function)"""
176
+ return ctx.psi(1, z)
177
+
178
+ @defun
179
+ def psi2(ctx, z):
180
+ """Shortcut for psi(2,z) (the tetragamma function)"""
181
+ return ctx.psi(2, z)
182
+
183
+ @defun
184
+ def psi3(ctx, z):
185
+ """Shortcut for psi(3,z) (the pentagamma function)"""
186
+ return ctx.psi(3, z)
187
+ '''
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/functions/functions.py ADDED
@@ -0,0 +1,645 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from ..libmp.backend import xrange
2
+
3
class SpecialFunctions(object):
    """
    This class implements special functions using high-level code.

    Elementary and some other functions (e.g. gamma function, basecase
    hypergeometric series) are assumed to be predefined by the context as
    "builtins" or "low-level" functions.
    """
    # Registry of name -> (function, wrap_flag) pairs populated by the
    # defun/defun_wrapped decorators; installed on the class in __init__
    defined_functions = {}

    # The series for the Jacobi theta functions converge for |q| < 1;
    # in the current implementation they throw a ValueError for
    # abs(q) > THETA_Q_LIM
    THETA_Q_LIM = 1 - 10**-7

    def __init__(self):
        cls = self.__class__
        # Install every registered special function as a context method
        for name in cls.defined_functions:
            f, wrap = cls.defined_functions[name]
            cls._wrap_specfun(name, f, wrap)

        # Frequently used exact rational constants
        self.mpq_1 = self._mpq((1,1))
        self.mpq_0 = self._mpq((0,1))
        self.mpq_1_2 = self._mpq((1,2))
        self.mpq_3_2 = self._mpq((3,2))
        self.mpq_1_4 = self._mpq((1,4))
        self.mpq_1_16 = self._mpq((1,16))
        self.mpq_3_16 = self._mpq((3,16))
        self.mpq_5_2 = self._mpq((5,2))
        self.mpq_3_4 = self._mpq((3,4))
        self.mpq_7_4 = self._mpq((7,4))
        self.mpq_5_4 = self._mpq((5,4))
        self.mpq_1_3 = self._mpq((1,3))
        self.mpq_2_3 = self._mpq((2,3))
        self.mpq_4_3 = self._mpq((4,3))
        self.mpq_1_6 = self._mpq((1,6))
        self.mpq_5_6 = self._mpq((5,6))
        self.mpq_5_3 = self._mpq((5,3))

        # Cache for miscellaneous computed constants
        self._misc_const_cache = {}

        # Alternative spellings accepted by the context
        self._aliases.update({
            'phase' : 'arg',
            'conjugate' : 'conj',
            'nthroot' : 'root',
            'polygamma' : 'psi',
            'hurwitz' : 'zeta',
            #'digamma' : 'psi0',
            #'trigamma' : 'psi1',
            #'tetragamma' : 'psi2',
            #'pentagamma' : 'psi3',
            'fibonacci' : 'fib',
            'factorial' : 'fac',
        })

        self.zetazero_memoized = self.memoize(self.zetazero)

    # Default -- do nothing
    @classmethod
    def _wrap_specfun(cls, name, f, wrap):
        setattr(cls, name, f)

    # Optional fast versions of common functions in common cases.
    # If not overridden, default (generic hypergeometric series)
    # implementations will be used
    def _besselj(ctx, n, z): raise NotImplementedError
    def _erf(ctx, z): raise NotImplementedError
    def _erfc(ctx, z): raise NotImplementedError
    def _gamma_upper_int(ctx, z, a): raise NotImplementedError
    def _expint_int(ctx, n, z): raise NotImplementedError
    def _zeta(ctx, s): raise NotImplementedError
    def _zetasum_fast(ctx, s, a, n, derivatives, reflect): raise NotImplementedError
    def _ei(ctx, z): raise NotImplementedError
    def _e1(ctx, z): raise NotImplementedError
    def _ci(ctx, z): raise NotImplementedError
    def _si(ctx, z): raise NotImplementedError
    def _altzeta(ctx, s): raise NotImplementedError
80
+
81
def _register(f, wrapped):
    # Record f in the registry consumed by SpecialFunctions.__init__
    SpecialFunctions.defined_functions[f.__name__] = (f, wrapped)
    return f

def defun_wrapped(f):
    """Register f as a context method that receives the automatic
    argument-conversion/precision wrapper on installation."""
    return _register(f, True)

def defun(f):
    """Register f as a context method installed without the wrapper."""
    return _register(f, False)

def defun_static(f):
    """Attach f directly to SpecialFunctions, bypassing registration."""
    setattr(SpecialFunctions, f.__name__, f)
    return f
92
+
93
@defun_wrapped
def cot(ctx, z):
    """Cotangent: 1/tan(z)."""
    return ctx.one / ctx.tan(z)

@defun_wrapped
def sec(ctx, z):
    """Secant: 1/cos(z)."""
    return ctx.one / ctx.cos(z)

@defun_wrapped
def csc(ctx, z):
    """Cosecant: 1/sin(z)."""
    return ctx.one / ctx.sin(z)

@defun_wrapped
def coth(ctx, z):
    """Hyperbolic cotangent: 1/tanh(z)."""
    return ctx.one / ctx.tanh(z)

@defun_wrapped
def sech(ctx, z):
    """Hyperbolic secant: 1/cosh(z)."""
    return ctx.one / ctx.cosh(z)

@defun_wrapped
def csch(ctx, z):
    """Hyperbolic cosecant: 1/sinh(z)."""
    return ctx.one / ctx.sinh(z)
110
+
111
@defun_wrapped
def acot(ctx, z):
    """Inverse cotangent; acot(0) = pi/2."""
    if not z:
        return ctx.pi * 0.5
    return ctx.atan(ctx.one / z)

@defun_wrapped
def asec(ctx, z):
    """Inverse secant: acos(1/z)."""
    return ctx.acos(ctx.one / z)

@defun_wrapped
def acsc(ctx, z):
    """Inverse cosecant: asin(1/z)."""
    return ctx.asin(ctx.one / z)

@defun_wrapped
def acoth(ctx, z):
    """Inverse hyperbolic cotangent; acoth(0) = i*pi/2."""
    if not z:
        return ctx.pi * 0.5j
    return ctx.atanh(ctx.one / z)


@defun_wrapped
def asech(ctx, z):
    """Inverse hyperbolic secant: acosh(1/z)."""
    return ctx.acosh(ctx.one / z)

@defun_wrapped
def acsch(ctx, z):
    """Inverse hyperbolic cosecant: asinh(1/z)."""
    return ctx.asinh(ctx.one / z)
137
+
138
@defun
def sign(ctx, x):
    """Sign of x: zero and nan pass through unchanged; real x maps to
    +/-1; complex x maps to the unit-modulus number x/|x|."""
    x = ctx.convert(x)
    if not x or ctx.isnan(x):
        return x
    if not ctx._is_real_type(x):
        return x / abs(x)
    return ctx.one if x > 0 else -ctx.one
149
+
150
@defun
def agm(ctx, a, b=1):
    """Arithmetic-geometric mean of a and b; agm(a) means agm(a, 1)."""
    if b == 1:
        # Dedicated single-argument routine
        return ctx.agm1(a)
    a = ctx.convert(a)
    b = ctx.convert(b)
    return ctx._agm(a, b)
157
+
158
@defun_wrapped
def sinc(ctx, x):
    """Unnormalized sinc function sin(x)/x, with sinc(0) = 1 and
    sinc(+-inf) = 0."""
    if not x:
        # x+1 preserves the numeric type of x
        return x + 1
    if ctx.isinf(x):
        return 1/x
    return ctx.sin(x)/x
165
+
166
@defun_wrapped
def sincpi(ctx, x):
    """Normalized sinc function sin(pi*x)/(pi*x), with sincpi(0) = 1 and
    sincpi(+-inf) = 0."""
    if not x:
        # x+1 preserves the numeric type of x
        return x + 1
    if ctx.isinf(x):
        return 1/x
    return ctx.sinpi(x)/(ctx.pi*x)
173
+
174
# TODO: tests; improve implementation
@defun_wrapped
def expm1(ctx, x):
    """Accurate exp(x) - 1, avoiding catastrophic cancellation for
    small x."""
    if not x:
        return ctx.zero
    # exp(x) - 1 ~ x when x is tiny; keep the quadratic correction
    if ctx.mag(x) < -ctx.prec:
        return x + 0.5*x**2
    # TODO: accurately eval the smaller of the real/imag parts
    return ctx.sum_accurately(lambda: iter([ctx.exp(x),-1]),1)
184
+
185
@defun_wrapped
def log1p(ctx, x):
    """Accurate log(1+x), avoiding cancellation for small x."""
    if not x:
        return ctx.zero
    # log(1+x) ~ x - x**2/2 for tiny x
    if ctx.mag(x) < -ctx.prec:
        return x - 0.5*x**2
    return ctx.log(ctx.fadd(1, x, prec=2*ctx.prec))
192
+
193
@defun_wrapped
def powm1(ctx, x, y):
    """
    Accurate x**y - 1.  The naive difference is returned when there is
    little cancellation; otherwise exact-zero cases are detected, a
    small-argument logarithmic series is used where applicable, and
    ctx.sum_accurately handles the remaining cases.
    """
    mag = ctx.mag
    one = ctx.one
    w = x**y - one
    M = mag(w)
    # Only moderate cancellation
    if M > -8:
        return w
    # Check for the only possible exact cases
    if not w:
        if (not y) or (x in (1, -1, 1j, -1j) and ctx.isint(y)):
            return w
    x1 = x - one
    magy = mag(y)
    lnx = ctx.ln(x)
    # Small y: x^y - 1 ~ log(x)*y + O(log(x)^2 * y^2)
    if magy + mag(lnx) < -ctx.prec:
        return lnx*y + (lnx*y)**2/2
    # TODO: accurately eval the smaller of the real/imag part
    return ctx.sum_accurately(lambda: iter([x**y, -1]), 1)
214
+
215
@defun
def _rootof1(ctx, k, n):
    """Return exp(2*pi*i*k/n), using exact values when k/n reduces to
    0, 1/2, 1/4 or 3/4."""
    n = int(n)
    k = int(k) % n
    if not k:
        return ctx.one
    if 2*k == n:
        return -ctx.one
    if 4*k == n:
        return ctx.j
    if 4*k == 3*n:
        return -ctx.j
    return ctx.expjpi(2*ctx.mpf(k)/n)
229
+
230
@defun
def root(ctx, x, n, k=0):
    """n-th root of x; the principal root for k=0, otherwise the branch
    rotated by the k-th root of unity.  An exact negative real root is
    returned when one exists."""
    n = int(n)
    x = ctx.convert(x)
    if not k:
        return ctx._nthroot(x, n)
    # Special case: there is an exact real root
    if (n & 1 and 2*k == n-1) and (not ctx.im(x)) and (ctx.re(x) < 0):
        return -ctx.root(-x, n)
    # Multiply by root of unity, with guard precision
    prec = ctx.prec
    try:
        ctx.prec += 10
        v = ctx.root(x, n, 0) * ctx._rootof1(k, n)
    finally:
        ctx.prec = prec
    return +v
247
+
248
@defun
def unitroots(ctx, n, primitive=False):
    """List of the n-th roots of unity, restricted to the primitive ones
    (gcd(k, n) == 1) when primitive=True."""
    gcd = ctx._gcd
    prec = ctx.prec
    try:
        ctx.prec += 10
        if primitive:
            roots = [ctx._rootof1(k, n) for k in range(n) if gcd(k, n) == 1]
        else:
            # TODO: this can be done *much* faster
            roots = [ctx._rootof1(k, n) for k in range(n)]
    finally:
        ctx.prec = prec
    # Round each root back to the working precision
    return [+r for r in roots]
262
+
263
@defun
def arg(ctx, x):
    """Complex argument (phase) of x, computed via atan2 so the result
    lies in (-pi, pi]."""
    x = ctx.convert(x)
    real_part = ctx._re(x)
    imag_part = ctx._im(x)
    return ctx.atan2(imag_part, real_part)
269
+
270
@defun
def fabs(ctx, x):
    """Absolute value of x, after conversion to a context number."""
    return abs(ctx.convert(x))

@defun
def re(ctx, x):
    """Real part of x; values lacking a .real attribute pass through."""
    x = ctx.convert(x)
    # py2.5 doesn't have .real/.imag for all numbers
    return x.real if hasattr(x, "real") else x

@defun
def im(ctx, x):
    """Imaginary part of x; zero for values lacking an .imag attribute."""
    x = ctx.convert(x)
    # py2.5 doesn't have .real/.imag for all numbers
    return x.imag if hasattr(x, "imag") else ctx.zero

@defun
def conj(ctx, x):
    """Complex conjugate; values that cannot be conjugated pass through."""
    x = ctx.convert(x)
    try:
        return x.conjugate()
    except AttributeError:
        return x

@defun
def polar(ctx, z):
    """Polar form of z as the pair (|z|, arg(z))."""
    return (ctx.fabs(z), ctx.arg(z))

@defun_wrapped
def rect(ctx, r, phi):
    """Complex number with modulus r and phase phi."""
    return r * ctx.mpc(*ctx.cos_sin(phi))
303
+
304
@defun
def log(ctx, x, b=None):
    """Natural logarithm of x, or base-b logarithm as ln(x)/ln(b)
    computed with guard digits."""
    if b is None:
        return ctx.ln(x)
    wp = ctx.prec + 20
    return ctx.ln(x, prec=wp) / ctx.ln(b, prec=wp)

@defun
def log10(ctx, x):
    """Base-10 logarithm of x."""
    return ctx.log(x, 10)

@defun
def fmod(ctx, x, y):
    """Remainder x mod y using the context types' % operator."""
    return ctx.convert(x) % ctx.convert(y)

@defun
def degrees(ctx, x):
    """Convert the angle x from radians to degrees."""
    return x / ctx.degree

@defun
def radians(ctx, x):
    """Convert the angle x from degrees to radians."""
    return x * ctx.degree
326
+
327
def _lambertw_special(ctx, z, k):
    """
    Handle the special points of the Lambert W branches W_k: z = 0,
    z = +-inf, and nan/complex-infinity inputs.
    """
    # W(0,0) = 0; all other branches are singular
    if not z:
        if not k:
            return z
        return ctx.ninf + z
    if z == ctx.inf:
        if k == 0:
            return z
        else:
            # Nonprincipal branches pick up an imaginary offset at infinity
            return z + 2*k*ctx.pi*ctx.j
    if z == ctx.ninf:
        return (-z) + (2*k+1)*ctx.pi*ctx.j
    # Some kind of nan or complex inf/nan?
    return ctx.ln(z)
342
+
343
+ import math
344
+ import cmath
345
+
346
def _lambertw_approx_hybrid(z, k):
    """
    Cheap machine-precision starting value for W_k(z), k in {0, -1},
    pieced together from low-order Taylor fits near the branch point
    -1/e and near 0, and from the log-log asymptotic expansion
    elsewhere.  Only used to seed the Halley iteration; the fitted
    constants need not be accurate, just within the basin of
    convergence of the correct branch.
    NOTE(review): for k outside {0, -1} the fall-through leaves L1
    unbound (NameError); callers are assumed to guard on k first.
    """
    imag_sign = 0
    if hasattr(z, "imag"):
        x = float(z.real)
        y = z.imag
        if y:
            # +1 for upper half-plane, -1 for lower
            imag_sign = (-1) ** (y < 0)
        y = float(y)
    else:
        x = float(z)
        y = 0.0
        imag_sign = 0
    # hack to work regardless of whether Python supports -0.0
    if not y:
        y = 0.0
    z = complex(x,y)
    if k == 0:
        if -4.0 < y < 4.0 and -1.0 < x < 2.5:
            if imag_sign:
                # Taylor series in upper/lower half-plane
                if y > 1.00: return (0.876+0.645j) + (0.118-0.174j)*(z-(0.75+2.5j))
                if y > 0.25: return (0.505+0.204j) + (0.375-0.132j)*(z-(0.75+0.5j))
                if y < -1.00: return (0.876-0.645j) + (0.118+0.174j)*(z-(0.75-2.5j))
                if y < -0.25: return (0.505-0.204j) + (0.375+0.132j)*(z-(0.75-0.5j))
            # Taylor series near -1
            if x < -0.5:
                if imag_sign >= 0:
                    return (-0.318+1.34j) + (-0.697-0.593j)*(z+1)
                else:
                    return (-0.318-1.34j) + (-0.697+0.593j)*(z+1)
            # return real type
            r = -0.367879441171442  # -1/e, the branch point
            if (not imag_sign) and x > r:
                z = x
            # Singularity near -1/e
            if x < -0.2:
                return -1 + 2.33164398159712*(z-r)**0.5 - 1.81218788563936*(z-r)
            # Taylor series near 0
            if x < 0.5: return z
            # Simple linear approximation
            return 0.2 + 0.3*z
        if (not imag_sign) and x > 0.0:
            L1 = math.log(x); L2 = math.log(L1)
        else:
            L1 = cmath.log(z); L2 = cmath.log(L1)
    elif k == -1:
        # return real type
        r = -0.367879441171442
        if (not imag_sign) and r < x < 0.0:
            z = x
        if (imag_sign >= 0) and y < 0.1 and -0.6 < x < -0.2:
            return -1 - 2.33164398159712*(z-r)**0.5 - 1.81218788563936*(z-r)
        if (not imag_sign) and -0.2 <= x < 0.0:
            L1 = math.log(-x)
            return L1 - math.log(-L1)
        else:
            # Shift the log branch for k = -1 (by pi below the cut,
            # 2*pi otherwise).
            if imag_sign == -1 and (not y) and x < 0.0:
                L1 = cmath.log(z) - 3.1415926535897932j
            else:
                L1 = cmath.log(z) - 6.2831853071795865j
        L2 = cmath.log(L1)
    # Two-term log-log asymptotic expansion of W
    return L1 - L2 + L2/L1 + L2*(L2-2)/(2*L1**2)
408
+
409
def _lambertw_series(ctx, z, k, tol):
    """
    Return rough approximation for W_k(z) from an asymptotic series,
    sufficiently accurate for the Halley iteration to converge to
    the correct value.

    Returns (w, done): *done* is True only when the branch-point
    series already met the tolerance, so no iteration is needed.
    """
    magz = ctx.mag(z)
    if (-10 < magz < 900) and (-1000 < k < 1000):
        # Near the branch point at -1/e
        if magz < 1 and abs(z+0.36787944117144) < 0.05:
            if k == 0 or (k == -1 and ctx._im(z) >= 0) or \
               (k == 1 and ctx._im(z) < 0):
                # delta = z + 1/e suffers catastrophic cancellation here;
                # measure it and temporarily raise the precision to match.
                delta = ctx.sum_accurately(lambda: [z, ctx.exp(-1)])
                cancellation = -ctx.mag(delta)
                ctx.prec += cancellation
                # Use series given in Corless et al.
                p = ctx.sqrt(2*(ctx.e*z+1))
                ctx.prec -= cancellation
                u = {0:ctx.mpf(-1), 1:ctx.mpf(1)}
                a = {0:ctx.mpf(2), 1:ctx.mpf(-1)}
                if k != 0:
                    p = -p
                s = ctx.zero
                # The series converges, so we could use it directly, but unless
                # *extremely* close, it is better to just use the first few
                # terms to get a good approximation for the iteration
                for l in xrange(max(2,cancellation)):
                    if l not in u:
                        a[l] = ctx.fsum(u[j]*u[l+1-j] for j in xrange(2,l))
                        u[l] = (l-1)*(u[l-2]/2+a[l-2]/4)/(l+1)-a[l]/2-u[l-1]/(l+1)
                    term = u[l] * p**l
                    s += term
                    if ctx.mag(term) < -tol:
                        return s, True
                    l += 1
                ctx.prec += cancellation//2
                return s, False
        if k == 0 or k == -1:
            return _lambertw_approx_hybrid(z, k), False
    if k == 0:
        if magz < -1:
            # W(z) ~ z - z^2 for small z
            return z*(1-z), False
        L1 = ctx.ln(z)
        L2 = ctx.ln(L1)
    elif k == -1 and (not ctx._im(z)) and (-0.36787944117144 < ctx._re(z) < 0):
        L1 = ctx.ln(-z)
        return L1 - ctx.ln(-L1), False
    else:
        # This holds both as z -> 0 and z -> inf.
        # Relative error is O(1/log(z)).
        L1 = ctx.ln(z) + 2j*ctx.pi*k
        L2 = ctx.ln(L1)
    return L1 - L2 + L2/L1 + L2*(L2-2)/(2*L1**2), False
462
+
463
@defun
def lambertw(ctx, z, k=0):
    """
    Lambert W function, branch k: solves w*exp(w) = z.

    Non-normal arguments (0, infinities, nan) are dispatched to
    _lambertw_special; otherwise a series/asymptotic starting value
    from _lambertw_series is refined by Halley iteration.
    """
    z = ctx.convert(z)
    k = int(k)
    if not ctx.isnormal(z):
        return _lambertw_special(ctx, z, k)
    prec = ctx.prec
    ctx.prec += 20 + ctx.mag(k or 1)
    wp = ctx.prec
    tol = wp - 5
    w, done = _lambertw_series(ctx, z, k, tol)
    if not done:
        # Use Halley iteration to solve w*exp(w) = z
        two = ctx.mpf(2)
        for i in xrange(100):
            ew = ctx.exp(w)
            wew = w*ew
            wewz = wew-z
            wn = w - wewz/(wew+ew-(w+two)*wewz/(two*w+two))
            if ctx.mag(wn-w) <= ctx.mag(wn) - tol:
                w = wn
                break
            else:
                w = wn
        else:
            # BUG FIX: the original tested `if i == 100` after the loop,
            # but xrange(100) never yields 100, so the non-convergence
            # warning was unreachable.  The for/else fires exactly when
            # the loop exhausts without breaking.
            ctx.warn("Lambert W iteration failed to converge for z = %s" % z)
    ctx.prec = prec
    return +w
491
+
492
@defun_wrapped
def bell(ctx, n, x=1):
    """
    Bell polynomial B_n(x) (Bell number when x = 1), computed for
    general n via exp(-x) * sum_{k>=1} k^n x^k / k!  (see _polyexp;
    the extra=True flag prepends the sincpi(n) regularizing term).
    """
    x = ctx.convert(x)
    if not n:
        if ctx.isnan(x):
            return x
        # B_0 = 1, returned in the same numeric type as x
        return type(x)(1)
    if ctx.isinf(x) or ctx.isinf(n) or ctx.isnan(x) or ctx.isnan(n):
        return x**n
    if n == 1: return x
    if n == 2: return x*(x+1)
    if x == 0: return ctx.sincpi(n)
    return _polyexp(ctx, n, x, True) / ctx.exp(x)
505
+
506
def _polyexp(ctx, n, x, extra=False):
    # Accurately sum the series sum_{k>=1} k^n x^k / k!.
    # With extra=True, ctx.sincpi(n) is prepended to the generated
    # terms (used by bell() for noninteger n).
    def _terms():
        if extra:
            yield ctx.sincpi(n)
        t = x  # t tracks x^k / k!
        k = 1
        while 1:
            yield k**n * t
            k += 1
            t = t*x/k
    return ctx.sum_accurately(_terms, check_step=4)
517
+
518
@defun_wrapped
def polyexp(ctx, s, z):
    """
    Polyexponential E_s(z) = sum_{k>=1} k^s z^k / k!, with closed
    forms for s in {0, 1, 2} and for degenerate arguments.
    """
    if ctx.isinf(z) or ctx.isinf(s) or ctx.isnan(z) or ctx.isnan(s):
        return z**s
    if z == 0:
        return z*s
    if s == 0:
        return ctx.expm1(z)
    if s == 1:
        return ctx.exp(z)*z
    if s == 2:
        return ctx.exp(z)*z*(z+1)
    return _polyexp(ctx, s, z)
527
+
528
@defun_wrapped
def cyclotomic(ctx, n, z):
    """
    Evaluate the n-th cyclotomic polynomial at z via the Moebius
    divisor product over (1 - z^d)^mu(n/d), cancelling the matched
    zero/pole factors that appear when z is a root of unity.
    """
    n = int(n)
    if n < 0:
        raise ValueError("n cannot be negative")
    p = ctx.one
    if n == 0:
        return p
    if n == 1:
        return z - p
    if n == 2:
        return z + p
    # Use divisor product representation. Unfortunately, this sometimes
    # includes singularities for roots of unity, which we have to cancel out.
    # Matching zeros/poles pairwise, we have (1-z^a)/(1-z^b) ~ a/b + O(z-1).
    a_prod = 1
    b_prod = 1
    num_zeros = 0
    num_poles = 0
    for d in range(1,n+1):
        if not n % d:
            w = ctx.moebius(n//d)
            # Use powm1 because it is important that we get 0 only
            # if it really is exactly 0
            b = -ctx.powm1(z, d)
            if b:
                p *= b**w
            else:
                if w == 1:
                    a_prod *= d
                    num_zeros += 1
                elif w == -1:
                    b_prod *= d
                    num_poles += 1
    #print n, num_zeros, num_poles
    if num_zeros:
        if num_zeros > num_poles:
            # Unmatched zero factor: the product vanishes
            p *= 0
        else:
            # Zeros and poles pair off; apply the a/b limit ratio
            p *= a_prod
            p /= b_prod
    return p
570
+
571
@defun
def mangoldt(ctx, n):
    r"""
    Evaluates the von Mangoldt function `\Lambda(n) = \log p`
    if `n = p^k` a power of a prime, and `\Lambda(n) = 0` otherwise.

    **Examples**

    >>> from mpmath import *
    >>> mp.dps = 25; mp.pretty = True
    >>> [mangoldt(n) for n in range(-2,3)]
    [0.0, 0.0, 0.0, 0.0, 0.6931471805599453094172321]
    >>> mangoldt(6)
    0.0
    >>> mangoldt(7)
    1.945910149055313305105353
    >>> mangoldt(8)
    0.6931471805599453094172321
    >>> fsum(mangoldt(n) for n in range(101))
    94.04531122935739224600493
    >>> fsum(mangoldt(n) for n in range(10001))
    10013.39669326311478372032

    """
    n = int(n)
    if n < 2:
        return ctx.zero
    if n % 2 == 0:
        # Must be a power of two
        if n & (n-1) == 0:
            return +ctx.ln2
        else:
            return ctx.zero
    # TODO: the following could be generalized into a perfect
    # power testing function
    # ---
    # Look for a small factor
    for p in (3,5,7,11,13,17,19,23,29,31):
        if not n % p:
            # Repeatedly divide by p; any nonzero remainder means n
            # is divisible by p but not a pure power of p.
            q, r = n // p, 0
            while q > 1:
                q, r = divmod(q, p)
                if r:
                    return ctx.zero
            return ctx.ln(p)
    if ctx.isprime(n):
        return ctx.ln(n)
    # Obviously, we could use arbitrary-precision arithmetic for this...
    if n > 10**30:
        raise NotImplementedError
    k = 2
    while 1:
        # NOTE(review): float k-th root; for n above ~2**53 the +0.5
        # rounding may not land on the exact integer root — confirm
        # the 10**30 guard is tight enough for all perfect powers.
        p = int(n**(1./k) + 0.5)
        if p < 2:
            return ctx.zero
        if p ** k == n:
            if ctx.isprime(p):
                return ctx.ln(p)
        k += 1
630
+
631
@defun
def stirling1(ctx, n, k, exact=False):
    """
    Stirling number of the first kind.  Returns a Python int when
    exact=True, otherwise an mpf.
    """
    value = ctx._stirling1(int(n), int(k))
    if exact:
        return int(value)
    return ctx.mpf(value)
638
+
639
@defun
def stirling2(ctx, n, k, exact=False):
    """
    Stirling number of the second kind.  Returns a Python int when
    exact=True, otherwise an mpf.
    """
    value = ctx._stirling2(int(n), int(k))
    if exact:
        return int(value)
    return ctx.mpf(value)
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/functions/hypergeometric.py ADDED
@@ -0,0 +1,1413 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from ..libmp.backend import xrange
2
+ from .functions import defun, defun_wrapped
3
+
4
def _check_need_perturb(ctx, terms, prec, discard_known_zeros):
    """
    Inspect hypercomb() terms for singular leading factors and for
    gamma/series parameters at or near nonpositive integers.

    Returns (perturb, recompute, extraprec, discard): whether the
    parameters must be perturbed off the singular point, whether the
    terms must be regenerated, extra working precision needed for
    near-poles, and indices of terms known to vanish identically
    (more gamma poles in the denominator than in the numerators).
    """
    perturb = recompute = False
    extraprec = 0
    discard = []
    for term_index, term in enumerate(terms):
        w_s, c_s, alpha_s, beta_s, a_s, b_s, z = term
        have_singular_nongamma_weight = False
        # Avoid division by zero in leading factors (TODO:
        # also check for near division by zero?)
        for k, w in enumerate(w_s):
            if not w:
                if ctx.re(c_s[k]) <= 0 and c_s[k]:
                    perturb = recompute = True
                    have_singular_nongamma_weight = True
        # pole counts for [numerator gammas, denominator gammas, series b's]
        pole_count = [0, 0, 0]
        # Check for gamma and series poles and near-poles
        for data_index, data in enumerate([alpha_s, beta_s, b_s]):
            for i, x in enumerate(data):
                n, d = ctx.nint_distance(x)
                # Poles
                if n > 0:
                    continue
                if d == ctx.ninf:
                    # OK if we have a polynomial
                    # ------------------------------
                    ok = False
                    if data_index == 2:
                        for u in a_s:
                            if ctx.isnpint(u) and u >= int(n):
                                ok = True
                                break
                    if ok:
                        continue
                    pole_count[data_index] += 1
                    # ------------------------------
                    #perturb = recompute = True
                    #return perturb, recompute, extraprec
                elif d < -4:
                    # Near-pole: compensate for the cancellation with
                    # extra working precision.
                    extraprec += -d
                    recompute = True
        if discard_known_zeros and pole_count[1] > pole_count[0] + pole_count[2] \
                and not have_singular_nongamma_weight:
            discard.append(term_index)
        elif sum(pole_count):
            perturb = recompute = True
    return perturb, recompute, extraprec, discard
50
+
51
+ _hypercomb_msg = """
52
+ hypercomb() failed to converge to the requested %i bits of accuracy
53
+ using a working precision of %i bits. The function value may be zero or
54
+ infinite; try passing zeroprec=N or infprec=M to bound finite values between
55
+ 2^(-N) and 2^M. Otherwise try a higher maxprec or maxterms.
56
+ """
57
+
58
@defun
def hypercomb(ctx, function, params=[], discard_known_zeros=True, **kwargs):
    """
    Evaluate a linear combination of hypergeometric terms.  *function*
    maps the (possibly perturbed) *params* to a list of terms
    (w_s, c_s, alpha_s, beta_s, a_s, b_s, z), each evaluated as
    prod(w^c) * prod(gamma(alpha)) * prod(rgamma(beta)) * pFq(a; b; z).

    The main loop raises the working precision until cancellation
    between terms is under control, perturbing the parameters off
    gamma poles when _check_need_perturb says so.  The zeroprec /
    infprec keyword bounds let an apparently-cancelled sum collapse
    to exactly 0 or inf.
    """
    orig = ctx.prec
    sumvalue = ctx.zero
    dist = ctx.nint_distance
    ninf = ctx.ninf
    orig_params = params[:]
    verbose = kwargs.get('verbose', False)
    maxprec = kwargs.get('maxprec', ctx._default_hyper_maxprec(orig))
    kwargs['maxprec'] = maxprec # For calls to hypsum
    zeroprec = kwargs.get('zeroprec')
    infprec = kwargs.get('infprec')
    perturbed_reference_value = None
    hextra = 0
    try:
        while 1:
            ctx.prec += 10
            if ctx.prec > maxprec:
                raise ValueError(_hypercomb_msg % (orig, ctx.prec))
            orig2 = ctx.prec
            params = orig_params[:]
            terms = function(*params)
            if verbose:
                print()
                print("ENTERING hypercomb main loop")
                print("prec =", ctx.prec)
                print("hextra", hextra)
            perturb, recompute, extraprec, discard = \
                _check_need_perturb(ctx, terms, orig, discard_known_zeros)
            ctx.prec += extraprec
            if perturb:
                if "hmag" in kwargs:
                    hmag = kwargs["hmag"]
                elif ctx._fixed_precision:
                    hmag = int(ctx.prec*0.3)
                else:
                    hmag = orig + 10 + hextra
                h = ctx.ldexp(ctx.one, -hmag)
                ctx.prec = orig2 + 10 + hmag + 10
                for k in range(len(params)):
                    params[k] += h
                    # Heuristically ensure that the perturbations
                    # are "independent" so that two perturbations
                    # don't accidentally cancel each other out
                    # in a subtraction.
                    h += h/(k+1)
            if recompute:
                terms = function(*params)
            if discard_known_zeros:
                terms = [term for (i, term) in enumerate(terms) if i not in discard]
            if not terms:
                return ctx.zero
            evaluated_terms = []
            for term_index, term_data in enumerate(terms):
                w_s, c_s, alpha_s, beta_s, a_s, b_s, z = term_data
                if verbose:
                    print()
                    print("  Evaluating term %i/%i : %iF%i" % \
                        (term_index+1, len(terms), len(a_s), len(b_s)))
                    print("  powers", ctx.nstr(w_s), ctx.nstr(c_s))
                    print("  gamma", ctx.nstr(alpha_s), ctx.nstr(beta_s))
                    print("  hyper", ctx.nstr(a_s), ctx.nstr(b_s))
                    print("  z", ctx.nstr(z))
                #v = ctx.hyper(a_s, b_s, z, **kwargs)
                #for a in alpha_s: v *= ctx.gamma(a)
                #for b in beta_s: v *= ctx.rgamma(b)
                #for w, c in zip(w_s, c_s): v *= ctx.power(w, c)
                v = ctx.fprod([ctx.hyper(a_s, b_s, z, **kwargs)] + \
                    [ctx.gamma(a) for a in alpha_s] + \
                    [ctx.rgamma(b) for b in beta_s] + \
                    [ctx.power(w,c) for (w,c) in zip(w_s,c_s)])
                if verbose:
                    print("  Value:", v)
                evaluated_terms.append(v)

            if len(terms) == 1 and (not perturb):
                sumvalue = evaluated_terms[0]
                break

            if ctx._fixed_precision:
                sumvalue = ctx.fsum(evaluated_terms)
                break

            sumvalue = ctx.fsum(evaluated_terms)
            term_magnitudes = [ctx.mag(x) for x in evaluated_terms]
            max_magnitude = max(term_magnitudes)
            sum_magnitude = ctx.mag(sumvalue)
            cancellation = max_magnitude - sum_magnitude
            if verbose:
                print()
                print("  Cancellation:", cancellation, "bits")
                print("  Increased precision:", ctx.prec - orig, "bits")

            precision_ok = cancellation < ctx.prec - orig

            if zeroprec is None:
                zero_ok = False
            else:
                zero_ok = max_magnitude - ctx.prec < -zeroprec
            if infprec is None:
                inf_ok = False
            else:
                inf_ok = max_magnitude > infprec

            if precision_ok and (not perturb) or ctx.isnan(cancellation):
                break
            elif precision_ok:
                # Perturbed result: accept only once two successive
                # perturbation sizes agree to the target precision.
                if perturbed_reference_value is None:
                    hextra += 20
                    perturbed_reference_value = sumvalue
                    continue
                elif ctx.mag(sumvalue - perturbed_reference_value) <= \
                        ctx.mag(sumvalue) - orig:
                    break
                elif zero_ok:
                    sumvalue = ctx.zero
                    break
                elif inf_ok:
                    sumvalue = ctx.inf
                    break
                elif 'hmag' in kwargs:
                    break
                else:
                    hextra *= 2
                    perturbed_reference_value = sumvalue
                    # Increase precision
            else:
                increment = min(max(cancellation, orig//2), max(extraprec,orig))
                ctx.prec += increment
                if verbose:
                    print("  Must start over with increased precision")
                continue
    finally:
        ctx.prec = orig
    return +sumvalue
193
+
194
@defun
def hyper(ctx, a_s, b_s, z, **kwargs):
    """
    Hypergeometric function, general case.

    Normalizes the parameter lists, eliminates parameters shared
    between numerator and denominator, then dispatches to the
    specialized pFq implementations (direct summation as a fallback).
    """
    z = ctx.convert(z)
    p = len(a_s)
    q = len(b_s)
    # _convert_param returns (value, type-code) pairs
    a_s = [ctx._convert_param(a) for a in a_s]
    b_s = [ctx._convert_param(b) for b in b_s]
    # Reduce degree by eliminating common parameters
    if kwargs.get('eliminate', True):
        # eliminate_all=True also cancels nonpositive-integer pairs
        elim_nonpositive = kwargs.get('eliminate_all', False)
        i = 0
        while i < q and a_s:
            b = b_s[i]
            if b in a_s and (elim_nonpositive or not ctx.isnpint(b[0])):
                a_s.remove(b)
                b_s.remove(b)
                p -= 1
                q -= 1
            else:
                i += 1
    # Handle special cases
    if p == 0:
        if q == 1: return ctx._hyp0f1(b_s, z, **kwargs)
        elif q == 0: return ctx.exp(z)
    elif p == 1:
        if q == 1: return ctx._hyp1f1(a_s, b_s, z, **kwargs)
        elif q == 2: return ctx._hyp1f2(a_s, b_s, z, **kwargs)
        elif q == 0: return ctx._hyp1f0(a_s[0][0], z)
    elif p == 2:
        if q == 1: return ctx._hyp2f1(a_s, b_s, z, **kwargs)
        elif q == 2: return ctx._hyp2f2(a_s, b_s, z, **kwargs)
        elif q == 3: return ctx._hyp2f3(a_s, b_s, z, **kwargs)
        elif q == 0: return ctx._hyp2f0(a_s, b_s, z, **kwargs)
    elif p == q+1:
        return ctx._hypq1fq(p, q, a_s, b_s, z, **kwargs)
    elif p > q+1 and not kwargs.get('force_series'):
        # Divergent series: Borel regularization
        return ctx._hyp_borel(p, q, a_s, b_s, z, **kwargs)
    coeffs, types = zip(*(a_s+b_s))
    return ctx.hypsum(p, q, types, coeffs, z, **kwargs)
236
+
237
@defun
def hyp0f1(ctx, b, z, **opts):
    """Hypergeometric 0F1(; b; z), via the general hyper()."""
    return ctx.hyper([], [b], z, **opts)
240
+
241
@defun
def hyp1f1(ctx, a, b, z, **opts):
    """Confluent hypergeometric 1F1(a; b; z), via the general hyper()."""
    return ctx.hyper([a], [b], z, **opts)
244
+
245
@defun
def hyp1f2(ctx, a1, b1, b2, z, **opts):
    """Hypergeometric 1F2(a1; b1, b2; z), via the general hyper()."""
    return ctx.hyper([a1], [b1, b2], z, **opts)
248
+
249
@defun
def hyp2f1(ctx, a, b, c, z, **opts):
    """Gauss hypergeometric 2F1(a, b; c; z), via the general hyper()."""
    return ctx.hyper([a, b], [c], z, **opts)
252
+
253
@defun
def hyp2f2(ctx, a1, a2, b1, b2, z, **opts):
    """Hypergeometric 2F2(a1, a2; b1, b2; z), via the general hyper()."""
    return ctx.hyper([a1, a2], [b1, b2], z, **opts)
256
+
257
@defun
def hyp2f3(ctx, a1, a2, b1, b2, b3, z, **opts):
    """Hypergeometric 2F3(a1, a2; b1, b2, b3; z), via the general hyper()."""
    return ctx.hyper([a1, a2], [b1, b2, b3], z, **opts)
260
+
261
@defun
def hyp2f0(ctx, a, b, z, **opts):
    """(Divergent) hypergeometric 2F0(a, b; ; z), via the general hyper()."""
    return ctx.hyper([a, b], [], z, **opts)
264
+
265
@defun
def hyp3f2(ctx, a1, a2, a3, b1, b2, z, **opts):
    """Hypergeometric 3F2(a1, a2, a3; b1, b2; z), via the general hyper()."""
    return ctx.hyper([a1, a2, a3], [b1, b2], z, **opts)
268
+
269
@defun_wrapped
def _hyp1f0(ctx, a, z):
    """1F0(a; ; z) in closed form: (1-z)^(-a)."""
    base = 1 - z
    return base ** (-a)
272
+
273
@defun
def _hyp0f1(ctx, b_s, z, **kwargs):
    """
    0F1(; b; z).  For large |z| an asymptotic two-term hypercomb
    expansion (Bessel-type, in sqrt(-z)) is attempted first; otherwise
    the series is summed directly by hypsum.
    """
    (b, btype), = b_s
    if z:
        magz = ctx.mag(z)
    else:
        magz = 0
    if magz >= 8 and not kwargs.get('force_series'):
        try:
            # http://functions.wolfram.com/HypergeometricFunctions/
            # Hypergeometric0F1/06/02/03/0004/
            # TODO: handle the all-real case more efficiently!
            # TODO: figure out how much precision is needed (exponential growth)
            orig = ctx.prec
            try:
                ctx.prec += 12 + magz//2
                def h():
                    w = ctx.sqrt(-z)
                    jw = ctx.j*w
                    u = 1/(4*jw)
                    c = ctx.mpq_1_2 - b
                    E = ctx.exp(2*jw)
                    T1 = ([-jw,E], [c,-1], [], [], [b-ctx.mpq_1_2, ctx.mpq_3_2-b], [], -u)
                    T2 = ([jw,E], [c,1], [], [], [b-ctx.mpq_1_2, ctx.mpq_3_2-b], [], u)
                    return T1, T2
                v = ctx.hypercomb(h, [], force_series=True)
                v = ctx.gamma(b)/(2*ctx.sqrt(ctx.pi))*v
            finally:
                ctx.prec = orig
            if ctx._is_real_type(b) and ctx._is_real_type(z):
                # Imaginary parts are pure rounding noise in this case
                v = ctx._re(v)
            return +v
        except ctx.NoConvergence:
            pass
    return ctx.hypsum(0, 1, (btype,), [b], z, **kwargs)
308
+
309
@defun
def _hyp1f1(ctx, a_s, b_s, z, **kwargs):
    """
    1F1(a; b; z).  For large |z| (and a not a nonpositive integer,
    where the series terminates) the two-term asymptotic hypercomb
    expansion in 1/z is attempted; otherwise direct summation.
    """
    (a, atype), = a_s
    (b, btype), = b_s
    if not z:
        # 1F1(a;b;0) = 1, typed like z
        return ctx.one+z
    magz = ctx.mag(z)
    if magz >= 7 and not (ctx.isint(a) and ctx.re(a) <= 0):
        if ctx.isinf(z):
            if ctx.sign(a) == ctx.sign(b) == ctx.sign(z) == 1:
                return ctx.inf
            return ctx.nan * z
        try:
            try:
                ctx.prec += magz
                # Choose the branch of exp(+-i*pi*a) by the sign of Im(z)
                sector = ctx._im(z) < 0
                def h(a,b):
                    if sector:
                        E = ctx.expjpi(ctx.fneg(a, exact=True))
                    else:
                        E = ctx.expjpi(a)
                    rz = 1/z
                    T1 = ([E,z], [1,-a], [b], [b-a], [a, 1+a-b], [], -rz)
                    T2 = ([ctx.exp(z),z], [1,a-b], [b], [a], [b-a, 1-a], [], rz)
                    return T1, T2
                v = ctx.hypercomb(h, [a,b], force_series=True)
                if ctx._is_real_type(a) and ctx._is_real_type(b) and ctx._is_real_type(z):
                    v = ctx._re(v)
                return +v
            except ctx.NoConvergence:
                pass
        finally:
            ctx.prec -= magz
    v = ctx.hypsum(1, 1, (atype, btype), [a, b], z, **kwargs)
    return v
344
+
345
def _hyp2f1_gosper(ctx,a,b,c,z,**kwargs):
    # 2F1 on the part of the unit circle not reachable by the standard
    # transformations, via Gosper's three-term recurrence.
    # Use Gosper's recurrence
    # See http://www.math.utexas.edu/pipermail/maxima/2006/000126.html
    _a,_b,_c,_z = a, b, c, z
    orig = ctx.prec
    maxprec = kwargs.get('maxprec', 100*orig)
    extra = 10
    # Outer loop: retry with more guard bits until the observed
    # cancellation fits inside the extra precision.
    while 1:
        ctx.prec = orig + extra
        #a = ctx.convert(_a)
        #b = ctx.convert(_b)
        #c = ctx.convert(_c)
        z = ctx.convert(_z)
        d = ctx.mpf(0)
        e = ctx.mpf(1)
        f = ctx.mpf(0)
        k = 0
        # Common subexpression elimination, unfortunately making
        # things a bit unreadable. The formula is quite messy to begin
        # with, though...
        abz = a*b*z
        ch = c * ctx.mpq_1_2
        c1h = (c+1) * ctx.mpq_1_2
        nz = 1-z
        g = z/nz
        abg = a*b*g
        cba = c-b-a
        z2 = z-2
        tol = -ctx.prec - 10
        nstr = ctx.nstr
        nprint = ctx.nprint
        mag = ctx.mag
        maxmag = ctx.ninf
        while 1:
            kch = k+ch
            kakbz = (k+a)*(k+b)*z / (4*(k+1)*kch*(k+c1h))
            d1 = kakbz*(e-(k+cba)*d*g)
            e1 = kakbz*(d*abg+(k+c)*e)
            ft = d*(k*(cba*z+k*z2-c)-abz)/(2*kch*nz)
            f1 = f + e - ft
            maxmag = max(maxmag, mag(f1))
            if mag(f1-f) < tol:
                break
            d, e, f = d1, e1, f1
            k += 1
        cancellation = maxmag - mag(f1)
        if cancellation < extra:
            break
        else:
            extra += cancellation
            if extra > maxprec:
                raise ctx.NoConvergence
    return f1
398
+
399
@defun
def _hyp2f1(ctx, a_s, b_s, z, **kwargs):
    """
    Gauss 2F1(a, b; c; z): special-cases z = 1 (Gauss's theorem),
    z = 0, and series poles at nonpositive integer c, then chooses
    among direct summation, the 1/z, 1-z and z/(z-1) transformations,
    and Gosper's recurrence for the remaining part of the unit circle.
    """
    (a, atype), (b, btype) = a_s
    (c, ctype), = b_s
    if z == 1:
        # TODO: the following logic can be simplified
        convergent = ctx.re(c-a-b) > 0
        finite = (ctx.isint(a) and a <= 0) or (ctx.isint(b) and b <= 0)
        zerodiv = ctx.isint(c) and c <= 0 and not \
            ((ctx.isint(a) and c <= a <= 0) or (ctx.isint(b) and c <= b <= 0))
        #print "bz", a, b, c, z, convergent, finite, zerodiv
        # Gauss's theorem gives the value if convergent
        if (convergent or finite) and not zerodiv:
            return ctx.gammaprod([c, c-a-b], [c-a, c-b], _infsign=True)
        # Otherwise, there is a pole and we take the
        # sign to be that when approaching from below
        # XXX: this evaluation is not necessarily correct in all cases
        return ctx.hyp2f1(a,b,c,1-ctx.eps*2) * ctx.inf

    # Equal to 1 (first term), unless there is a subsequent
    # division by zero
    if not z:
        # Division by zero but power of z is higher than
        # first order so cancels
        if c or a == 0 or b == 0:
            return 1+z
        # Indeterminate
        return ctx.nan

    # Hit zero denominator unless numerator goes to 0 first
    if ctx.isint(c) and c <= 0:
        if (ctx.isint(a) and c <= a <= 0) or \
           (ctx.isint(b) and c <= b <= 0):
            pass
        else:
            # Pole in series
            return ctx.inf

    absz = abs(z)

    # Fast case: standard series converges rapidly,
    # possibly in finitely many terms
    if absz <= 0.8 or (ctx.isint(a) and a <= 0 and a >= -1000) or \
        (ctx.isint(b) and b <= 0 and b >= -1000):
        return ctx.hypsum(2, 1, (atype, btype, ctype), [a, b, c], z, **kwargs)

    orig = ctx.prec
    try:
        ctx.prec += 10

        # Use 1/z transformation
        if absz >= 1.3:
            def h(a,b):
                t = ctx.mpq_1-c; ab = a-b; rz = 1/z
                T1 = ([-z],[-a], [c,-ab],[b,c-a], [a,t+a],[ctx.mpq_1+ab], rz)
                T2 = ([-z],[-b], [c,ab],[a,c-b], [b,t+b],[ctx.mpq_1-ab], rz)
                return T1, T2
            v = ctx.hypercomb(h, [a,b], **kwargs)

        # Use 1-z transformation
        elif abs(1-z) <= 0.75:
            def h(a,b):
                t = c-a-b; ca = c-a; cb = c-b; rz = 1-z
                T1 = [], [], [c,t], [ca,cb], [a,b], [1-t], rz
                T2 = [rz], [t], [c,a+b-c], [a,b], [ca,cb], [1+t], rz
                return T1, T2
            v = ctx.hypercomb(h, [a,b], **kwargs)

        # Use z/(z-1) transformation
        elif abs(z/(z-1)) <= 0.75:
            v = ctx.hyp2f1(a, c-b, c, z/(z-1)) / (1-z)**a

        # Remaining part of unit circle
        else:
            v = _hyp2f1_gosper(ctx,a,b,c,z,**kwargs)

    finally:
        ctx.prec = orig
    return +v
478
+
479
@defun
def _hypq1fq(ctx, p, q, a_s, b_s, z, **kwargs):
    r"""
    Evaluates 3F2, 4F3, 5F4, ... (the p = q+1 case).

    Strategy: direct summation inside the unit circle or for
    terminating series; a Buhring-type expansion near z = 1 for 3F2;
    accelerated summation (Richardson/Shanks, falling back to
    Euler-Maclaurin) on and near the unit circle; and the 1/z
    transformation outside it.
    """
    a_s, a_types = zip(*a_s)
    b_s, b_types = zip(*b_s)
    a_s = list(a_s)
    b_s = list(b_s)
    absz = abs(z)
    ispoly = False
    for a in a_s:
        if ctx.isint(a) and a <= 0:
            ispoly = True
            break
    # Direct summation
    if absz < 1 or ispoly:
        try:
            return ctx.hypsum(p, q, a_types+b_types, a_s+b_s, z, **kwargs)
        except ctx.NoConvergence:
            if absz > 1.1 or ispoly:
                raise
    # Use expansion at |z-1| -> 0.
    # Reference: Wolfgang Buhring, "Generalized Hypergeometric Functions at
    # Unit Argument", Proc. Amer. Math. Soc., Vol. 114, No. 1 (Jan. 1992),
    # pp.145-153
    # The current implementation has several problems:
    # 1. We only implement it for 3F2. The expansion coefficients are
    # given by extremely messy nested sums in the higher degree cases
    # (see reference). Is efficient sequential generation of the coefficients
    # possible in the > 3F2 case?
    # 2. Although the series converges, it may do so slowly, so we need
    # convergence acceleration. The acceleration implemented by
    # nsum does not always help, so results returned are sometimes
    # inaccurate! Can we do better?
    # 3. We should check conditions for convergence, and possibly
    # do a better job of cancelling out gamma poles if possible.
    if z == 1:
        # XXX: should also check for division by zero in the
        # denominator of the series (cf. hyp2f1)
        S = ctx.re(sum(b_s)-sum(a_s))
        if S <= 0:
            #return ctx.hyper(a_s, b_s, 1-ctx.eps*2, **kwargs) * ctx.inf
            return ctx.hyper(a_s, b_s, 0.9, **kwargs) * ctx.inf
    if (p,q) == (3,2) and abs(z-1) < 0.05:   # and kwargs.get('sum1')
        #print "Using alternate summation (experimental)"
        a1,a2,a3 = a_s
        b1,b2 = b_s
        u = b1+b2-a3
        initial = ctx.gammaprod([b2-a3,b1-a3,a1,a2],[b2-a3,b1-a3,1,u])
        def term(k, _cache={0:initial}):
            u = b1+b2-a3+k
            if k in _cache:
                t = _cache[k]
            else:
                t = _cache[k-1]
                t *= (b1+k-a3-1)*(b2+k-a3-1)
                t /= k*(u-1)
                _cache[k] = t
            return t * ctx.hyp2f1(a1,a2,u,z)
        try:
            S = ctx.nsum(term, [0,ctx.inf], verbose=kwargs.get('verbose'),
                strict=kwargs.get('strict', True))
            return S * ctx.gammaprod([b1,b2],[a1,a2,a3])
        except ctx.NoConvergence:
            pass
    # Try to use convergence acceleration on and close to the unit circle.
    # Problem: the convergence acceleration degenerates as |z-1| -> 0,
    # except for special cases. Everywhere else, the Shanks transformation
    # is very efficient.
    if absz < 1.1 and ctx._re(z) <= 1:

        # Series term; cached recurrence for integer k, direct
        # evaluation via rf() for the noninteger k that sumem() uses.
        def term(kk, _cache={0:ctx.one}):
            k = int(kk)
            if k != kk:
                t = z ** ctx.mpf(kk) / ctx.fac(kk)
                for a in a_s: t *= ctx.rf(a,kk)
                for b in b_s: t /= ctx.rf(b,kk)
                return t
            if k in _cache:
                return _cache[k]
            t = term(k-1)
            m = k-1
            for j in xrange(p): t *= (a_s[j]+m)
            for j in xrange(q): t /= (b_s[j]+m)
            t *= z
            t /= k
            _cache[k] = t
            return t

        sum_method = kwargs.get('sum_method', 'r+s+e')

        try:
            return ctx.nsum(term, [0,ctx.inf], verbose=kwargs.get('verbose'),
                strict=kwargs.get('strict', True),
                method=sum_method.replace('e',''))
        except ctx.NoConvergence:
            if 'e' not in sum_method:
                raise
            pass

        if kwargs.get('verbose'):
            print("Attempting Euler-Maclaurin summation")


        """
        Somewhat slower version (one diffs_exp for each factor).
        However, this would be faster with fast direct derivatives
        of the gamma function.

        def power_diffs(k0):
            r = 0
            l = ctx.log(z)
            while 1:
                yield z**ctx.mpf(k0) * l**r
                r += 1

        def loggamma_diffs(x, reciprocal=False):
            sign = (-1) ** reciprocal
            yield sign * ctx.loggamma(x)
            i = 0
            while 1:
                yield sign * ctx.psi(i,x)
                i += 1

        def hyper_diffs(k0):
            b2 = b_s + [1]
            A = [ctx.diffs_exp(loggamma_diffs(a+k0)) for a in a_s]
            B = [ctx.diffs_exp(loggamma_diffs(b+k0,True)) for b in b2]
            Z = [power_diffs(k0)]
            C = ctx.gammaprod([b for b in b2], [a for a in a_s])
            for d in ctx.diffs_prod(A + B + Z):
                v = C * d
                yield v
        """

        # Successive derivatives of log(term(k0)), fed to diffs_exp
        # to obtain derivatives of the term itself for sumem().
        def log_diffs(k0):
            b2 = b_s + [1]
            yield sum(ctx.loggamma(a+k0) for a in a_s) - \
                sum(ctx.loggamma(b+k0) for b in b2) + k0*ctx.log(z)
            i = 0
            while 1:
                v = sum(ctx.psi(i,a+k0) for a in a_s) - \
                    sum(ctx.psi(i,b+k0) for b in b2)
                if i == 0:
                    v += ctx.log(z)
                yield v
                i += 1

        def hyper_diffs(k0):
            C = ctx.gammaprod([b for b in b_s], [a for a in a_s])
            for d in ctx.diffs_exp(log_diffs(k0)):
                v = C * d
                yield v

        tol = ctx.eps / 1024
        prec = ctx.prec
        try:
            trunc = 50 * ctx.dps
            ctx.prec += 20
            for i in xrange(5):
                head = ctx.fsum(term(k) for k in xrange(trunc))
                tail, err = ctx.sumem(term, [trunc, ctx.inf], tol=tol,
                    adiffs=hyper_diffs(trunc),
                    verbose=kwargs.get('verbose'),
                    error=True,
                    _fast_abort=True)
                if err < tol:
                    v = head + tail
                    break
                trunc *= 2
                # Need to increase precision because calculation of
                # derivatives may be inaccurate
                ctx.prec += ctx.prec//2
                if i == 4:
                    raise ctx.NoConvergence(\
                        "Euler-Maclaurin summation did not converge")
        finally:
            ctx.prec = prec
        return +v

    # Use 1/z transformation
    # http://functions.wolfram.com/HypergeometricFunctions/
    # HypergeometricPFQ/06/01/05/02/0004/
    def h(*args):
        a_s = list(args[:p])
        b_s = list(args[p:])
        Ts = []
        recz = ctx.one/z
        negz = ctx.fneg(z, exact=True)
        for k in range(q+1):
            ak = a_s[k]
            C = [negz]
            Cp = [-ak]
            Gn = b_s + [ak] + [a_s[j]-ak for j in range(q+1) if j != k]
            Gd = a_s + [b_s[j]-ak for j in range(q)]
            Fn = [ak] + [ak-b_s[j]+1 for j in range(q)]
            Fd = [1-a_s[j]+ak for j in range(q+1) if j != k]
            Ts.append((C, Cp, Gn, Gd, Fn, Fd, recz))
        return Ts
    return ctx.hypercomb(h, a_s+b_s, **kwargs)
680
+
681
+ @defun
682
+ def _hyp_borel(ctx, p, q, a_s, b_s, z, **kwargs):
683
+ if a_s:
684
+ a_s, a_types = zip(*a_s)
685
+ a_s = list(a_s)
686
+ else:
687
+ a_s, a_types = [], ()
688
+ if b_s:
689
+ b_s, b_types = zip(*b_s)
690
+ b_s = list(b_s)
691
+ else:
692
+ b_s, b_types = [], ()
693
+ kwargs['maxterms'] = kwargs.get('maxterms', ctx.prec)
694
+ try:
695
+ return ctx.hypsum(p, q, a_types+b_types, a_s+b_s, z, **kwargs)
696
+ except ctx.NoConvergence:
697
+ pass
698
+ prec = ctx.prec
699
+ try:
700
+ tol = kwargs.get('asymp_tol', ctx.eps/4)
701
+ ctx.prec += 10
702
+ # hypsum is has a conservative tolerance. So we try again:
703
+ def term(k, cache={0:ctx.one}):
704
+ if k in cache:
705
+ return cache[k]
706
+ t = term(k-1)
707
+ for a in a_s: t *= (a+(k-1))
708
+ for b in b_s: t /= (b+(k-1))
709
+ t *= z
710
+ t /= k
711
+ cache[k] = t
712
+ return t
713
+ s = ctx.one
714
+ for k in xrange(1, ctx.prec):
715
+ t = term(k)
716
+ s += t
717
+ if abs(t) <= tol:
718
+ return s
719
+ finally:
720
+ ctx.prec = prec
721
+ if p <= q+3:
722
+ contour = kwargs.get('contour')
723
+ if not contour:
724
+ if ctx.arg(z) < 0.25:
725
+ u = z / max(1, abs(z))
726
+ if ctx.arg(z) >= 0:
727
+ contour = [0, 2j, (2j+2)/u, 2/u, ctx.inf]
728
+ else:
729
+ contour = [0, -2j, (-2j+2)/u, 2/u, ctx.inf]
730
+ #contour = [0, 2j/z, 2/z, ctx.inf]
731
+ #contour = [0, 2j, 2/z, ctx.inf]
732
+ #contour = [0, 2j, ctx.inf]
733
+ else:
734
+ contour = [0, ctx.inf]
735
+ quad_kwargs = kwargs.get('quad_kwargs', {})
736
+ def g(t):
737
+ return ctx.exp(-t)*ctx.hyper(a_s, b_s+[1], t*z)
738
+ I, err = ctx.quad(g, contour, error=True, **quad_kwargs)
739
+ if err <= abs(I)*ctx.eps*8:
740
+ return I
741
+ raise ctx.NoConvergence
742
+
743
+
744
+ @defun
745
+ def _hyp2f2(ctx, a_s, b_s, z, **kwargs):
746
+ (a1, a1type), (a2, a2type) = a_s
747
+ (b1, b1type), (b2, b2type) = b_s
748
+
749
+ absz = abs(z)
750
+ magz = ctx.mag(z)
751
+ orig = ctx.prec
752
+
753
+ # Asymptotic expansion is ~ exp(z)
754
+ asymp_extraprec = magz
755
+
756
+ # Asymptotic series is in terms of 3F1
757
+ can_use_asymptotic = (not kwargs.get('force_series')) and \
758
+ (ctx.mag(absz) > 3)
759
+
760
+ # TODO: much of the following could be shared with 2F3 instead of
761
+ # copypasted
762
+ if can_use_asymptotic:
763
+ #print "using asymp"
764
+ try:
765
+ try:
766
+ ctx.prec += asymp_extraprec
767
+ # http://functions.wolfram.com/HypergeometricFunctions/
768
+ # Hypergeometric2F2/06/02/02/0002/
769
+ def h(a1,a2,b1,b2):
770
+ X = a1+a2-b1-b2
771
+ A2 = a1+a2
772
+ B2 = b1+b2
773
+ c = {}
774
+ c[0] = ctx.one
775
+ c[1] = (A2-1)*X+b1*b2-a1*a2
776
+ s1 = 0
777
+ k = 0
778
+ tprev = 0
779
+ while 1:
780
+ if k not in c:
781
+ uu1 = 1-B2+2*a1+a1**2+2*a2+a2**2-A2*B2+a1*a2+b1*b2+(2*B2-3*(A2+1))*k+2*k**2
782
+ uu2 = (k-A2+b1-1)*(k-A2+b2-1)*(k-X-2)
783
+ c[k] = ctx.one/k * (uu1*c[k-1]-uu2*c[k-2])
784
+ t1 = c[k] * z**(-k)
785
+ if abs(t1) < 0.1*ctx.eps:
786
+ #print "Convergence :)"
787
+ break
788
+ # Quit if the series doesn't converge quickly enough
789
+ if k > 5 and abs(tprev) / abs(t1) < 1.5:
790
+ #print "No convergence :("
791
+ raise ctx.NoConvergence
792
+ s1 += t1
793
+ tprev = t1
794
+ k += 1
795
+ S = ctx.exp(z)*s1
796
+ T1 = [z,S], [X,1], [b1,b2],[a1,a2],[],[],0
797
+ T2 = [-z],[-a1],[b1,b2,a2-a1],[a2,b1-a1,b2-a1],[a1,a1-b1+1,a1-b2+1],[a1-a2+1],-1/z
798
+ T3 = [-z],[-a2],[b1,b2,a1-a2],[a1,b1-a2,b2-a2],[a2,a2-b1+1,a2-b2+1],[-a1+a2+1],-1/z
799
+ return T1, T2, T3
800
+ v = ctx.hypercomb(h, [a1,a2,b1,b2], force_series=True, maxterms=4*ctx.prec)
801
+ if sum(ctx._is_real_type(u) for u in [a1,a2,b1,b2,z]) == 5:
802
+ v = ctx.re(v)
803
+ return v
804
+ except ctx.NoConvergence:
805
+ pass
806
+ finally:
807
+ ctx.prec = orig
808
+
809
+ return ctx.hypsum(2, 2, (a1type, a2type, b1type, b2type), [a1, a2, b1, b2], z, **kwargs)
810
+
811
+
812
+
813
+ @defun
814
+ def _hyp1f2(ctx, a_s, b_s, z, **kwargs):
815
+ (a1, a1type), = a_s
816
+ (b1, b1type), (b2, b2type) = b_s
817
+
818
+ absz = abs(z)
819
+ magz = ctx.mag(z)
820
+ orig = ctx.prec
821
+
822
+ # Asymptotic expansion is ~ exp(sqrt(z))
823
+ asymp_extraprec = z and magz//2
824
+
825
+ # Asymptotic series is in terms of 3F0
826
+ can_use_asymptotic = (not kwargs.get('force_series')) and \
827
+ (ctx.mag(absz) > 19) and \
828
+ (ctx.sqrt(absz) > 1.5*orig) # and \
829
+ # ctx._hyp_check_convergence([a1, a1-b1+1, a1-b2+1], [],
830
+ # 1/absz, orig+40+asymp_extraprec)
831
+
832
+ # TODO: much of the following could be shared with 2F3 instead of
833
+ # copypasted
834
+ if can_use_asymptotic:
835
+ #print "using asymp"
836
+ try:
837
+ try:
838
+ ctx.prec += asymp_extraprec
839
+ # http://functions.wolfram.com/HypergeometricFunctions/
840
+ # Hypergeometric1F2/06/02/03/
841
+ def h(a1,b1,b2):
842
+ X = ctx.mpq_1_2*(a1-b1-b2+ctx.mpq_1_2)
843
+ c = {}
844
+ c[0] = ctx.one
845
+ c[1] = 2*(ctx.mpq_1_4*(3*a1+b1+b2-2)*(a1-b1-b2)+b1*b2-ctx.mpq_3_16)
846
+ c[2] = 2*(b1*b2+ctx.mpq_1_4*(a1-b1-b2)*(3*a1+b1+b2-2)-ctx.mpq_3_16)**2+\
847
+ ctx.mpq_1_16*(-16*(2*a1-3)*b1*b2 + \
848
+ 4*(a1-b1-b2)*(-8*a1**2+11*a1+b1+b2-2)-3)
849
+ s1 = 0
850
+ s2 = 0
851
+ k = 0
852
+ tprev = 0
853
+ while 1:
854
+ if k not in c:
855
+ uu1 = (3*k**2+(-6*a1+2*b1+2*b2-4)*k + 3*a1**2 - \
856
+ (b1-b2)**2 - 2*a1*(b1+b2-2) + ctx.mpq_1_4)
857
+ uu2 = (k-a1+b1-b2-ctx.mpq_1_2)*(k-a1-b1+b2-ctx.mpq_1_2)*\
858
+ (k-a1+b1+b2-ctx.mpq_5_2)
859
+ c[k] = ctx.one/(2*k)*(uu1*c[k-1]-uu2*c[k-2])
860
+ w = c[k] * (-z)**(-0.5*k)
861
+ t1 = (-ctx.j)**k * ctx.mpf(2)**(-k) * w
862
+ t2 = ctx.j**k * ctx.mpf(2)**(-k) * w
863
+ if abs(t1) < 0.1*ctx.eps:
864
+ #print "Convergence :)"
865
+ break
866
+ # Quit if the series doesn't converge quickly enough
867
+ if k > 5 and abs(tprev) / abs(t1) < 1.5:
868
+ #print "No convergence :("
869
+ raise ctx.NoConvergence
870
+ s1 += t1
871
+ s2 += t2
872
+ tprev = t1
873
+ k += 1
874
+ S = ctx.expj(ctx.pi*X+2*ctx.sqrt(-z))*s1 + \
875
+ ctx.expj(-(ctx.pi*X+2*ctx.sqrt(-z)))*s2
876
+ T1 = [0.5*S, ctx.pi, -z], [1, -0.5, X], [b1, b2], [a1],\
877
+ [], [], 0
878
+ T2 = [-z], [-a1], [b1,b2],[b1-a1,b2-a1], \
879
+ [a1,a1-b1+1,a1-b2+1], [], 1/z
880
+ return T1, T2
881
+ v = ctx.hypercomb(h, [a1,b1,b2], force_series=True, maxterms=4*ctx.prec)
882
+ if sum(ctx._is_real_type(u) for u in [a1,b1,b2,z]) == 4:
883
+ v = ctx.re(v)
884
+ return v
885
+ except ctx.NoConvergence:
886
+ pass
887
+ finally:
888
+ ctx.prec = orig
889
+
890
+ #print "not using asymp"
891
+ return ctx.hypsum(1, 2, (a1type, b1type, b2type), [a1, b1, b2], z, **kwargs)
892
+
893
+
894
+
895
+ @defun
896
+ def _hyp2f3(ctx, a_s, b_s, z, **kwargs):
897
+ (a1, a1type), (a2, a2type) = a_s
898
+ (b1, b1type), (b2, b2type), (b3, b3type) = b_s
899
+
900
+ absz = abs(z)
901
+ magz = ctx.mag(z)
902
+
903
+ # Asymptotic expansion is ~ exp(sqrt(z))
904
+ asymp_extraprec = z and magz//2
905
+ orig = ctx.prec
906
+
907
+ # Asymptotic series is in terms of 4F1
908
+ # The square root below empirically provides a plausible criterion
909
+ # for the leading series to converge
910
+ can_use_asymptotic = (not kwargs.get('force_series')) and \
911
+ (ctx.mag(absz) > 19) and (ctx.sqrt(absz) > 1.5*orig)
912
+
913
+ if can_use_asymptotic:
914
+ #print "using asymp"
915
+ try:
916
+ try:
917
+ ctx.prec += asymp_extraprec
918
+ # http://functions.wolfram.com/HypergeometricFunctions/
919
+ # Hypergeometric2F3/06/02/03/01/0002/
920
+ def h(a1,a2,b1,b2,b3):
921
+ X = ctx.mpq_1_2*(a1+a2-b1-b2-b3+ctx.mpq_1_2)
922
+ A2 = a1+a2
923
+ B3 = b1+b2+b3
924
+ A = a1*a2
925
+ B = b1*b2+b3*b2+b1*b3
926
+ R = b1*b2*b3
927
+ c = {}
928
+ c[0] = ctx.one
929
+ c[1] = 2*(B - A + ctx.mpq_1_4*(3*A2+B3-2)*(A2-B3) - ctx.mpq_3_16)
930
+ c[2] = ctx.mpq_1_2*c[1]**2 + ctx.mpq_1_16*(-16*(2*A2-3)*(B-A) + 32*R +\
931
+ 4*(-8*A2**2 + 11*A2 + 8*A + B3 - 2)*(A2-B3)-3)
932
+ s1 = 0
933
+ s2 = 0
934
+ k = 0
935
+ tprev = 0
936
+ while 1:
937
+ if k not in c:
938
+ uu1 = (k-2*X-3)*(k-2*X-2*b1-1)*(k-2*X-2*b2-1)*\
939
+ (k-2*X-2*b3-1)
940
+ uu2 = (4*(k-1)**3 - 6*(4*X+B3)*(k-1)**2 + \
941
+ 2*(24*X**2+12*B3*X+4*B+B3-1)*(k-1) - 32*X**3 - \
942
+ 24*B3*X**2 - 4*B - 8*R - 4*(4*B+B3-1)*X + 2*B3-1)
943
+ uu3 = (5*(k-1)**2+2*(-10*X+A2-3*B3+3)*(k-1)+2*c[1])
944
+ c[k] = ctx.one/(2*k)*(uu1*c[k-3]-uu2*c[k-2]+uu3*c[k-1])
945
+ w = c[k] * ctx.power(-z, -0.5*k)
946
+ t1 = (-ctx.j)**k * ctx.mpf(2)**(-k) * w
947
+ t2 = ctx.j**k * ctx.mpf(2)**(-k) * w
948
+ if abs(t1) < 0.1*ctx.eps:
949
+ break
950
+ # Quit if the series doesn't converge quickly enough
951
+ if k > 5 and abs(tprev) / abs(t1) < 1.5:
952
+ raise ctx.NoConvergence
953
+ s1 += t1
954
+ s2 += t2
955
+ tprev = t1
956
+ k += 1
957
+ S = ctx.expj(ctx.pi*X+2*ctx.sqrt(-z))*s1 + \
958
+ ctx.expj(-(ctx.pi*X+2*ctx.sqrt(-z)))*s2
959
+ T1 = [0.5*S, ctx.pi, -z], [1, -0.5, X], [b1, b2, b3], [a1, a2],\
960
+ [], [], 0
961
+ T2 = [-z], [-a1], [b1,b2,b3,a2-a1],[a2,b1-a1,b2-a1,b3-a1], \
962
+ [a1,a1-b1+1,a1-b2+1,a1-b3+1], [a1-a2+1], 1/z
963
+ T3 = [-z], [-a2], [b1,b2,b3,a1-a2],[a1,b1-a2,b2-a2,b3-a2], \
964
+ [a2,a2-b1+1,a2-b2+1,a2-b3+1],[-a1+a2+1], 1/z
965
+ return T1, T2, T3
966
+ v = ctx.hypercomb(h, [a1,a2,b1,b2,b3], force_series=True, maxterms=4*ctx.prec)
967
+ if sum(ctx._is_real_type(u) for u in [a1,a2,b1,b2,b3,z]) == 6:
968
+ v = ctx.re(v)
969
+ return v
970
+ except ctx.NoConvergence:
971
+ pass
972
+ finally:
973
+ ctx.prec = orig
974
+
975
+ return ctx.hypsum(2, 3, (a1type, a2type, b1type, b2type, b3type), [a1, a2, b1, b2, b3], z, **kwargs)
976
+
977
+ @defun
978
+ def _hyp2f0(ctx, a_s, b_s, z, **kwargs):
979
+ (a, atype), (b, btype) = a_s
980
+ # We want to try aggressively to use the asymptotic expansion,
981
+ # and fall back only when absolutely necessary
982
+ try:
983
+ kwargsb = kwargs.copy()
984
+ kwargsb['maxterms'] = kwargsb.get('maxterms', ctx.prec)
985
+ return ctx.hypsum(2, 0, (atype,btype), [a,b], z, **kwargsb)
986
+ except ctx.NoConvergence:
987
+ if kwargs.get('force_series'):
988
+ raise
989
+ pass
990
+ def h(a, b):
991
+ w = ctx.sinpi(b)
992
+ rz = -1/z
993
+ T1 = ([ctx.pi,w,rz],[1,-1,a],[],[a-b+1,b],[a],[b],rz)
994
+ T2 = ([-ctx.pi,w,rz],[1,-1,1+a-b],[],[a,2-b],[a-b+1],[2-b],rz)
995
+ return T1, T2
996
+ return ctx.hypercomb(h, [a, 1+a-b], **kwargs)
997
+
998
+ @defun
999
+ def meijerg(ctx, a_s, b_s, z, r=1, series=None, **kwargs):
1000
+ an, ap = a_s
1001
+ bm, bq = b_s
1002
+ n = len(an)
1003
+ p = n + len(ap)
1004
+ m = len(bm)
1005
+ q = m + len(bq)
1006
+ a = an+ap
1007
+ b = bm+bq
1008
+ a = [ctx.convert(_) for _ in a]
1009
+ b = [ctx.convert(_) for _ in b]
1010
+ z = ctx.convert(z)
1011
+ if series is None:
1012
+ if p < q: series = 1
1013
+ if p > q: series = 2
1014
+ if p == q:
1015
+ if m+n == p and abs(z) > 1:
1016
+ series = 2
1017
+ else:
1018
+ series = 1
1019
+ if kwargs.get('verbose'):
1020
+ print("Meijer G m,n,p,q,series =", m,n,p,q,series)
1021
+ if series == 1:
1022
+ def h(*args):
1023
+ a = args[:p]
1024
+ b = args[p:]
1025
+ terms = []
1026
+ for k in range(m):
1027
+ bases = [z]
1028
+ expts = [b[k]/r]
1029
+ gn = [b[j]-b[k] for j in range(m) if j != k]
1030
+ gn += [1-a[j]+b[k] for j in range(n)]
1031
+ gd = [a[j]-b[k] for j in range(n,p)]
1032
+ gd += [1-b[j]+b[k] for j in range(m,q)]
1033
+ hn = [1-a[j]+b[k] for j in range(p)]
1034
+ hd = [1-b[j]+b[k] for j in range(q) if j != k]
1035
+ hz = (-ctx.one)**(p-m-n) * z**(ctx.one/r)
1036
+ terms.append((bases, expts, gn, gd, hn, hd, hz))
1037
+ return terms
1038
+ else:
1039
+ def h(*args):
1040
+ a = args[:p]
1041
+ b = args[p:]
1042
+ terms = []
1043
+ for k in range(n):
1044
+ bases = [z]
1045
+ if r == 1:
1046
+ expts = [a[k]-1]
1047
+ else:
1048
+ expts = [(a[k]-1)/ctx.convert(r)]
1049
+ gn = [a[k]-a[j] for j in range(n) if j != k]
1050
+ gn += [1-a[k]+b[j] for j in range(m)]
1051
+ gd = [a[k]-b[j] for j in range(m,q)]
1052
+ gd += [1-a[k]+a[j] for j in range(n,p)]
1053
+ hn = [1-a[k]+b[j] for j in range(q)]
1054
+ hd = [1+a[j]-a[k] for j in range(p) if j != k]
1055
+ hz = (-ctx.one)**(q-m-n) / z**(ctx.one/r)
1056
+ terms.append((bases, expts, gn, gd, hn, hd, hz))
1057
+ return terms
1058
+ return ctx.hypercomb(h, a+b, **kwargs)
1059
+
1060
+ @defun_wrapped
1061
+ def appellf1(ctx,a,b1,b2,c,x,y,**kwargs):
1062
+ # Assume x smaller
1063
+ # We will use x for the outer loop
1064
+ if abs(x) > abs(y):
1065
+ x, y = y, x
1066
+ b1, b2 = b2, b1
1067
+ def ok(x):
1068
+ return abs(x) < 0.99
1069
+ # Finite cases
1070
+ if ctx.isnpint(a):
1071
+ pass
1072
+ elif ctx.isnpint(b1):
1073
+ pass
1074
+ elif ctx.isnpint(b2):
1075
+ x, y, b1, b2 = y, x, b2, b1
1076
+ else:
1077
+ #print x, y
1078
+ # Note: ok if |y| > 1, because
1079
+ # 2F1 implements analytic continuation
1080
+ if not ok(x):
1081
+ u1 = (x-y)/(x-1)
1082
+ if not ok(u1):
1083
+ raise ValueError("Analytic continuation not implemented")
1084
+ #print "Using analytic continuation"
1085
+ return (1-x)**(-b1)*(1-y)**(c-a-b2)*\
1086
+ ctx.appellf1(c-a,b1,c-b1-b2,c,u1,y,**kwargs)
1087
+ return ctx.hyper2d({'m+n':[a],'m':[b1],'n':[b2]}, {'m+n':[c]}, x,y, **kwargs)
1088
+
1089
+ @defun
1090
+ def appellf2(ctx,a,b1,b2,c1,c2,x,y,**kwargs):
1091
+ # TODO: continuation
1092
+ return ctx.hyper2d({'m+n':[a],'m':[b1],'n':[b2]},
1093
+ {'m':[c1],'n':[c2]}, x,y, **kwargs)
1094
+
1095
+ @defun
1096
+ def appellf3(ctx,a1,a2,b1,b2,c,x,y,**kwargs):
1097
+ outer_polynomial = ctx.isnpint(a1) or ctx.isnpint(b1)
1098
+ inner_polynomial = ctx.isnpint(a2) or ctx.isnpint(b2)
1099
+ if not outer_polynomial:
1100
+ if inner_polynomial or abs(x) > abs(y):
1101
+ x, y = y, x
1102
+ a1,a2,b1,b2 = a2,a1,b2,b1
1103
+ return ctx.hyper2d({'m':[a1,b1],'n':[a2,b2]}, {'m+n':[c]},x,y,**kwargs)
1104
+
1105
+ @defun
1106
+ def appellf4(ctx,a,b,c1,c2,x,y,**kwargs):
1107
+ # TODO: continuation
1108
+ return ctx.hyper2d({'m+n':[a,b]}, {'m':[c1],'n':[c2]},x,y,**kwargs)
1109
+
1110
+ @defun
1111
+ def hyper2d(ctx, a, b, x, y, **kwargs):
1112
+ r"""
1113
+ Sums the generalized 2D hypergeometric series
1114
+
1115
+ .. math ::
1116
+
1117
+ \sum_{m=0}^{\infty} \sum_{n=0}^{\infty}
1118
+ \frac{P((a),m,n)}{Q((b),m,n)}
1119
+ \frac{x^m y^n} {m! n!}
1120
+
1121
+ where `(a) = (a_1,\ldots,a_r)`, `(b) = (b_1,\ldots,b_s)` and where
1122
+ `P` and `Q` are products of rising factorials such as `(a_j)_n` or
1123
+ `(a_j)_{m+n}`. `P` and `Q` are specified in the form of dicts, with
1124
+ the `m` and `n` dependence as keys and parameter lists as values.
1125
+ The supported rising factorials are given in the following table
1126
+ (note that only a few are supported in `Q`):
1127
+
1128
+ +------------+-------------------+--------+
1129
+ | Key | Rising factorial | `Q` |
1130
+ +============+===================+========+
1131
+ | ``'m'`` | `(a_j)_m` | Yes |
1132
+ +------------+-------------------+--------+
1133
+ | ``'n'`` | `(a_j)_n` | Yes |
1134
+ +------------+-------------------+--------+
1135
+ | ``'m+n'`` | `(a_j)_{m+n}` | Yes |
1136
+ +------------+-------------------+--------+
1137
+ | ``'m-n'`` | `(a_j)_{m-n}` | No |
1138
+ +------------+-------------------+--------+
1139
+ | ``'n-m'`` | `(a_j)_{n-m}` | No |
1140
+ +------------+-------------------+--------+
1141
+ | ``'2m+n'`` | `(a_j)_{2m+n}` | No |
1142
+ +------------+-------------------+--------+
1143
+ | ``'2m-n'`` | `(a_j)_{2m-n}` | No |
1144
+ +------------+-------------------+--------+
1145
+ | ``'2n-m'`` | `(a_j)_{2n-m}` | No |
1146
+ +------------+-------------------+--------+
1147
+
1148
+ For example, the Appell F1 and F4 functions
1149
+
1150
+ .. math ::
1151
+
1152
+ F_1 = \sum_{m=0}^{\infty} \sum_{n=0}^{\infty}
1153
+ \frac{(a)_{m+n} (b)_m (c)_n}{(d)_{m+n}}
1154
+ \frac{x^m y^n}{m! n!}
1155
+
1156
+ F_4 = \sum_{m=0}^{\infty} \sum_{n=0}^{\infty}
1157
+ \frac{(a)_{m+n} (b)_{m+n}}{(c)_m (d)_{n}}
1158
+ \frac{x^m y^n}{m! n!}
1159
+
1160
+ can be represented respectively as
1161
+
1162
+ ``hyper2d({'m+n':[a], 'm':[b], 'n':[c]}, {'m+n':[d]}, x, y)``
1163
+
1164
+ ``hyper2d({'m+n':[a,b]}, {'m':[c], 'n':[d]}, x, y)``
1165
+
1166
+ More generally, :func:`~mpmath.hyper2d` can evaluate any of the 34 distinct
1167
+ convergent second-order (generalized Gaussian) hypergeometric
1168
+ series enumerated by Horn, as well as the Kampe de Feriet
1169
+ function.
1170
+
1171
+ The series is computed by rewriting it so that the inner
1172
+ series (i.e. the series containing `n` and `y`) has the form of an
1173
+ ordinary generalized hypergeometric series and thereby can be
1174
+ evaluated efficiently using :func:`~mpmath.hyper`. If possible,
1175
+ manually swapping `x` and `y` and the corresponding parameters
1176
+ can sometimes give better results.
1177
+
1178
+ **Examples**
1179
+
1180
+ Two separable cases: a product of two geometric series, and a
1181
+ product of two Gaussian hypergeometric functions::
1182
+
1183
+ >>> from mpmath import *
1184
+ >>> mp.dps = 25; mp.pretty = True
1185
+ >>> x, y = mpf(0.25), mpf(0.5)
1186
+ >>> hyper2d({'m':1,'n':1}, {}, x,y)
1187
+ 2.666666666666666666666667
1188
+ >>> 1/(1-x)/(1-y)
1189
+ 2.666666666666666666666667
1190
+ >>> hyper2d({'m':[1,2],'n':[3,4]}, {'m':[5],'n':[6]}, x,y)
1191
+ 4.164358531238938319669856
1192
+ >>> hyp2f1(1,2,5,x)*hyp2f1(3,4,6,y)
1193
+ 4.164358531238938319669856
1194
+
1195
+ Some more series that can be done in closed form::
1196
+
1197
+ >>> hyper2d({'m':1,'n':1},{'m+n':1},x,y)
1198
+ 2.013417124712514809623881
1199
+ >>> (exp(x)*x-exp(y)*y)/(x-y)
1200
+ 2.013417124712514809623881
1201
+
1202
+ Six of the 34 Horn functions, G1-G3 and H1-H3::
1203
+
1204
+ >>> from mpmath import *
1205
+ >>> mp.dps = 10; mp.pretty = True
1206
+ >>> x, y = 0.0625, 0.125
1207
+ >>> a1,a2,b1,b2,c1,c2,d = 1.1,-1.2,-1.3,-1.4,1.5,-1.6,1.7
1208
+ >>> hyper2d({'m+n':a1,'n-m':b1,'m-n':b2},{},x,y) # G1
1209
+ 1.139090746
1210
+ >>> nsum(lambda m,n: rf(a1,m+n)*rf(b1,n-m)*rf(b2,m-n)*\
1211
+ ... x**m*y**n/fac(m)/fac(n), [0,inf], [0,inf])
1212
+ 1.139090746
1213
+ >>> hyper2d({'m':a1,'n':a2,'n-m':b1,'m-n':b2},{},x,y) # G2
1214
+ 0.9503682696
1215
+ >>> nsum(lambda m,n: rf(a1,m)*rf(a2,n)*rf(b1,n-m)*rf(b2,m-n)*\
1216
+ ... x**m*y**n/fac(m)/fac(n), [0,inf], [0,inf])
1217
+ 0.9503682696
1218
+ >>> hyper2d({'2n-m':a1,'2m-n':a2},{},x,y) # G3
1219
+ 1.029372029
1220
+ >>> nsum(lambda m,n: rf(a1,2*n-m)*rf(a2,2*m-n)*\
1221
+ ... x**m*y**n/fac(m)/fac(n), [0,inf], [0,inf])
1222
+ 1.029372029
1223
+ >>> hyper2d({'m-n':a1,'m+n':b1,'n':c1},{'m':d},x,y) # H1
1224
+ -1.605331256
1225
+ >>> nsum(lambda m,n: rf(a1,m-n)*rf(b1,m+n)*rf(c1,n)/rf(d,m)*\
1226
+ ... x**m*y**n/fac(m)/fac(n), [0,inf], [0,inf])
1227
+ -1.605331256
1228
+ >>> hyper2d({'m-n':a1,'m':b1,'n':[c1,c2]},{'m':d},x,y) # H2
1229
+ -2.35405404
1230
+ >>> nsum(lambda m,n: rf(a1,m-n)*rf(b1,m)*rf(c1,n)*rf(c2,n)/rf(d,m)*\
1231
+ ... x**m*y**n/fac(m)/fac(n), [0,inf], [0,inf])
1232
+ -2.35405404
1233
+ >>> hyper2d({'2m+n':a1,'n':b1},{'m+n':c1},x,y) # H3
1234
+ 0.974479074
1235
+ >>> nsum(lambda m,n: rf(a1,2*m+n)*rf(b1,n)/rf(c1,m+n)*\
1236
+ ... x**m*y**n/fac(m)/fac(n), [0,inf], [0,inf])
1237
+ 0.974479074
1238
+
1239
+ **References**
1240
+
1241
+ 1. [SrivastavaKarlsson]_
1242
+ 2. [Weisstein]_ http://mathworld.wolfram.com/HornFunction.html
1243
+ 3. [Weisstein]_ http://mathworld.wolfram.com/AppellHypergeometricFunction.html
1244
+
1245
+ """
1246
+ x = ctx.convert(x)
1247
+ y = ctx.convert(y)
1248
+ def parse(dct, key):
1249
+ args = dct.pop(key, [])
1250
+ try:
1251
+ args = list(args)
1252
+ except TypeError:
1253
+ args = [args]
1254
+ return [ctx.convert(arg) for arg in args]
1255
+ a_s = dict(a)
1256
+ b_s = dict(b)
1257
+ a_m = parse(a, 'm')
1258
+ a_n = parse(a, 'n')
1259
+ a_m_add_n = parse(a, 'm+n')
1260
+ a_m_sub_n = parse(a, 'm-n')
1261
+ a_n_sub_m = parse(a, 'n-m')
1262
+ a_2m_add_n = parse(a, '2m+n')
1263
+ a_2m_sub_n = parse(a, '2m-n')
1264
+ a_2n_sub_m = parse(a, '2n-m')
1265
+ b_m = parse(b, 'm')
1266
+ b_n = parse(b, 'n')
1267
+ b_m_add_n = parse(b, 'm+n')
1268
+ if a: raise ValueError("unsupported key: %r" % a.keys()[0])
1269
+ if b: raise ValueError("unsupported key: %r" % b.keys()[0])
1270
+ s = 0
1271
+ outer = ctx.one
1272
+ m = ctx.mpf(0)
1273
+ ok_count = 0
1274
+ prec = ctx.prec
1275
+ maxterms = kwargs.get('maxterms', 20*prec)
1276
+ try:
1277
+ ctx.prec += 10
1278
+ tol = +ctx.eps
1279
+ while 1:
1280
+ inner_sign = 1
1281
+ outer_sign = 1
1282
+ inner_a = list(a_n)
1283
+ inner_b = list(b_n)
1284
+ outer_a = [a+m for a in a_m]
1285
+ outer_b = [b+m for b in b_m]
1286
+ # (a)_{m+n} = (a)_m (a+m)_n
1287
+ for a in a_m_add_n:
1288
+ a = a+m
1289
+ inner_a.append(a)
1290
+ outer_a.append(a)
1291
+ # (b)_{m+n} = (b)_m (b+m)_n
1292
+ for b in b_m_add_n:
1293
+ b = b+m
1294
+ inner_b.append(b)
1295
+ outer_b.append(b)
1296
+ # (a)_{n-m} = (a-m)_n / (a-m)_m
1297
+ for a in a_n_sub_m:
1298
+ inner_a.append(a-m)
1299
+ outer_b.append(a-m-1)
1300
+ # (a)_{m-n} = (-1)^(m+n) (1-a-m)_m / (1-a-m)_n
1301
+ for a in a_m_sub_n:
1302
+ inner_sign *= (-1)
1303
+ outer_sign *= (-1)**(m)
1304
+ inner_b.append(1-a-m)
1305
+ outer_a.append(-a-m)
1306
+ # (a)_{2m+n} = (a)_{2m} (a+2m)_n
1307
+ for a in a_2m_add_n:
1308
+ inner_a.append(a+2*m)
1309
+ outer_a.append((a+2*m)*(1+a+2*m))
1310
+ # (a)_{2m-n} = (-1)^(2m+n) (1-a-2m)_{2m} / (1-a-2m)_n
1311
+ for a in a_2m_sub_n:
1312
+ inner_sign *= (-1)
1313
+ inner_b.append(1-a-2*m)
1314
+ outer_a.append((a+2*m)*(1+a+2*m))
1315
+ # (a)_{2n-m} = 4^n ((a-m)/2)_n ((a-m+1)/2)_n / (a-m)_m
1316
+ for a in a_2n_sub_m:
1317
+ inner_sign *= 4
1318
+ inner_a.append(0.5*(a-m))
1319
+ inner_a.append(0.5*(a-m+1))
1320
+ outer_b.append(a-m-1)
1321
+ inner = ctx.hyper(inner_a, inner_b, inner_sign*y,
1322
+ zeroprec=ctx.prec, **kwargs)
1323
+ term = outer * inner * outer_sign
1324
+ if abs(term) < tol:
1325
+ ok_count += 1
1326
+ else:
1327
+ ok_count = 0
1328
+ if ok_count >= 3 or not outer:
1329
+ break
1330
+ s += term
1331
+ for a in outer_a: outer *= a
1332
+ for b in outer_b: outer /= b
1333
+ m += 1
1334
+ outer = outer * x / m
1335
+ if m > maxterms:
1336
+ raise ctx.NoConvergence("maxterms exceeded in hyper2d")
1337
+ finally:
1338
+ ctx.prec = prec
1339
+ return +s
1340
+
1341
+ """
1342
+ @defun
1343
+ def kampe_de_feriet(ctx,a,b,c,d,e,f,x,y,**kwargs):
1344
+ return ctx.hyper2d({'m+n':a,'m':b,'n':c},
1345
+ {'m+n':d,'m':e,'n':f}, x,y, **kwargs)
1346
+ """
1347
+
1348
+ @defun
1349
+ def bihyper(ctx, a_s, b_s, z, **kwargs):
1350
+ r"""
1351
+ Evaluates the bilateral hypergeometric series
1352
+
1353
+ .. math ::
1354
+
1355
+ \,_AH_B(a_1, \ldots, a_k; b_1, \ldots, b_B; z) =
1356
+ \sum_{n=-\infty}^{\infty}
1357
+ \frac{(a_1)_n \ldots (a_A)_n}
1358
+ {(b_1)_n \ldots (b_B)_n} \, z^n
1359
+
1360
+ where, for direct convergence, `A = B` and `|z| = 1`, although a
1361
+ regularized sum exists more generally by considering the
1362
+ bilateral series as a sum of two ordinary hypergeometric
1363
+ functions. In order for the series to make sense, none of the
1364
+ parameters may be integers.
1365
+
1366
+ **Examples**
1367
+
1368
+ The value of `\,_2H_2` at `z = 1` is given by Dougall's formula::
1369
+
1370
+ >>> from mpmath import *
1371
+ >>> mp.dps = 25; mp.pretty = True
1372
+ >>> a,b,c,d = 0.5, 1.5, 2.25, 3.25
1373
+ >>> bihyper([a,b],[c,d],1)
1374
+ -14.49118026212345786148847
1375
+ >>> gammaprod([c,d,1-a,1-b,c+d-a-b-1],[c-a,d-a,c-b,d-b])
1376
+ -14.49118026212345786148847
1377
+
1378
+ The regularized function `\,_1H_0` can be expressed as the
1379
+ sum of one `\,_2F_0` function and one `\,_1F_1` function::
1380
+
1381
+ >>> a = mpf(0.25)
1382
+ >>> z = mpf(0.75)
1383
+ >>> bihyper([a], [], z)
1384
+ (0.2454393389657273841385582 + 0.2454393389657273841385582j)
1385
+ >>> hyper([a,1],[],z) + (hyper([1],[1-a],-1/z)-1)
1386
+ (0.2454393389657273841385582 + 0.2454393389657273841385582j)
1387
+ >>> hyper([a,1],[],z) + hyper([1],[2-a],-1/z)/z/(a-1)
1388
+ (0.2454393389657273841385582 + 0.2454393389657273841385582j)
1389
+
1390
+ **References**
1391
+
1392
+ 1. [Slater]_ (chapter 6: "Bilateral Series", pp. 180-189)
1393
+ 2. [Wikipedia]_ http://en.wikipedia.org/wiki/Bilateral_hypergeometric_series
1394
+
1395
+ """
1396
+ z = ctx.convert(z)
1397
+ c_s = a_s + b_s
1398
+ p = len(a_s)
1399
+ q = len(b_s)
1400
+ if (p, q) == (0,0) or (p, q) == (1,1):
1401
+ return ctx.zero * z
1402
+ neg = (p-q) % 2
1403
+ def h(*c_s):
1404
+ a_s = list(c_s[:p])
1405
+ b_s = list(c_s[p:])
1406
+ aa_s = [2-b for b in b_s]
1407
+ bb_s = [2-a for a in a_s]
1408
+ rp = [(-1)**neg * z] + [1-b for b in b_s] + [1-a for a in a_s]
1409
+ rc = [-1] + [1]*len(b_s) + [-1]*len(a_s)
1410
+ T1 = [], [], [], [], a_s + [1], b_s, z
1411
+ T2 = rp, rc, [], [], aa_s + [1], bb_s, (-1)**neg / z
1412
+ return T1, T2
1413
+ return ctx.hypercomb(h, c_s, **kwargs)
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/functions/theta.py ADDED
@@ -0,0 +1,1049 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .functions import defun, defun_wrapped
2
+
3
+ @defun
4
+ def _jacobi_theta2(ctx, z, q):
5
+ extra1 = 10
6
+ extra2 = 20
7
+ # the loops below break when the fixed precision quantities
8
+ # a and b go to zero;
9
+ # right shifting small negative numbers by wp one obtains -1, not zero,
10
+ # so the condition a**2 + b**2 > MIN is used to break the loops.
11
+ MIN = 2
12
+ if z == ctx.zero:
13
+ if (not ctx._im(q)):
14
+ wp = ctx.prec + extra1
15
+ x = ctx.to_fixed(ctx._re(q), wp)
16
+ x2 = (x*x) >> wp
17
+ a = b = x2
18
+ s = x2
19
+ while abs(a) > MIN:
20
+ b = (b*x2) >> wp
21
+ a = (a*b) >> wp
22
+ s += a
23
+ s = (1 << (wp+1)) + (s << 1)
24
+ s = ctx.ldexp(s, -wp)
25
+ else:
26
+ wp = ctx.prec + extra1
27
+ xre = ctx.to_fixed(ctx._re(q), wp)
28
+ xim = ctx.to_fixed(ctx._im(q), wp)
29
+ x2re = (xre*xre - xim*xim) >> wp
30
+ x2im = (xre*xim) >> (wp-1)
31
+ are = bre = x2re
32
+ aim = bim = x2im
33
+ sre = (1<<wp) + are
34
+ sim = aim
35
+ while are**2 + aim**2 > MIN:
36
+ bre, bim = (bre * x2re - bim * x2im) >> wp, \
37
+ (bre * x2im + bim * x2re) >> wp
38
+ are, aim = (are * bre - aim * bim) >> wp, \
39
+ (are * bim + aim * bre) >> wp
40
+ sre += are
41
+ sim += aim
42
+ sre = (sre << 1)
43
+ sim = (sim << 1)
44
+ sre = ctx.ldexp(sre, -wp)
45
+ sim = ctx.ldexp(sim, -wp)
46
+ s = ctx.mpc(sre, sim)
47
+ else:
48
+ if (not ctx._im(q)) and (not ctx._im(z)):
49
+ wp = ctx.prec + extra1
50
+ x = ctx.to_fixed(ctx._re(q), wp)
51
+ x2 = (x*x) >> wp
52
+ a = b = x2
53
+ c1, s1 = ctx.cos_sin(ctx._re(z), prec=wp)
54
+ cn = c1 = ctx.to_fixed(c1, wp)
55
+ sn = s1 = ctx.to_fixed(s1, wp)
56
+ c2 = (c1*c1 - s1*s1) >> wp
57
+ s2 = (c1 * s1) >> (wp - 1)
58
+ cn, sn = (cn*c2 - sn*s2) >> wp, (sn*c2 + cn*s2) >> wp
59
+ s = c1 + ((a * cn) >> wp)
60
+ while abs(a) > MIN:
61
+ b = (b*x2) >> wp
62
+ a = (a*b) >> wp
63
+ cn, sn = (cn*c2 - sn*s2) >> wp, (sn*c2 + cn*s2) >> wp
64
+ s += (a * cn) >> wp
65
+ s = (s << 1)
66
+ s = ctx.ldexp(s, -wp)
67
+ s *= ctx.nthroot(q, 4)
68
+ return s
69
+ # case z real, q complex
70
+ elif not ctx._im(z):
71
+ wp = ctx.prec + extra2
72
+ xre = ctx.to_fixed(ctx._re(q), wp)
73
+ xim = ctx.to_fixed(ctx._im(q), wp)
74
+ x2re = (xre*xre - xim*xim) >> wp
75
+ x2im = (xre*xim) >> (wp - 1)
76
+ are = bre = x2re
77
+ aim = bim = x2im
78
+ c1, s1 = ctx.cos_sin(ctx._re(z), prec=wp)
79
+ cn = c1 = ctx.to_fixed(c1, wp)
80
+ sn = s1 = ctx.to_fixed(s1, wp)
81
+ c2 = (c1*c1 - s1*s1) >> wp
82
+ s2 = (c1 * s1) >> (wp - 1)
83
+ cn, sn = (cn*c2 - sn*s2) >> wp, (sn*c2 + cn*s2) >> wp
84
+ sre = c1 + ((are * cn) >> wp)
85
+ sim = ((aim * cn) >> wp)
86
+ while are**2 + aim**2 > MIN:
87
+ bre, bim = (bre * x2re - bim * x2im) >> wp, \
88
+ (bre * x2im + bim * x2re) >> wp
89
+ are, aim = (are * bre - aim * bim) >> wp, \
90
+ (are * bim + aim * bre) >> wp
91
+ cn, sn = (cn*c2 - sn*s2) >> wp, (sn*c2 + cn*s2) >> wp
92
+ sre += ((are * cn) >> wp)
93
+ sim += ((aim * cn) >> wp)
94
+ sre = (sre << 1)
95
+ sim = (sim << 1)
96
+ sre = ctx.ldexp(sre, -wp)
97
+ sim = ctx.ldexp(sim, -wp)
98
+ s = ctx.mpc(sre, sim)
99
+ #case z complex, q real
100
+ elif not ctx._im(q):
101
+ wp = ctx.prec + extra2
102
+ x = ctx.to_fixed(ctx._re(q), wp)
103
+ x2 = (x*x) >> wp
104
+ a = b = x2
105
+ prec0 = ctx.prec
106
+ ctx.prec = wp
107
+ c1, s1 = ctx.cos_sin(z)
108
+ ctx.prec = prec0
109
+ cnre = c1re = ctx.to_fixed(ctx._re(c1), wp)
110
+ cnim = c1im = ctx.to_fixed(ctx._im(c1), wp)
111
+ snre = s1re = ctx.to_fixed(ctx._re(s1), wp)
112
+ snim = s1im = ctx.to_fixed(ctx._im(s1), wp)
113
+ #c2 = (c1*c1 - s1*s1) >> wp
114
+ c2re = (c1re*c1re - c1im*c1im - s1re*s1re + s1im*s1im) >> wp
115
+ c2im = (c1re*c1im - s1re*s1im) >> (wp - 1)
116
+ #s2 = (c1 * s1) >> (wp - 1)
117
+ s2re = (c1re*s1re - c1im*s1im) >> (wp - 1)
118
+ s2im = (c1re*s1im + c1im*s1re) >> (wp - 1)
119
+ #cn, sn = (cn*c2 - sn*s2) >> wp, (sn*c2 + cn*s2) >> wp
120
+ t1 = (cnre*c2re - cnim*c2im - snre*s2re + snim*s2im) >> wp
121
+ t2 = (cnre*c2im + cnim*c2re - snre*s2im - snim*s2re) >> wp
122
+ t3 = (snre*c2re - snim*c2im + cnre*s2re - cnim*s2im) >> wp
123
+ t4 = (snre*c2im + snim*c2re + cnre*s2im + cnim*s2re) >> wp
124
+ cnre = t1
125
+ cnim = t2
126
+ snre = t3
127
+ snim = t4
128
+ sre = c1re + ((a * cnre) >> wp)
129
+ sim = c1im + ((a * cnim) >> wp)
130
+ while abs(a) > MIN:
131
+ b = (b*x2) >> wp
132
+ a = (a*b) >> wp
133
+ t1 = (cnre*c2re - cnim*c2im - snre*s2re + snim*s2im) >> wp
134
+ t2 = (cnre*c2im + cnim*c2re - snre*s2im - snim*s2re) >> wp
135
+ t3 = (snre*c2re - snim*c2im + cnre*s2re - cnim*s2im) >> wp
136
+ t4 = (snre*c2im + snim*c2re + cnre*s2im + cnim*s2re) >> wp
137
+ cnre = t1
138
+ cnim = t2
139
+ snre = t3
140
+ snim = t4
141
+ sre += ((a * cnre) >> wp)
142
+ sim += ((a * cnim) >> wp)
143
+ sre = (sre << 1)
144
+ sim = (sim << 1)
145
+ sre = ctx.ldexp(sre, -wp)
146
+ sim = ctx.ldexp(sim, -wp)
147
+ s = ctx.mpc(sre, sim)
148
+ # case z and q complex
149
+ else:
150
+ wp = ctx.prec + extra2
151
+ xre = ctx.to_fixed(ctx._re(q), wp)
152
+ xim = ctx.to_fixed(ctx._im(q), wp)
153
+ x2re = (xre*xre - xim*xim) >> wp
154
+ x2im = (xre*xim) >> (wp - 1)
155
+ are = bre = x2re
156
+ aim = bim = x2im
157
+ prec0 = ctx.prec
158
+ ctx.prec = wp
159
+ # cos(z), sin(z) with z complex
160
+ c1, s1 = ctx.cos_sin(z)
161
+ ctx.prec = prec0
162
+ cnre = c1re = ctx.to_fixed(ctx._re(c1), wp)
163
+ cnim = c1im = ctx.to_fixed(ctx._im(c1), wp)
164
+ snre = s1re = ctx.to_fixed(ctx._re(s1), wp)
165
+ snim = s1im = ctx.to_fixed(ctx._im(s1), wp)
166
+ c2re = (c1re*c1re - c1im*c1im - s1re*s1re + s1im*s1im) >> wp
167
+ c2im = (c1re*c1im - s1re*s1im) >> (wp - 1)
168
+ s2re = (c1re*s1re - c1im*s1im) >> (wp - 1)
169
+ s2im = (c1re*s1im + c1im*s1re) >> (wp - 1)
170
+ t1 = (cnre*c2re - cnim*c2im - snre*s2re + snim*s2im) >> wp
171
+ t2 = (cnre*c2im + cnim*c2re - snre*s2im - snim*s2re) >> wp
172
+ t3 = (snre*c2re - snim*c2im + cnre*s2re - cnim*s2im) >> wp
173
+ t4 = (snre*c2im + snim*c2re + cnre*s2im + cnim*s2re) >> wp
174
+ cnre = t1
175
+ cnim = t2
176
+ snre = t3
177
+ snim = t4
178
+ n = 1
179
+ termre = c1re
180
+ termim = c1im
181
+ sre = c1re + ((are * cnre - aim * cnim) >> wp)
182
+ sim = c1im + ((are * cnim + aim * cnre) >> wp)
183
+ n = 3
184
+ termre = ((are * cnre - aim * cnim) >> wp)
185
+ termim = ((are * cnim + aim * cnre) >> wp)
186
+ sre = c1re + ((are * cnre - aim * cnim) >> wp)
187
+ sim = c1im + ((are * cnim + aim * cnre) >> wp)
188
+ n = 5
189
+ while are**2 + aim**2 > MIN:
190
+ bre, bim = (bre * x2re - bim * x2im) >> wp, \
191
+ (bre * x2im + bim * x2re) >> wp
192
+ are, aim = (are * bre - aim * bim) >> wp, \
193
+ (are * bim + aim * bre) >> wp
194
+ #cn, sn = (cn*c1 - sn*s1) >> wp, (sn*c1 + cn*s1) >> wp
195
+ t1 = (cnre*c2re - cnim*c2im - snre*s2re + snim*s2im) >> wp
196
+ t2 = (cnre*c2im + cnim*c2re - snre*s2im - snim*s2re) >> wp
197
+ t3 = (snre*c2re - snim*c2im + cnre*s2re - cnim*s2im) >> wp
198
+ t4 = (snre*c2im + snim*c2re + cnre*s2im + cnim*s2re) >> wp
199
+ cnre = t1
200
+ cnim = t2
201
+ snre = t3
202
+ snim = t4
203
+ termre = ((are * cnre - aim * cnim) >> wp)
204
+ termim = ((aim * cnre + are * cnim) >> wp)
205
+ sre += ((are * cnre - aim * cnim) >> wp)
206
+ sim += ((aim * cnre + are * cnim) >> wp)
207
+ n += 2
208
+ sre = (sre << 1)
209
+ sim = (sim << 1)
210
+ sre = ctx.ldexp(sre, -wp)
211
+ sim = ctx.ldexp(sim, -wp)
212
+ s = ctx.mpc(sre, sim)
213
+ s *= ctx.nthroot(q, 4)
214
+ return s
215
+
216
+ @defun
217
+ def _djacobi_theta2(ctx, z, q, nd):
218
+ MIN = 2
219
+ extra1 = 10
220
+ extra2 = 20
221
+ if (not ctx._im(q)) and (not ctx._im(z)):
222
+ wp = ctx.prec + extra1
223
+ x = ctx.to_fixed(ctx._re(q), wp)
224
+ x2 = (x*x) >> wp
225
+ a = b = x2
226
+ c1, s1 = ctx.cos_sin(ctx._re(z), prec=wp)
227
+ cn = c1 = ctx.to_fixed(c1, wp)
228
+ sn = s1 = ctx.to_fixed(s1, wp)
229
+ c2 = (c1*c1 - s1*s1) >> wp
230
+ s2 = (c1 * s1) >> (wp - 1)
231
+ cn, sn = (cn*c2 - sn*s2) >> wp, (sn*c2 + cn*s2) >> wp
232
+ if (nd&1):
233
+ s = s1 + ((a * sn * 3**nd) >> wp)
234
+ else:
235
+ s = c1 + ((a * cn * 3**nd) >> wp)
236
+ n = 2
237
+ while abs(a) > MIN:
238
+ b = (b*x2) >> wp
239
+ a = (a*b) >> wp
240
+ cn, sn = (cn*c2 - sn*s2) >> wp, (sn*c2 + cn*s2) >> wp
241
+ if nd&1:
242
+ s += (a * sn * (2*n+1)**nd) >> wp
243
+ else:
244
+ s += (a * cn * (2*n+1)**nd) >> wp
245
+ n += 1
246
+ s = -(s << 1)
247
+ s = ctx.ldexp(s, -wp)
248
+ # case z real, q complex
249
+ elif not ctx._im(z):
250
+ wp = ctx.prec + extra2
251
+ xre = ctx.to_fixed(ctx._re(q), wp)
252
+ xim = ctx.to_fixed(ctx._im(q), wp)
253
+ x2re = (xre*xre - xim*xim) >> wp
254
+ x2im = (xre*xim) >> (wp - 1)
255
+ are = bre = x2re
256
+ aim = bim = x2im
257
+ c1, s1 = ctx.cos_sin(ctx._re(z), prec=wp)
258
+ cn = c1 = ctx.to_fixed(c1, wp)
259
+ sn = s1 = ctx.to_fixed(s1, wp)
260
+ c2 = (c1*c1 - s1*s1) >> wp
261
+ s2 = (c1 * s1) >> (wp - 1)
262
+ cn, sn = (cn*c2 - sn*s2) >> wp, (sn*c2 + cn*s2) >> wp
263
+ if (nd&1):
264
+ sre = s1 + ((are * sn * 3**nd) >> wp)
265
+ sim = ((aim * sn * 3**nd) >> wp)
266
+ else:
267
+ sre = c1 + ((are * cn * 3**nd) >> wp)
268
+ sim = ((aim * cn * 3**nd) >> wp)
269
+ n = 5
270
+ while are**2 + aim**2 > MIN:
271
+ bre, bim = (bre * x2re - bim * x2im) >> wp, \
272
+ (bre * x2im + bim * x2re) >> wp
273
+ are, aim = (are * bre - aim * bim) >> wp, \
274
+ (are * bim + aim * bre) >> wp
275
+ cn, sn = (cn*c2 - sn*s2) >> wp, (sn*c2 + cn*s2) >> wp
276
+
277
+ if (nd&1):
278
+ sre += ((are * sn * n**nd) >> wp)
279
+ sim += ((aim * sn * n**nd) >> wp)
280
+ else:
281
+ sre += ((are * cn * n**nd) >> wp)
282
+ sim += ((aim * cn * n**nd) >> wp)
283
+ n += 2
284
+ sre = -(sre << 1)
285
+ sim = -(sim << 1)
286
+ sre = ctx.ldexp(sre, -wp)
287
+ sim = ctx.ldexp(sim, -wp)
288
+ s = ctx.mpc(sre, sim)
289
+ #case z complex, q real
290
+ elif not ctx._im(q):
291
+ wp = ctx.prec + extra2
292
+ x = ctx.to_fixed(ctx._re(q), wp)
293
+ x2 = (x*x) >> wp
294
+ a = b = x2
295
+ prec0 = ctx.prec
296
+ ctx.prec = wp
297
+ c1, s1 = ctx.cos_sin(z)
298
+ ctx.prec = prec0
299
+ cnre = c1re = ctx.to_fixed(ctx._re(c1), wp)
300
+ cnim = c1im = ctx.to_fixed(ctx._im(c1), wp)
301
+ snre = s1re = ctx.to_fixed(ctx._re(s1), wp)
302
+ snim = s1im = ctx.to_fixed(ctx._im(s1), wp)
303
+ #c2 = (c1*c1 - s1*s1) >> wp
304
+ c2re = (c1re*c1re - c1im*c1im - s1re*s1re + s1im*s1im) >> wp
305
+ c2im = (c1re*c1im - s1re*s1im) >> (wp - 1)
306
+ #s2 = (c1 * s1) >> (wp - 1)
307
+ s2re = (c1re*s1re - c1im*s1im) >> (wp - 1)
308
+ s2im = (c1re*s1im + c1im*s1re) >> (wp - 1)
309
+ #cn, sn = (cn*c2 - sn*s2) >> wp, (sn*c2 + cn*s2) >> wp
310
+ t1 = (cnre*c2re - cnim*c2im - snre*s2re + snim*s2im) >> wp
311
+ t2 = (cnre*c2im + cnim*c2re - snre*s2im - snim*s2re) >> wp
312
+ t3 = (snre*c2re - snim*c2im + cnre*s2re - cnim*s2im) >> wp
313
+ t4 = (snre*c2im + snim*c2re + cnre*s2im + cnim*s2re) >> wp
314
+ cnre = t1
315
+ cnim = t2
316
+ snre = t3
317
+ snim = t4
318
+ if (nd&1):
319
+ sre = s1re + ((a * snre * 3**nd) >> wp)
320
+ sim = s1im + ((a * snim * 3**nd) >> wp)
321
+ else:
322
+ sre = c1re + ((a * cnre * 3**nd) >> wp)
323
+ sim = c1im + ((a * cnim * 3**nd) >> wp)
324
+ n = 5
325
+ while abs(a) > MIN:
326
+ b = (b*x2) >> wp
327
+ a = (a*b) >> wp
328
+ t1 = (cnre*c2re - cnim*c2im - snre*s2re + snim*s2im) >> wp
329
+ t2 = (cnre*c2im + cnim*c2re - snre*s2im - snim*s2re) >> wp
330
+ t3 = (snre*c2re - snim*c2im + cnre*s2re - cnim*s2im) >> wp
331
+ t4 = (snre*c2im + snim*c2re + cnre*s2im + cnim*s2re) >> wp
332
+ cnre = t1
333
+ cnim = t2
334
+ snre = t3
335
+ snim = t4
336
+ if (nd&1):
337
+ sre += ((a * snre * n**nd) >> wp)
338
+ sim += ((a * snim * n**nd) >> wp)
339
+ else:
340
+ sre += ((a * cnre * n**nd) >> wp)
341
+ sim += ((a * cnim * n**nd) >> wp)
342
+ n += 2
343
+ sre = -(sre << 1)
344
+ sim = -(sim << 1)
345
+ sre = ctx.ldexp(sre, -wp)
346
+ sim = ctx.ldexp(sim, -wp)
347
+ s = ctx.mpc(sre, sim)
348
+ # case z and q complex
349
+ else:
350
+ wp = ctx.prec + extra2
351
+ xre = ctx.to_fixed(ctx._re(q), wp)
352
+ xim = ctx.to_fixed(ctx._im(q), wp)
353
+ x2re = (xre*xre - xim*xim) >> wp
354
+ x2im = (xre*xim) >> (wp - 1)
355
+ are = bre = x2re
356
+ aim = bim = x2im
357
+ prec0 = ctx.prec
358
+ ctx.prec = wp
359
+ # cos(2*z), sin(2*z) with z complex
360
+ c1, s1 = ctx.cos_sin(z)
361
+ ctx.prec = prec0
362
+ cnre = c1re = ctx.to_fixed(ctx._re(c1), wp)
363
+ cnim = c1im = ctx.to_fixed(ctx._im(c1), wp)
364
+ snre = s1re = ctx.to_fixed(ctx._re(s1), wp)
365
+ snim = s1im = ctx.to_fixed(ctx._im(s1), wp)
366
+ c2re = (c1re*c1re - c1im*c1im - s1re*s1re + s1im*s1im) >> wp
367
+ c2im = (c1re*c1im - s1re*s1im) >> (wp - 1)
368
+ s2re = (c1re*s1re - c1im*s1im) >> (wp - 1)
369
+ s2im = (c1re*s1im + c1im*s1re) >> (wp - 1)
370
+ t1 = (cnre*c2re - cnim*c2im - snre*s2re + snim*s2im) >> wp
371
+ t2 = (cnre*c2im + cnim*c2re - snre*s2im - snim*s2re) >> wp
372
+ t3 = (snre*c2re - snim*c2im + cnre*s2re - cnim*s2im) >> wp
373
+ t4 = (snre*c2im + snim*c2re + cnre*s2im + cnim*s2re) >> wp
374
+ cnre = t1
375
+ cnim = t2
376
+ snre = t3
377
+ snim = t4
378
+ if (nd&1):
379
+ sre = s1re + (((are * snre - aim * snim) * 3**nd) >> wp)
380
+ sim = s1im + (((are * snim + aim * snre)* 3**nd) >> wp)
381
+ else:
382
+ sre = c1re + (((are * cnre - aim * cnim) * 3**nd) >> wp)
383
+ sim = c1im + (((are * cnim + aim * cnre)* 3**nd) >> wp)
384
+ n = 5
385
+ while are**2 + aim**2 > MIN:
386
+ bre, bim = (bre * x2re - bim * x2im) >> wp, \
387
+ (bre * x2im + bim * x2re) >> wp
388
+ are, aim = (are * bre - aim * bim) >> wp, \
389
+ (are * bim + aim * bre) >> wp
390
+ #cn, sn = (cn*c1 - sn*s1) >> wp, (sn*c1 + cn*s1) >> wp
391
+ t1 = (cnre*c2re - cnim*c2im - snre*s2re + snim*s2im) >> wp
392
+ t2 = (cnre*c2im + cnim*c2re - snre*s2im - snim*s2re) >> wp
393
+ t3 = (snre*c2re - snim*c2im + cnre*s2re - cnim*s2im) >> wp
394
+ t4 = (snre*c2im + snim*c2re + cnre*s2im + cnim*s2re) >> wp
395
+ cnre = t1
396
+ cnim = t2
397
+ snre = t3
398
+ snim = t4
399
+ if (nd&1):
400
+ sre += (((are * snre - aim * snim) * n**nd) >> wp)
401
+ sim += (((aim * snre + are * snim) * n**nd) >> wp)
402
+ else:
403
+ sre += (((are * cnre - aim * cnim) * n**nd) >> wp)
404
+ sim += (((aim * cnre + are * cnim) * n**nd) >> wp)
405
+ n += 2
406
+ sre = -(sre << 1)
407
+ sim = -(sim << 1)
408
+ sre = ctx.ldexp(sre, -wp)
409
+ sim = ctx.ldexp(sim, -wp)
410
+ s = ctx.mpc(sre, sim)
411
+ s *= ctx.nthroot(q, 4)
412
+ if (nd&1):
413
+ return (-1)**(nd//2) * s
414
+ else:
415
+ return (-1)**(1 + nd//2) * s
416
+
417
+ @defun
418
+ def _jacobi_theta3(ctx, z, q):
419
+ extra1 = 10
420
+ extra2 = 20
421
+ MIN = 2
422
+ if z == ctx.zero:
423
+ if not ctx._im(q):
424
+ wp = ctx.prec + extra1
425
+ x = ctx.to_fixed(ctx._re(q), wp)
426
+ s = x
427
+ a = b = x
428
+ x2 = (x*x) >> wp
429
+ while abs(a) > MIN:
430
+ b = (b*x2) >> wp
431
+ a = (a*b) >> wp
432
+ s += a
433
+ s = (1 << wp) + (s << 1)
434
+ s = ctx.ldexp(s, -wp)
435
+ return s
436
+ else:
437
+ wp = ctx.prec + extra1
438
+ xre = ctx.to_fixed(ctx._re(q), wp)
439
+ xim = ctx.to_fixed(ctx._im(q), wp)
440
+ x2re = (xre*xre - xim*xim) >> wp
441
+ x2im = (xre*xim) >> (wp - 1)
442
+ sre = are = bre = xre
443
+ sim = aim = bim = xim
444
+ while are**2 + aim**2 > MIN:
445
+ bre, bim = (bre * x2re - bim * x2im) >> wp, \
446
+ (bre * x2im + bim * x2re) >> wp
447
+ are, aim = (are * bre - aim * bim) >> wp, \
448
+ (are * bim + aim * bre) >> wp
449
+ sre += are
450
+ sim += aim
451
+ sre = (1 << wp) + (sre << 1)
452
+ sim = (sim << 1)
453
+ sre = ctx.ldexp(sre, -wp)
454
+ sim = ctx.ldexp(sim, -wp)
455
+ s = ctx.mpc(sre, sim)
456
+ return s
457
+ else:
458
+ if (not ctx._im(q)) and (not ctx._im(z)):
459
+ s = 0
460
+ wp = ctx.prec + extra1
461
+ x = ctx.to_fixed(ctx._re(q), wp)
462
+ a = b = x
463
+ x2 = (x*x) >> wp
464
+ c1, s1 = ctx.cos_sin(ctx._re(z)*2, prec=wp)
465
+ c1 = ctx.to_fixed(c1, wp)
466
+ s1 = ctx.to_fixed(s1, wp)
467
+ cn = c1
468
+ sn = s1
469
+ s += (a * cn) >> wp
470
+ while abs(a) > MIN:
471
+ b = (b*x2) >> wp
472
+ a = (a*b) >> wp
473
+ cn, sn = (cn*c1 - sn*s1) >> wp, (sn*c1 + cn*s1) >> wp
474
+ s += (a * cn) >> wp
475
+ s = (1 << wp) + (s << 1)
476
+ s = ctx.ldexp(s, -wp)
477
+ return s
478
+ # case z real, q complex
479
+ elif not ctx._im(z):
480
+ wp = ctx.prec + extra2
481
+ xre = ctx.to_fixed(ctx._re(q), wp)
482
+ xim = ctx.to_fixed(ctx._im(q), wp)
483
+ x2re = (xre*xre - xim*xim) >> wp
484
+ x2im = (xre*xim) >> (wp - 1)
485
+ are = bre = xre
486
+ aim = bim = xim
487
+ c1, s1 = ctx.cos_sin(ctx._re(z)*2, prec=wp)
488
+ c1 = ctx.to_fixed(c1, wp)
489
+ s1 = ctx.to_fixed(s1, wp)
490
+ cn = c1
491
+ sn = s1
492
+ sre = (are * cn) >> wp
493
+ sim = (aim * cn) >> wp
494
+ while are**2 + aim**2 > MIN:
495
+ bre, bim = (bre * x2re - bim * x2im) >> wp, \
496
+ (bre * x2im + bim * x2re) >> wp
497
+ are, aim = (are * bre - aim * bim) >> wp, \
498
+ (are * bim + aim * bre) >> wp
499
+ cn, sn = (cn*c1 - sn*s1) >> wp, (sn*c1 + cn*s1) >> wp
500
+ sre += (are * cn) >> wp
501
+ sim += (aim * cn) >> wp
502
+ sre = (1 << wp) + (sre << 1)
503
+ sim = (sim << 1)
504
+ sre = ctx.ldexp(sre, -wp)
505
+ sim = ctx.ldexp(sim, -wp)
506
+ s = ctx.mpc(sre, sim)
507
+ return s
508
+ #case z complex, q real
509
+ elif not ctx._im(q):
510
+ wp = ctx.prec + extra2
511
+ x = ctx.to_fixed(ctx._re(q), wp)
512
+ a = b = x
513
+ x2 = (x*x) >> wp
514
+ prec0 = ctx.prec
515
+ ctx.prec = wp
516
+ c1, s1 = ctx.cos_sin(2*z)
517
+ ctx.prec = prec0
518
+ cnre = c1re = ctx.to_fixed(ctx._re(c1), wp)
519
+ cnim = c1im = ctx.to_fixed(ctx._im(c1), wp)
520
+ snre = s1re = ctx.to_fixed(ctx._re(s1), wp)
521
+ snim = s1im = ctx.to_fixed(ctx._im(s1), wp)
522
+ sre = (a * cnre) >> wp
523
+ sim = (a * cnim) >> wp
524
+ while abs(a) > MIN:
525
+ b = (b*x2) >> wp
526
+ a = (a*b) >> wp
527
+ t1 = (cnre*c1re - cnim*c1im - snre*s1re + snim*s1im) >> wp
528
+ t2 = (cnre*c1im + cnim*c1re - snre*s1im - snim*s1re) >> wp
529
+ t3 = (snre*c1re - snim*c1im + cnre*s1re - cnim*s1im) >> wp
530
+ t4 = (snre*c1im + snim*c1re + cnre*s1im + cnim*s1re) >> wp
531
+ cnre = t1
532
+ cnim = t2
533
+ snre = t3
534
+ snim = t4
535
+ sre += (a * cnre) >> wp
536
+ sim += (a * cnim) >> wp
537
+ sre = (1 << wp) + (sre << 1)
538
+ sim = (sim << 1)
539
+ sre = ctx.ldexp(sre, -wp)
540
+ sim = ctx.ldexp(sim, -wp)
541
+ s = ctx.mpc(sre, sim)
542
+ return s
543
+ # case z and q complex
544
+ else:
545
+ wp = ctx.prec + extra2
546
+ xre = ctx.to_fixed(ctx._re(q), wp)
547
+ xim = ctx.to_fixed(ctx._im(q), wp)
548
+ x2re = (xre*xre - xim*xim) >> wp
549
+ x2im = (xre*xim) >> (wp - 1)
550
+ are = bre = xre
551
+ aim = bim = xim
552
+ prec0 = ctx.prec
553
+ ctx.prec = wp
554
+ # cos(2*z), sin(2*z) with z complex
555
+ c1, s1 = ctx.cos_sin(2*z)
556
+ ctx.prec = prec0
557
+ cnre = c1re = ctx.to_fixed(ctx._re(c1), wp)
558
+ cnim = c1im = ctx.to_fixed(ctx._im(c1), wp)
559
+ snre = s1re = ctx.to_fixed(ctx._re(s1), wp)
560
+ snim = s1im = ctx.to_fixed(ctx._im(s1), wp)
561
+ sre = (are * cnre - aim * cnim) >> wp
562
+ sim = (aim * cnre + are * cnim) >> wp
563
+ while are**2 + aim**2 > MIN:
564
+ bre, bim = (bre * x2re - bim * x2im) >> wp, \
565
+ (bre * x2im + bim * x2re) >> wp
566
+ are, aim = (are * bre - aim * bim) >> wp, \
567
+ (are * bim + aim * bre) >> wp
568
+ t1 = (cnre*c1re - cnim*c1im - snre*s1re + snim*s1im) >> wp
569
+ t2 = (cnre*c1im + cnim*c1re - snre*s1im - snim*s1re) >> wp
570
+ t3 = (snre*c1re - snim*c1im + cnre*s1re - cnim*s1im) >> wp
571
+ t4 = (snre*c1im + snim*c1re + cnre*s1im + cnim*s1re) >> wp
572
+ cnre = t1
573
+ cnim = t2
574
+ snre = t3
575
+ snim = t4
576
+ sre += (are * cnre - aim * cnim) >> wp
577
+ sim += (aim * cnre + are * cnim) >> wp
578
+ sre = (1 << wp) + (sre << 1)
579
+ sim = (sim << 1)
580
+ sre = ctx.ldexp(sre, -wp)
581
+ sim = ctx.ldexp(sim, -wp)
582
+ s = ctx.mpc(sre, sim)
583
+ return s
584
+
585
+ @defun
586
+ def _djacobi_theta3(ctx, z, q, nd):
587
+ """nd=1,2,3 order of the derivative with respect to z"""
588
+ MIN = 2
589
+ extra1 = 10
590
+ extra2 = 20
591
+ if (not ctx._im(q)) and (not ctx._im(z)):
592
+ s = 0
593
+ wp = ctx.prec + extra1
594
+ x = ctx.to_fixed(ctx._re(q), wp)
595
+ a = b = x
596
+ x2 = (x*x) >> wp
597
+ c1, s1 = ctx.cos_sin(ctx._re(z)*2, prec=wp)
598
+ c1 = ctx.to_fixed(c1, wp)
599
+ s1 = ctx.to_fixed(s1, wp)
600
+ cn = c1
601
+ sn = s1
602
+ if (nd&1):
603
+ s += (a * sn) >> wp
604
+ else:
605
+ s += (a * cn) >> wp
606
+ n = 2
607
+ while abs(a) > MIN:
608
+ b = (b*x2) >> wp
609
+ a = (a*b) >> wp
610
+ cn, sn = (cn*c1 - sn*s1) >> wp, (sn*c1 + cn*s1) >> wp
611
+ if nd&1:
612
+ s += (a * sn * n**nd) >> wp
613
+ else:
614
+ s += (a * cn * n**nd) >> wp
615
+ n += 1
616
+ s = -(s << (nd+1))
617
+ s = ctx.ldexp(s, -wp)
618
+ # case z real, q complex
619
+ elif not ctx._im(z):
620
+ wp = ctx.prec + extra2
621
+ xre = ctx.to_fixed(ctx._re(q), wp)
622
+ xim = ctx.to_fixed(ctx._im(q), wp)
623
+ x2re = (xre*xre - xim*xim) >> wp
624
+ x2im = (xre*xim) >> (wp - 1)
625
+ are = bre = xre
626
+ aim = bim = xim
627
+ c1, s1 = ctx.cos_sin(ctx._re(z)*2, prec=wp)
628
+ c1 = ctx.to_fixed(c1, wp)
629
+ s1 = ctx.to_fixed(s1, wp)
630
+ cn = c1
631
+ sn = s1
632
+ if (nd&1):
633
+ sre = (are * sn) >> wp
634
+ sim = (aim * sn) >> wp
635
+ else:
636
+ sre = (are * cn) >> wp
637
+ sim = (aim * cn) >> wp
638
+ n = 2
639
+ while are**2 + aim**2 > MIN:
640
+ bre, bim = (bre * x2re - bim * x2im) >> wp, \
641
+ (bre * x2im + bim * x2re) >> wp
642
+ are, aim = (are * bre - aim * bim) >> wp, \
643
+ (are * bim + aim * bre) >> wp
644
+ cn, sn = (cn*c1 - sn*s1) >> wp, (sn*c1 + cn*s1) >> wp
645
+ if nd&1:
646
+ sre += (are * sn * n**nd) >> wp
647
+ sim += (aim * sn * n**nd) >> wp
648
+ else:
649
+ sre += (are * cn * n**nd) >> wp
650
+ sim += (aim * cn * n**nd) >> wp
651
+ n += 1
652
+ sre = -(sre << (nd+1))
653
+ sim = -(sim << (nd+1))
654
+ sre = ctx.ldexp(sre, -wp)
655
+ sim = ctx.ldexp(sim, -wp)
656
+ s = ctx.mpc(sre, sim)
657
+ #case z complex, q real
658
+ elif not ctx._im(q):
659
+ wp = ctx.prec + extra2
660
+ x = ctx.to_fixed(ctx._re(q), wp)
661
+ a = b = x
662
+ x2 = (x*x) >> wp
663
+ prec0 = ctx.prec
664
+ ctx.prec = wp
665
+ c1, s1 = ctx.cos_sin(2*z)
666
+ ctx.prec = prec0
667
+ cnre = c1re = ctx.to_fixed(ctx._re(c1), wp)
668
+ cnim = c1im = ctx.to_fixed(ctx._im(c1), wp)
669
+ snre = s1re = ctx.to_fixed(ctx._re(s1), wp)
670
+ snim = s1im = ctx.to_fixed(ctx._im(s1), wp)
671
+ if (nd&1):
672
+ sre = (a * snre) >> wp
673
+ sim = (a * snim) >> wp
674
+ else:
675
+ sre = (a * cnre) >> wp
676
+ sim = (a * cnim) >> wp
677
+ n = 2
678
+ while abs(a) > MIN:
679
+ b = (b*x2) >> wp
680
+ a = (a*b) >> wp
681
+ t1 = (cnre*c1re - cnim*c1im - snre*s1re + snim*s1im) >> wp
682
+ t2 = (cnre*c1im + cnim*c1re - snre*s1im - snim*s1re) >> wp
683
+ t3 = (snre*c1re - snim*c1im + cnre*s1re - cnim*s1im) >> wp
684
+ t4 = (snre*c1im + snim*c1re + cnre*s1im + cnim*s1re) >> wp
685
+ cnre = t1
686
+ cnim = t2
687
+ snre = t3
688
+ snim = t4
689
+ if (nd&1):
690
+ sre += (a * snre * n**nd) >> wp
691
+ sim += (a * snim * n**nd) >> wp
692
+ else:
693
+ sre += (a * cnre * n**nd) >> wp
694
+ sim += (a * cnim * n**nd) >> wp
695
+ n += 1
696
+ sre = -(sre << (nd+1))
697
+ sim = -(sim << (nd+1))
698
+ sre = ctx.ldexp(sre, -wp)
699
+ sim = ctx.ldexp(sim, -wp)
700
+ s = ctx.mpc(sre, sim)
701
+ # case z and q complex
702
+ else:
703
+ wp = ctx.prec + extra2
704
+ xre = ctx.to_fixed(ctx._re(q), wp)
705
+ xim = ctx.to_fixed(ctx._im(q), wp)
706
+ x2re = (xre*xre - xim*xim) >> wp
707
+ x2im = (xre*xim) >> (wp - 1)
708
+ are = bre = xre
709
+ aim = bim = xim
710
+ prec0 = ctx.prec
711
+ ctx.prec = wp
712
+ # cos(2*z), sin(2*z) with z complex
713
+ c1, s1 = ctx.cos_sin(2*z)
714
+ ctx.prec = prec0
715
+ cnre = c1re = ctx.to_fixed(ctx._re(c1), wp)
716
+ cnim = c1im = ctx.to_fixed(ctx._im(c1), wp)
717
+ snre = s1re = ctx.to_fixed(ctx._re(s1), wp)
718
+ snim = s1im = ctx.to_fixed(ctx._im(s1), wp)
719
+ if (nd&1):
720
+ sre = (are * snre - aim * snim) >> wp
721
+ sim = (aim * snre + are * snim) >> wp
722
+ else:
723
+ sre = (are * cnre - aim * cnim) >> wp
724
+ sim = (aim * cnre + are * cnim) >> wp
725
+ n = 2
726
+ while are**2 + aim**2 > MIN:
727
+ bre, bim = (bre * x2re - bim * x2im) >> wp, \
728
+ (bre * x2im + bim * x2re) >> wp
729
+ are, aim = (are * bre - aim * bim) >> wp, \
730
+ (are * bim + aim * bre) >> wp
731
+ t1 = (cnre*c1re - cnim*c1im - snre*s1re + snim*s1im) >> wp
732
+ t2 = (cnre*c1im + cnim*c1re - snre*s1im - snim*s1re) >> wp
733
+ t3 = (snre*c1re - snim*c1im + cnre*s1re - cnim*s1im) >> wp
734
+ t4 = (snre*c1im + snim*c1re + cnre*s1im + cnim*s1re) >> wp
735
+ cnre = t1
736
+ cnim = t2
737
+ snre = t3
738
+ snim = t4
739
+ if(nd&1):
740
+ sre += ((are * snre - aim * snim) * n**nd) >> wp
741
+ sim += ((aim * snre + are * snim) * n**nd) >> wp
742
+ else:
743
+ sre += ((are * cnre - aim * cnim) * n**nd) >> wp
744
+ sim += ((aim * cnre + are * cnim) * n**nd) >> wp
745
+ n += 1
746
+ sre = -(sre << (nd+1))
747
+ sim = -(sim << (nd+1))
748
+ sre = ctx.ldexp(sre, -wp)
749
+ sim = ctx.ldexp(sim, -wp)
750
+ s = ctx.mpc(sre, sim)
751
+ if (nd&1):
752
+ return (-1)**(nd//2) * s
753
+ else:
754
+ return (-1)**(1 + nd//2) * s
755
+
756
+ @defun
757
+ def _jacobi_theta2a(ctx, z, q):
758
+ """
759
+ case ctx._im(z) != 0
760
+ theta(2, z, q) =
761
+ q**1/4 * Sum(q**(n*n + n) * exp(j*(2*n + 1)*z), n=-inf, inf)
762
+ max term for minimum (2*n+1)*log(q).real - 2* ctx._im(z)
763
+ n0 = int(ctx._im(z)/log(q).real - 1/2)
764
+ theta(2, z, q) =
765
+ q**1/4 * Sum(q**(n*n + n) * exp(j*(2*n + 1)*z), n=n0, inf) +
766
+ q**1/4 * Sum(q**(n*n + n) * exp(j*(2*n + 1)*z), n, n0-1, -inf)
767
+ """
768
+ n = n0 = int(ctx._im(z)/ctx._re(ctx.log(q)) - 1/2)
769
+ e2 = ctx.expj(2*z)
770
+ e = e0 = ctx.expj((2*n+1)*z)
771
+ a = q**(n*n + n)
772
+ # leading term
773
+ term = a * e
774
+ s = term
775
+ eps1 = ctx.eps*abs(term)
776
+ while 1:
777
+ n += 1
778
+ e = e * e2
779
+ term = q**(n*n + n) * e
780
+ if abs(term) < eps1:
781
+ break
782
+ s += term
783
+ e = e0
784
+ e2 = ctx.expj(-2*z)
785
+ n = n0
786
+ while 1:
787
+ n -= 1
788
+ e = e * e2
789
+ term = q**(n*n + n) * e
790
+ if abs(term) < eps1:
791
+ break
792
+ s += term
793
+ s = s * ctx.nthroot(q, 4)
794
+ return s
795
+
796
+ @defun
797
+ def _jacobi_theta3a(ctx, z, q):
798
+ """
799
+ case ctx._im(z) != 0
800
+ theta3(z, q) = Sum(q**(n*n) * exp(j*2*n*z), n, -inf, inf)
801
+ max term for n*abs(log(q).real) + ctx._im(z) ~= 0
802
+ n0 = int(- ctx._im(z)/abs(log(q).real))
803
+ """
804
+ n = n0 = int(-ctx._im(z)/abs(ctx._re(ctx.log(q))))
805
+ e2 = ctx.expj(2*z)
806
+ e = e0 = ctx.expj(2*n*z)
807
+ s = term = q**(n*n) * e
808
+ eps1 = ctx.eps*abs(term)
809
+ while 1:
810
+ n += 1
811
+ e = e * e2
812
+ term = q**(n*n) * e
813
+ if abs(term) < eps1:
814
+ break
815
+ s += term
816
+ e = e0
817
+ e2 = ctx.expj(-2*z)
818
+ n = n0
819
+ while 1:
820
+ n -= 1
821
+ e = e * e2
822
+ term = q**(n*n) * e
823
+ if abs(term) < eps1:
824
+ break
825
+ s += term
826
+ return s
827
+
828
+ @defun
829
+ def _djacobi_theta2a(ctx, z, q, nd):
830
+ """
831
+ case ctx._im(z) != 0
832
+ dtheta(2, z, q, nd) =
833
+ j* q**1/4 * Sum(q**(n*n + n) * (2*n+1)*exp(j*(2*n + 1)*z), n=-inf, inf)
834
+ max term for (2*n0+1)*log(q).real - 2* ctx._im(z) ~= 0
835
+ n0 = int(ctx._im(z)/log(q).real - 1/2)
836
+ """
837
+ n = n0 = int(ctx._im(z)/ctx._re(ctx.log(q)) - 1/2)
838
+ e2 = ctx.expj(2*z)
839
+ e = e0 = ctx.expj((2*n + 1)*z)
840
+ a = q**(n*n + n)
841
+ # leading term
842
+ term = (2*n+1)**nd * a * e
843
+ s = term
844
+ eps1 = ctx.eps*abs(term)
845
+ while 1:
846
+ n += 1
847
+ e = e * e2
848
+ term = (2*n+1)**nd * q**(n*n + n) * e
849
+ if abs(term) < eps1:
850
+ break
851
+ s += term
852
+ e = e0
853
+ e2 = ctx.expj(-2*z)
854
+ n = n0
855
+ while 1:
856
+ n -= 1
857
+ e = e * e2
858
+ term = (2*n+1)**nd * q**(n*n + n) * e
859
+ if abs(term) < eps1:
860
+ break
861
+ s += term
862
+ return ctx.j**nd * s * ctx.nthroot(q, 4)
863
+
864
+ @defun
865
+ def _djacobi_theta3a(ctx, z, q, nd):
866
+ """
867
+ case ctx._im(z) != 0
868
+ djtheta3(z, q, nd) = (2*j)**nd *
869
+ Sum(q**(n*n) * n**nd * exp(j*2*n*z), n, -inf, inf)
870
+ max term for minimum n*abs(log(q).real) + ctx._im(z)
871
+ """
872
+ n = n0 = int(-ctx._im(z)/abs(ctx._re(ctx.log(q))))
873
+ e2 = ctx.expj(2*z)
874
+ e = e0 = ctx.expj(2*n*z)
875
+ a = q**(n*n) * e
876
+ s = term = n**nd * a
877
+ if n != 0:
878
+ eps1 = ctx.eps*abs(term)
879
+ else:
880
+ eps1 = ctx.eps*abs(a)
881
+ while 1:
882
+ n += 1
883
+ e = e * e2
884
+ a = q**(n*n) * e
885
+ term = n**nd * a
886
+ if n != 0:
887
+ aterm = abs(term)
888
+ else:
889
+ aterm = abs(a)
890
+ if aterm < eps1:
891
+ break
892
+ s += term
893
+ e = e0
894
+ e2 = ctx.expj(-2*z)
895
+ n = n0
896
+ while 1:
897
+ n -= 1
898
+ e = e * e2
899
+ a = q**(n*n) * e
900
+ term = n**nd * a
901
+ if n != 0:
902
+ aterm = abs(term)
903
+ else:
904
+ aterm = abs(a)
905
+ if aterm < eps1:
906
+ break
907
+ s += term
908
+ return (2*ctx.j)**nd * s
909
+
910
+ @defun
911
+ def jtheta(ctx, n, z, q, derivative=0):
912
+ if derivative:
913
+ return ctx._djtheta(n, z, q, derivative)
914
+
915
+ z = ctx.convert(z)
916
+ q = ctx.convert(q)
917
+
918
+ # Implementation note
919
+ # If ctx._im(z) is close to zero, _jacobi_theta2 and _jacobi_theta3
920
+ # are used,
921
+ # which compute the series starting from n=0 using fixed precision
922
+ # numbers;
923
+ # otherwise _jacobi_theta2a and _jacobi_theta3a are used, which compute
924
+ # the series starting from n=n0, which is the largest term.
925
+
926
+ # TODO: write _jacobi_theta2a and _jacobi_theta3a using fixed-point
927
+
928
+ if abs(q) > ctx.THETA_Q_LIM:
929
+ raise ValueError('abs(q) > THETA_Q_LIM = %f' % ctx.THETA_Q_LIM)
930
+
931
+ extra = 10
932
+ if z:
933
+ M = ctx.mag(z)
934
+ if M > 5 or (n == 1 and M < -5):
935
+ extra += 2*abs(M)
936
+ cz = 0.5
937
+ extra2 = 50
938
+ prec0 = ctx.prec
939
+ try:
940
+ ctx.prec += extra
941
+ if n == 1:
942
+ if ctx._im(z):
943
+ if abs(ctx._im(z)) < cz * abs(ctx._re(ctx.log(q))):
944
+ ctx.dps += extra2
945
+ res = ctx._jacobi_theta2(z - ctx.pi/2, q)
946
+ else:
947
+ ctx.dps += 10
948
+ res = ctx._jacobi_theta2a(z - ctx.pi/2, q)
949
+ else:
950
+ res = ctx._jacobi_theta2(z - ctx.pi/2, q)
951
+ elif n == 2:
952
+ if ctx._im(z):
953
+ if abs(ctx._im(z)) < cz * abs(ctx._re(ctx.log(q))):
954
+ ctx.dps += extra2
955
+ res = ctx._jacobi_theta2(z, q)
956
+ else:
957
+ ctx.dps += 10
958
+ res = ctx._jacobi_theta2a(z, q)
959
+ else:
960
+ res = ctx._jacobi_theta2(z, q)
961
+ elif n == 3:
962
+ if ctx._im(z):
963
+ if abs(ctx._im(z)) < cz * abs(ctx._re(ctx.log(q))):
964
+ ctx.dps += extra2
965
+ res = ctx._jacobi_theta3(z, q)
966
+ else:
967
+ ctx.dps += 10
968
+ res = ctx._jacobi_theta3a(z, q)
969
+ else:
970
+ res = ctx._jacobi_theta3(z, q)
971
+ elif n == 4:
972
+ if ctx._im(z):
973
+ if abs(ctx._im(z)) < cz * abs(ctx._re(ctx.log(q))):
974
+ ctx.dps += extra2
975
+ res = ctx._jacobi_theta3(z, -q)
976
+ else:
977
+ ctx.dps += 10
978
+ res = ctx._jacobi_theta3a(z, -q)
979
+ else:
980
+ res = ctx._jacobi_theta3(z, -q)
981
+ else:
982
+ raise ValueError
983
+ finally:
984
+ ctx.prec = prec0
985
+ return res
986
+
987
+ @defun
988
+ def _djtheta(ctx, n, z, q, derivative=1):
989
+ z = ctx.convert(z)
990
+ q = ctx.convert(q)
991
+ nd = int(derivative)
992
+
993
+ if abs(q) > ctx.THETA_Q_LIM:
994
+ raise ValueError('abs(q) > THETA_Q_LIM = %f' % ctx.THETA_Q_LIM)
995
+ extra = 10 + ctx.prec * nd // 10
996
+ if z:
997
+ M = ctx.mag(z)
998
+ if M > 5 or (n != 1 and M < -5):
999
+ extra += 2*abs(M)
1000
+ cz = 0.5
1001
+ extra2 = 50
1002
+ prec0 = ctx.prec
1003
+ try:
1004
+ ctx.prec += extra
1005
+ if n == 1:
1006
+ if ctx._im(z):
1007
+ if abs(ctx._im(z)) < cz * abs(ctx._re(ctx.log(q))):
1008
+ ctx.dps += extra2
1009
+ res = ctx._djacobi_theta2(z - ctx.pi/2, q, nd)
1010
+ else:
1011
+ ctx.dps += 10
1012
+ res = ctx._djacobi_theta2a(z - ctx.pi/2, q, nd)
1013
+ else:
1014
+ res = ctx._djacobi_theta2(z - ctx.pi/2, q, nd)
1015
+ elif n == 2:
1016
+ if ctx._im(z):
1017
+ if abs(ctx._im(z)) < cz * abs(ctx._re(ctx.log(q))):
1018
+ ctx.dps += extra2
1019
+ res = ctx._djacobi_theta2(z, q, nd)
1020
+ else:
1021
+ ctx.dps += 10
1022
+ res = ctx._djacobi_theta2a(z, q, nd)
1023
+ else:
1024
+ res = ctx._djacobi_theta2(z, q, nd)
1025
+ elif n == 3:
1026
+ if ctx._im(z):
1027
+ if abs(ctx._im(z)) < cz * abs(ctx._re(ctx.log(q))):
1028
+ ctx.dps += extra2
1029
+ res = ctx._djacobi_theta3(z, q, nd)
1030
+ else:
1031
+ ctx.dps += 10
1032
+ res = ctx._djacobi_theta3a(z, q, nd)
1033
+ else:
1034
+ res = ctx._djacobi_theta3(z, q, nd)
1035
+ elif n == 4:
1036
+ if ctx._im(z):
1037
+ if abs(ctx._im(z)) < cz * abs(ctx._re(ctx.log(q))):
1038
+ ctx.dps += extra2
1039
+ res = ctx._djacobi_theta3(z, -q, nd)
1040
+ else:
1041
+ ctx.dps += 10
1042
+ res = ctx._djacobi_theta3a(z, -q, nd)
1043
+ else:
1044
+ res = ctx._djacobi_theta3(z, -q, nd)
1045
+ else:
1046
+ raise ValueError
1047
+ finally:
1048
+ ctx.prec = prec0
1049
+ return +res
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/functions/zetazeros.py ADDED
@@ -0,0 +1,1018 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ The function zetazero(n) computes the n-th nontrivial zero of zeta(s).
3
+
4
+ The general strategy is to locate a block of Gram intervals B where we
5
+ know exactly the number of zeros contained and which of those zeros
6
+ is that which we search.
7
+
8
+ If n <= 400 000 000 we know exactly the Rosser exceptions, contained
9
+ in a list in this file. Hence for n<=400 000 000 we simply
10
+ look at these list of exceptions. If our zero is implicated in one of
11
+ these exceptions we have our block B. In other case we simply locate
12
+ the good Rosser block containing our zero.
13
+
14
+ For n > 400 000 000 we apply the method of Turing, as complemented by
15
+ Lehman, Brent and Trudgian to find a suitable B.
16
+ """
17
+
18
+ from .functions import defun, defun_wrapped
19
+
20
+ def find_rosser_block_zero(ctx, n):
21
+ """for n<400 000 000 determines a block were one find our zero"""
22
+ for k in range(len(_ROSSER_EXCEPTIONS)//2):
23
+ a=_ROSSER_EXCEPTIONS[2*k][0]
24
+ b=_ROSSER_EXCEPTIONS[2*k][1]
25
+ if ((a<= n-2) and (n-1 <= b)):
26
+ t0 = ctx.grampoint(a)
27
+ t1 = ctx.grampoint(b)
28
+ v0 = ctx._fp.siegelz(t0)
29
+ v1 = ctx._fp.siegelz(t1)
30
+ my_zero_number = n-a-1
31
+ zero_number_block = b-a
32
+ pattern = _ROSSER_EXCEPTIONS[2*k+1]
33
+ return (my_zero_number, [a,b], [t0,t1], [v0,v1])
34
+ k = n-2
35
+ t,v,b = compute_triple_tvb(ctx, k)
36
+ T = [t]
37
+ V = [v]
38
+ while b < 0:
39
+ k -= 1
40
+ t,v,b = compute_triple_tvb(ctx, k)
41
+ T.insert(0,t)
42
+ V.insert(0,v)
43
+ my_zero_number = n-k-1
44
+ m = n-1
45
+ t,v,b = compute_triple_tvb(ctx, m)
46
+ T.append(t)
47
+ V.append(v)
48
+ while b < 0:
49
+ m += 1
50
+ t,v,b = compute_triple_tvb(ctx, m)
51
+ T.append(t)
52
+ V.append(v)
53
+ return (my_zero_number, [k,m], T, V)
54
+
55
+ def wpzeros(t):
56
+ """Precision needed to compute higher zeros"""
57
+ wp = 53
58
+ if t > 3*10**8:
59
+ wp = 63
60
+ if t > 10**11:
61
+ wp = 70
62
+ if t > 10**14:
63
+ wp = 83
64
+ return wp
65
+
66
def separate_zeros_in_block(ctx, zero_number_block, T, V, limitloop=None,
    fp_tolerance=None):
    """Separate the zeros contained in the block T, limitloop
    determines how long one must search.

    Repeatedly bisects each subinterval of T (weighted by the Z values when
    both endpoints have the same sign) until the number of sign changes in V
    equals zero_number_block, or limitloop passes have been made.
    Returns (T, V, separated) where separated is True iff all zeros were
    isolated.

    NOTE(review): fp_tolerance defaults to None but is compared with ``< 10``
    below; callers in this module always pass a numeric value — confirm
    before calling with the default.
    """
    if limitloop is None:
        limitloop = ctx.inf
    loopnumber = 0
    variations = count_variations(V)
    while ((variations < zero_number_block) and (loopnumber <limitloop)):
        a = T[0]
        v = V[0]
        newT = [a]
        newV = [v]
        variations = 0
        for n in range(1,len(T)):
            b2 = T[n]
            u = V[n]
            if (u*v>0):
                # Same sign at both ends: pick the subdivision point weighted
                # by sqrt(u/v) so it falls nearer the smaller |Z| value.
                alpha = ctx.sqrt(u/v)
                b= (alpha*a+b2)/(alpha+1)
            else:
                b = (a+b2)/2
            if fp_tolerance < 10:
                # Cheap float evaluation first; fall back to full precision
                # only when the float value is too small to trust its sign.
                w = ctx._fp.siegelz(b)
                if abs(w)<fp_tolerance:
                    w = ctx.siegelz(b)
            else:
                w=ctx.siegelz(b)
            if v*w<0:
                variations += 1
            newT.append(b)
            newV.append(w)
            u = V[n]
            if u*w <0:
                variations += 1
            newT.append(b2)
            newV.append(u)
            a = b2
            v = u
        T = newT
        V = newV
        loopnumber +=1
        # Heuristic rescue: if exactly two zeros are still missing, look for
        # a close pair in the widest gap using the derivative of Z.
        if (limitloop>ITERATION_LIMIT)and(loopnumber>2)and(variations+2==zero_number_block):
            dtMax=0
            dtSec=0
            kMax = 0
            for k1 in range(1,len(T)):
                dt = T[k1]-T[k1-1]
                if dt > dtMax:
                    kMax=k1
                    dtSec = dtMax
                    dtMax = dt
                elif (dt<dtMax) and(dt >dtSec):
                    dtSec = dt
            if dtMax>3*dtSec:
                f = lambda x: ctx.rs_z(x,derivative=1)
                t0=T[kMax-1]
                t1 = T[kMax]
                t=ctx.findroot(f, (t0,t1), solver ='illinois',verify=False, verbose=False)
                v = ctx.siegelz(t)
                if (t0<t) and (t<t1) and (v*V[kMax]<0):
                    T.insert(kMax,t)
                    V.insert(kMax,v)
        variations = count_variations(V)
    if variations == zero_number_block:
        separated = True
    else:
        separated = False
    return (T,V, separated)
135
+
136
def separate_my_zero(ctx, my_zero_number, zero_number_block, T, V, prec):
    """If we know which zero of this block is mine,
    the function separates the zero.

    Locates the my_zero_number-th sign change in V, then refines the zero:
    first with an Illinois root search on Z at low precision, then by Newton
    steps on zeta at a ladder of increasing precisions up to ``prec``.
    Returns the imaginary part of the zero.
    """
    variations = 0
    v0 = V[0]
    # Find the subinterval [T[k0-1], T[k0]] containing the wanted sign change.
    for k in range(1,len(V)):
        v1 = V[k]
        if v0*v1 < 0:
            variations +=1
            if variations == my_zero_number:
                k0 = k
                leftv = v0
                rightv = v1
        v0 = v1
    t1 = T[k0]
    t0 = T[k0-1]
    ctx.prec = prec
    wpz = wpzeros(my_zero_number*ctx.log(my_zero_number))

    guard = 4*ctx.mag(my_zero_number)
    # Build a descending ladder of working precisions, halving each time,
    # so the first (cheapest) step runs near 2*wpz bits.
    precs = [ctx.prec+4]
    index=0
    while precs[0] > 2*wpz:
        index +=1
        precs = [precs[0] // 2 +3+2*index] + precs
    ctx.prec = precs[0] + guard
    r = ctx.findroot(lambda x:ctx.siegelz(x), (t0,t1), solver ='illinois', verbose=False)
    #print "first step at", ctx.dps, "digits"
    z=ctx.mpc(0.5,r)
    for prec in precs[1:]:
        ctx.prec = prec + guard
        #print "refining to", ctx.dps, "digits"
        # One Newton step on zeta; then re-pin the real part to 1/2.
        znew = z - ctx.zeta(z) / ctx.zeta(z, derivative=1)
        #print "difference", ctx.nstr(abs(z-znew))
        z=ctx.mpc(0.5,ctx.im(znew))
    return ctx.im(z)
172
+
173
def sure_number_block(ctx, n):
    """The number of good Rosser blocks needed to apply
    Turing method
    References:
    R. P. Brent, On the Zeros of the Riemann Zeta Function
    in the Critical Strip, Math. Comp. 33 (1979) 1361--1372
    T. Trudgian, Improvements to Turing Method, Math. Comp."""
    if n < 9*10**5:
        return 2
    # Take the smaller of the Brent and Trudgian bounds at g_{n-100}.
    lg = ctx._fp.ln(ctx.grampoint(n-100))
    brent = 0.0061 * lg**2 + 0.08*lg
    trudgian = 0.0031 * lg**2 + 0.11*lg
    return int(ctx.ceil(min(brent, trudgian)))
189
+
190
def compute_triple_tvb(ctx, n):
    """Return (t, v, b) for Gram point n.

    t is the Gram point ordinate g_n, v the value Z(t), and
    b = (-1)**n * v; b >= 0 means the Gram point is "good".
    """
    t = ctx.grampoint(n)
    v = ctx._fp.siegelz(t)
    # The cheap float value cannot be trusted when it is tiny relative to t;
    # recompute at working precision in that case.
    if ctx.mag(abs(v)) < ctx.mag(t) - 45:
        v = ctx.siegelz(t)
    b = -v if n % 2 else v
    return t, v, b
197
+
198
+
199
+
200
# Maximum number of zero-separation refinement passes allowed while
# scanning for a "supergood" block (see search_supergood_block).
ITERATION_LIMIT = 4
201
+
202
def search_supergood_block(ctx, n, fp_tolerance):
    """To use for n>400 000 000.

    Scans Rosser blocks to the right and to the left of zero number n until
    2*sure_number_block(ctx, n) consecutive blocks on each side have had all
    their zeros separated, as required by the Turing/Lehman/Brent/Trudgian
    criterion. Returns (my_zero_number, [r, s], T, V) in the same format as
    find_rosser_block_zero.
    """
    sb = sure_number_block(ctx, n)
    number_goodblocks = 0
    m2 = n-1
    t, v, b = compute_triple_tvb(ctx, m2)
    Tf = [t]
    Vf = [v]
    # Extend right until a good Gram point closes the initial block.
    while b < 0:
        m2 += 1
        t,v,b = compute_triple_tvb(ctx, m2)
        Tf.append(t)
        Vf.append(v)
    goodpoints = [m2]
    T = [t]
    V = [v]
    # Scan rightwards, requiring 2*sb consecutive separated blocks.
    while number_goodblocks < 2*sb:
        m2 += 1
        t, v, b = compute_triple_tvb(ctx, m2)
        T.append(t)
        V.append(v)
        while b < 0:
            m2 += 1
            t,v,b = compute_triple_tvb(ctx, m2)
            T.append(t)
            V.append(v)
        goodpoints.append(m2)
        zn = len(T)-1
        A, B, separated =\
           separate_zeros_in_block(ctx, zn, T, V, limitloop=ITERATION_LIMIT,
                                   fp_tolerance=fp_tolerance)
        Tf.pop()
        Tf.extend(A)
        Vf.pop()
        Vf.extend(B)
        if separated:
            number_goodblocks += 1
        else:
            # Any failure resets the run of consecutive good blocks.
            number_goodblocks = 0
        T = [t]
        V = [v]
    # Now the same procedure to the left
    number_goodblocks = 0
    m2 = n-2
    t, v, b = compute_triple_tvb(ctx, m2)
    Tf.insert(0,t)
    Vf.insert(0,v)
    while b < 0:
        m2 -= 1
        t,v,b = compute_triple_tvb(ctx, m2)
        Tf.insert(0,t)
        Vf.insert(0,v)
    goodpoints.insert(0,m2)
    T = [t]
    V = [v]
    while number_goodblocks < 2*sb:
        m2 -= 1
        t, v, b = compute_triple_tvb(ctx, m2)
        T.insert(0,t)
        V.insert(0,v)
        while b < 0:
            m2 -= 1
            t,v,b = compute_triple_tvb(ctx, m2)
            T.insert(0,t)
            V.insert(0,v)
        goodpoints.insert(0,m2)
        zn = len(T)-1
        A, B, separated =\
           separate_zeros_in_block(ctx, zn, T, V, limitloop=ITERATION_LIMIT, fp_tolerance=fp_tolerance)
        A.pop()
        Tf = A+Tf
        B.pop()
        Vf = B+Vf
        if separated:
            number_goodblocks += 1
        else:
            number_goodblocks = 0
        T = [t]
        V = [v]
    # Prefer the tight window skipping 2*sb good points on each side...
    r = goodpoints[2*sb]
    lg = len(goodpoints)
    s = goodpoints[lg-2*sb-1]
    tr, vr, br = compute_triple_tvb(ctx, r)
    ar = Tf.index(tr)
    ts, vs, bs = compute_triple_tvb(ctx, s)
    as1 = Tf.index(ts)
    T = Tf[ar:as1+1]
    V = Vf[ar:as1+1]
    zn = s-r
    A, B, separated =\
       separate_zeros_in_block(ctx, zn,T,V,limitloop=ITERATION_LIMIT, fp_tolerance=fp_tolerance)
    if separated:
        return (n-r-1,[r,s],A,B)
    # ...otherwise fall back to the wider window skipping only sb points.
    q = goodpoints[sb]
    lg = len(goodpoints)
    t = goodpoints[lg-sb-1]
    tq, vq, bq = compute_triple_tvb(ctx, q)
    aq = Tf.index(tq)
    tt, vt, bt = compute_triple_tvb(ctx, t)
    at = Tf.index(tt)
    T = Tf[aq:at+1]
    V = Vf[aq:at+1]
    return (n-q-1,[q,t],T,V)
305
+
306
def count_variations(V):
    """Count the sign changes in the sequence of values V."""
    return sum(1 for prev, cur in zip(V, V[1:]) if prev*cur < 0)
315
+
316
def pattern_construct(ctx, block, T, V):
    """Build the zero-pattern string for a block, e.g. '(010)(3)'.

    Each digit counts the zeros (sign changes of Z) found between two
    consecutive Gram points in [block[0], block[1]]; a ')(' is inserted
    after every good Gram point, so parentheses delimit Rosser blocks.
    """
    pattern = '('
    a = block[0]
    b = block[1]
    t0,v0,b0 = compute_triple_tvb(ctx, a)
    k = 0
    k0 = 0
    for n in range(a+1,b+1):
        t1,v1,b1 = compute_triple_tvb(ctx, n)
        lgT =len(T)
        # Advance k past all separation points below this Gram point.
        while (k < lgT) and (T[k] <= t1):
            k += 1
        # Count sign changes between the two Gram points, endpoints included.
        L = V[k0:k]
        L.append(v1)
        L.insert(0,v0)
        count = count_variations(L)
        pattern = pattern + ("%s" % count)
        if b1 > 0:
            pattern = pattern + ')('
        k0 = k
        t0,v0,b0 = t1,v1,b1
    # Drop the trailing '(' opened after the final good Gram point.
    pattern = pattern[:-1]
    return pattern
339
+
340
@defun
def zetazero(ctx, n, info=False, round=True):
    r"""
    Computes the `n`-th nontrivial zero of `\zeta(s)` on the critical line,
    i.e. returns an approximation of the `n`-th largest complex number
    `s = \frac{1}{2} + ti` for which `\zeta(s) = 0`. Equivalently, the
    imaginary part `t` is a zero of the Z-function (:func:`~mpmath.siegelz`).

    **Examples**

    The first few zeros::

        >>> from mpmath import *
        >>> mp.dps = 25; mp.pretty = True
        >>> zetazero(1)
        (0.5 + 14.13472514173469379045725j)
        >>> zetazero(2)
        (0.5 + 21.02203963877155499262848j)
        >>> zetazero(20)
        (0.5 + 77.14484006887480537268266j)

    Verifying that the values are zeros::

        >>> for n in range(1,5):
        ...     s = zetazero(n)
        ...     chop(zeta(s)), chop(siegelz(s.imag))
        ...
        (0.0, 0.0)
        (0.0, 0.0)
        (0.0, 0.0)
        (0.0, 0.0)

    Negative indices give the conjugate zeros (`n = 0` is undefined)::

        >>> zetazero(-1)
        (0.5 - 14.13472514173469379045725j)

    :func:`~mpmath.zetazero` supports arbitrarily large `n` and arbitrary precision::

        >>> mp.dps = 15
        >>> zetazero(1234567)
        (0.5 + 727690.906948208j)
        >>> mp.dps = 50
        >>> zetazero(1234567)
        (0.5 + 727690.9069482075392389420041147142092708393819935j)
        >>> chop(zeta(_)/_)
        0.0

    with *info=True*, :func:`~mpmath.zetazero` gives additional information::

        >>> mp.dps = 15
        >>> zetazero(542964976,info=True)
        ((0.5 + 209039046.578535j), [542964969, 542964978], 6, '(013111110)')

    This means that the zero is between Gram points 542964969 and 542964978;
    it is the 6-th zero between them. Finally (013111110) is the pattern
    of zeros in this interval. The numbers indicate the number of zeros
    in each Gram interval (Rosser blocks between parenthesis). In this case
    there is only one Rosser block of length nine.
    """
    n = int(n)
    if n < 0:
        return ctx.zetazero(-n).conjugate()
    if n == 0:
        raise ValueError("n must be nonzero")
    wpinitial = ctx.prec
    try:
        wpz, fp_tolerance = comp_fp_tolerance(ctx, n)
        ctx.prec = wpz
        # Find a block of Gram points guaranteed to contain zero number n.
        if n < 400000000:
            my_zero_number, block, T, V =\
                find_rosser_block_zero(ctx, n)
        else:
            my_zero_number, block, T, V =\
                search_supergood_block(ctx, n, fp_tolerance)
        zero_number_block = block[1]-block[0]
        T, V, separated = separate_zeros_in_block(ctx, zero_number_block, T, V,
            limitloop=ctx.inf, fp_tolerance=fp_tolerance)
        if info:
            pattern = pattern_construct(ctx,block,T,V)
        prec = max(wpinitial, wpz)
        t = separate_my_zero(ctx, my_zero_number, zero_number_block,T,V,prec)
        v = ctx.mpc(0.5,t)
    finally:
        ctx.prec = wpinitial
    if round:
        # Unary plus rounds v to the (restored) working precision.
        v =+v
    if info:
        return (v,block,my_zero_number,pattern)
    else:
        return v
431
+
432
def gram_index(ctx, t):
    """Return the index of the Gram point nearest below height t,
    computed from siegeltheta(t)/pi."""
    # Very large heights need extra working precision for siegeltheta.
    wp = 3*ctx.log(t, 10) if t > 10**13 else 0
    prec = ctx.prec
    try:
        ctx.prec += wp
        h = int(ctx.siegeltheta(t)/ctx.pi)
    finally:
        ctx.prec = prec
    return h
444
+
445
def count_to(ctx, t, T, V):
    """Count the zeros (sign changes of Z) with ordinate <= t.

    T is an increasing list of sample points with T[0] <= t < T[-1], and V
    the corresponding Z values. A final comparison against Z(t) itself
    catches a sign change in the last partial interval.
    """
    count = 0
    vold = V[0]
    tnew = T[1]
    k = 1
    while tnew < t:
        vnew = V[k]
        if vold*vnew < 0:
            count += 1
        vold = vnew
        k += 1
        tnew = T[k]
    # Check the partial interval [T[k-1], t].
    a = ctx.siegelz(t)
    if a*vold < 0:
        count += 1
    return count
462
+
463
def comp_fp_tolerance(ctx, n):
    """Return (working precision, float tolerance) for locating zero n."""
    wpz = wpzeros(n*ctx.log(n))
    # Looser tolerances for higher zeros, where float Z values degrade.
    if n >= 15*10**8:
        fp_tolerance = 0.1 if n <= 10**14 else 100
    else:
        fp_tolerance = 0.0005
    return wpz, fp_tolerance
472
+
473
@defun
def nzeros(ctx, t):
    r"""
    Computes the number of zeros of the Riemann zeta function in
    `(0,1) \times (0,t]`, usually denoted by `N(t)`.

    **Examples**

    The first zero has imaginary part between 14 and 15::

        >>> from mpmath import *
        >>> mp.dps = 15; mp.pretty = True
        >>> nzeros(14)
        0
        >>> nzeros(15)
        1
        >>> zetazero(1)
        (0.5 + 14.1347251417347j)

    Some closely spaced zeros::

        >>> nzeros(10**7)
        21136125
        >>> zetazero(21136125)
        (0.5 + 9999999.32718175j)
        >>> zetazero(21136126)
        (0.5 + 10000000.2400236j)
        >>> nzeros(545439823.215)
        1500000001
        >>> zetazero(1500000001)
        (0.5 + 545439823.201985j)
        >>> zetazero(1500000002)
        (0.5 + 545439823.325697j)

    This confirms the data given by J. van de Lune,
    H. J. J. te Riele and D. T. Winter in 1986.
    """
    if t < 14.1347251417347:
        return 0
    x = gram_index(ctx, t)
    k = int(ctx.floor(x))
    wpinitial = ctx.prec
    wpz, fp_tolerance = comp_fp_tolerance(ctx, k)
    ctx.prec = wpz
    # try/finally guarantees the caller's precision is restored on every
    # exit path (the original early returns for k == -1 leaked wpz).
    try:
        a = ctx.siegelz(t)
        if k == -1 and a < 0:
            return 0
        elif k == -1 and a > 0:
            return 1
        if k+2 < 400000000:
            Rblock = find_rosser_block_zero(ctx, k+2)
        else:
            Rblock = search_supergood_block(ctx, k+2, fp_tolerance)
        n1, n2 = Rblock[1]
        if n2-n1 == 1:
            # Block of a single Gram interval: compare signs directly.
            b = Rblock[3][0]
            if a*b > 0:
                return k+1
            else:
                return k+2
        my_zero_number, block, T, V = Rblock
        zero_number_block = n2-n1
        T, V, separated = separate_zeros_in_block(ctx,
            zero_number_block, T, V,
            limitloop=ctx.inf,
            fp_tolerance=fp_tolerance)
        n = count_to(ctx, t, T, V)
        return n+n1+1
    finally:
        ctx.prec = wpinitial
544
+
545
@defun_wrapped
def backlunds(ctx, t):
    r"""
    Computes the function
    `S(t) = \operatorname{arg} \zeta(\frac{1}{2} + it) / \pi`.

    See Titchmarsh Section 9.3 for details of the definition.

    **Examples**

    >>> from mpmath import *
    >>> mp.dps = 15; mp.pretty = True
    >>> backlunds(217.3)
    0.16302205431184

    Generally, the value is a small number. At Gram points it is an integer,
    frequently equal to 0::

        >>> chop(backlunds(grampoint(200)))
        0.0
        >>> backlunds(extraprec(10)(grampoint)(211))
        1.0
        >>> backlunds(extraprec(10)(grampoint)(232))
        -1.0

    The number of zeros of the Riemann zeta function up to height `t`
    satisfies `N(t) = \theta(t)/\pi + 1 + S(t)` (see :func:`nzeros` and
    :func:`siegeltheta`)::

        >>> t = 1234.55
        >>> nzeros(t)
        842
        >>> siegeltheta(t)/pi+1+backlunds(t)
        842.0

    """
    # Rearrangement of N(t) = theta(t)/pi + 1 + S(t).
    return ctx.nzeros(t)-1-ctx.siegeltheta(t)/ctx.pi
582
+
583
+
584
+ """
585
+ _ROSSER_EXCEPTIONS is a list of all exceptions to
586
+ Rosser's rule for n <= 400 000 000.
587
+
588
+ The entries alternate between a pair [n, m] and a string.
589
+ The string is the zero pattern of the Block and the relevant
590
+ adjacent. For example (010)3 corresponds to a block
591
+ composed of three Gram intervals, the first and third without
592
+ a zero and the intermediate with a zero. The next Gram interval
593
+ contain three zeros. So that in total we have 4 zeros in 4 Gram
594
+ blocks. n and m are the indices of the Gram points of this
595
+ interval of four Gram intervals. The Rosser exception is therefore
596
+ formed by the three Gram intervals that are signaled between
597
+ parenthesis.
598
+
599
+ We have included also some Rosser's exceptions beyond n=400 000 000
600
+ that are noted in the literature by some reason.
601
+
602
+ The list is composed from the data published in the references:
603
+
604
+ R. P. Brent, J. van de Lune, H. J. J. te Riele, D. T. Winter,
605
+ 'On the Zeros of the Riemann Zeta Function in the Critical Strip. II',
606
+ Math. Comp. 39 (1982) 681--688.
607
+ See also Corrigenda in Math. Comp. 46 (1986) 771.
608
+
609
+ J. van de Lune, H. J. J. te Riele,
610
+ 'On the Zeros of the Riemann Zeta Function in the Critical Strip. III',
611
+ Math. Comp. 41 (1983) 759--767.
612
+ See also Corrigenda in Math. Comp. 46 (1986) 771.
613
+
614
+ J. van de Lune,
615
+ 'Sums of Equal Powers of Positive Integers',
616
+ Dissertation,
617
+ Vrije Universiteit te Amsterdam, Centrum voor Wiskunde en Informatica,
618
+ Amsterdam, 1984.
619
+
620
+ Thanks to the authors of all these papers and of those others that have
621
+ contributed to make this possible.
622
+ """
623
+
624
+
625
+
626
+
627
+
628
+
629
+
630
+ _ROSSER_EXCEPTIONS = \
631
+ [[13999525, 13999528], '(00)3',
632
+ [30783329, 30783332], '(00)3',
633
+ [30930926, 30930929], '3(00)',
634
+ [37592215, 37592218], '(00)3',
635
+ [40870156, 40870159], '(00)3',
636
+ [43628107, 43628110], '(00)3',
637
+ [46082042, 46082045], '(00)3',
638
+ [46875667, 46875670], '(00)3',
639
+ [49624540, 49624543], '3(00)',
640
+ [50799238, 50799241], '(00)3',
641
+ [55221453, 55221456], '3(00)',
642
+ [56948779, 56948782], '3(00)',
643
+ [60515663, 60515666], '(00)3',
644
+ [61331766, 61331770], '(00)40',
645
+ [69784843, 69784846], '3(00)',
646
+ [75052114, 75052117], '(00)3',
647
+ [79545240, 79545243], '3(00)',
648
+ [79652247, 79652250], '3(00)',
649
+ [83088043, 83088046], '(00)3',
650
+ [83689522, 83689525], '3(00)',
651
+ [85348958, 85348961], '(00)3',
652
+ [86513820, 86513823], '(00)3',
653
+ [87947596, 87947599], '3(00)',
654
+ [88600095, 88600098], '(00)3',
655
+ [93681183, 93681186], '(00)3',
656
+ [100316551, 100316554], '3(00)',
657
+ [100788444, 100788447], '(00)3',
658
+ [106236172, 106236175], '(00)3',
659
+ [106941327, 106941330], '3(00)',
660
+ [107287955, 107287958], '(00)3',
661
+ [107532016, 107532019], '3(00)',
662
+ [110571044, 110571047], '(00)3',
663
+ [111885253, 111885256], '3(00)',
664
+ [113239783, 113239786], '(00)3',
665
+ [120159903, 120159906], '(00)3',
666
+ [121424391, 121424394], '3(00)',
667
+ [121692931, 121692934], '3(00)',
668
+ [121934170, 121934173], '3(00)',
669
+ [122612848, 122612851], '3(00)',
670
+ [126116567, 126116570], '(00)3',
671
+ [127936513, 127936516], '(00)3',
672
+ [128710277, 128710280], '3(00)',
673
+ [129398902, 129398905], '3(00)',
674
+ [130461096, 130461099], '3(00)',
675
+ [131331947, 131331950], '3(00)',
676
+ [137334071, 137334074], '3(00)',
677
+ [137832603, 137832606], '(00)3',
678
+ [138799471, 138799474], '3(00)',
679
+ [139027791, 139027794], '(00)3',
680
+ [141617806, 141617809], '(00)3',
681
+ [144454931, 144454934], '(00)3',
682
+ [145402379, 145402382], '3(00)',
683
+ [146130245, 146130248], '3(00)',
684
+ [147059770, 147059773], '(00)3',
685
+ [147896099, 147896102], '3(00)',
686
+ [151097113, 151097116], '(00)3',
687
+ [152539438, 152539441], '(00)3',
688
+ [152863168, 152863171], '3(00)',
689
+ [153522726, 153522729], '3(00)',
690
+ [155171524, 155171527], '3(00)',
691
+ [155366607, 155366610], '(00)3',
692
+ [157260686, 157260689], '3(00)',
693
+ [157269224, 157269227], '(00)3',
694
+ [157755123, 157755126], '(00)3',
695
+ [158298484, 158298487], '3(00)',
696
+ [160369050, 160369053], '3(00)',
697
+ [162962787, 162962790], '(00)3',
698
+ [163724709, 163724712], '(00)3',
699
+ [164198113, 164198116], '3(00)',
700
+ [164689301, 164689305], '(00)40',
701
+ [164880228, 164880231], '3(00)',
702
+ [166201932, 166201935], '(00)3',
703
+ [168573836, 168573839], '(00)3',
704
+ [169750763, 169750766], '(00)3',
705
+ [170375507, 170375510], '(00)3',
706
+ [170704879, 170704882], '3(00)',
707
+ [172000992, 172000995], '3(00)',
708
+ [173289941, 173289944], '(00)3',
709
+ [173737613, 173737616], '3(00)',
710
+ [174102513, 174102516], '(00)3',
711
+ [174284990, 174284993], '(00)3',
712
+ [174500513, 174500516], '(00)3',
713
+ [175710609, 175710612], '(00)3',
714
+ [176870843, 176870846], '3(00)',
715
+ [177332732, 177332735], '3(00)',
716
+ [177902861, 177902864], '3(00)',
717
+ [179979095, 179979098], '(00)3',
718
+ [181233726, 181233729], '3(00)',
719
+ [181625435, 181625438], '(00)3',
720
+ [182105255, 182105259], '22(00)',
721
+ [182223559, 182223562], '3(00)',
722
+ [191116404, 191116407], '3(00)',
723
+ [191165599, 191165602], '3(00)',
724
+ [191297535, 191297539], '(00)22',
725
+ [192485616, 192485619], '(00)3',
726
+ [193264634, 193264638], '22(00)',
727
+ [194696968, 194696971], '(00)3',
728
+ [195876805, 195876808], '(00)3',
729
+ [195916548, 195916551], '3(00)',
730
+ [196395160, 196395163], '3(00)',
731
+ [196676303, 196676306], '(00)3',
732
+ [197889882, 197889885], '3(00)',
733
+ [198014122, 198014125], '(00)3',
734
+ [199235289, 199235292], '(00)3',
735
+ [201007375, 201007378], '(00)3',
736
+ [201030605, 201030608], '3(00)',
737
+ [201184290, 201184293], '3(00)',
738
+ [201685414, 201685418], '(00)22',
739
+ [202762875, 202762878], '3(00)',
740
+ [202860957, 202860960], '3(00)',
741
+ [203832577, 203832580], '3(00)',
742
+ [205880544, 205880547], '(00)3',
743
+ [206357111, 206357114], '(00)3',
744
+ [207159767, 207159770], '3(00)',
745
+ [207167343, 207167346], '3(00)',
746
+ [207482539, 207482543], '3(010)',
747
+ [207669540, 207669543], '3(00)',
748
+ [208053426, 208053429], '(00)3',
749
+ [208110027, 208110030], '3(00)',
750
+ [209513826, 209513829], '3(00)',
751
+ [212623522, 212623525], '(00)3',
752
+ [213841715, 213841718], '(00)3',
753
+ [214012333, 214012336], '(00)3',
754
+ [214073567, 214073570], '(00)3',
755
+ [215170600, 215170603], '3(00)',
756
+ [215881039, 215881042], '3(00)',
757
+ [216274604, 216274607], '3(00)',
758
+ [216957120, 216957123], '3(00)',
759
+ [217323208, 217323211], '(00)3',
760
+ [218799264, 218799267], '(00)3',
761
+ [218803557, 218803560], '3(00)',
762
+ [219735146, 219735149], '(00)3',
763
+ [219830062, 219830065], '3(00)',
764
+ [219897904, 219897907], '(00)3',
765
+ [221205545, 221205548], '(00)3',
766
+ [223601929, 223601932], '(00)3',
767
+ [223907076, 223907079], '3(00)',
768
+ [223970397, 223970400], '(00)3',
769
+ [224874044, 224874048], '22(00)',
770
+ [225291157, 225291160], '(00)3',
771
+ [227481734, 227481737], '(00)3',
772
+ [228006442, 228006445], '3(00)',
773
+ [228357900, 228357903], '(00)3',
774
+ [228386399, 228386402], '(00)3',
775
+ [228907446, 228907449], '(00)3',
776
+ [228984552, 228984555], '3(00)',
777
+ [229140285, 229140288], '3(00)',
778
+ [231810024, 231810027], '(00)3',
779
+ [232838062, 232838065], '3(00)',
780
+ [234389088, 234389091], '3(00)',
781
+ [235588194, 235588197], '(00)3',
782
+ [236645695, 236645698], '(00)3',
783
+ [236962876, 236962879], '3(00)',
784
+ [237516723, 237516727], '04(00)',
785
+ [240004911, 240004914], '(00)3',
786
+ [240221306, 240221309], '3(00)',
787
+ [241389213, 241389217], '(010)3',
788
+ [241549003, 241549006], '(00)3',
789
+ [241729717, 241729720], '(00)3',
790
+ [241743684, 241743687], '3(00)',
791
+ [243780200, 243780203], '3(00)',
792
+ [243801317, 243801320], '(00)3',
793
+ [244122072, 244122075], '(00)3',
794
+ [244691224, 244691227], '3(00)',
795
+ [244841577, 244841580], '(00)3',
796
+ [245813461, 245813464], '(00)3',
797
+ [246299475, 246299478], '(00)3',
798
+ [246450176, 246450179], '3(00)',
799
+ [249069349, 249069352], '(00)3',
800
+ [250076378, 250076381], '(00)3',
801
+ [252442157, 252442160], '3(00)',
802
+ [252904231, 252904234], '3(00)',
803
+ [255145220, 255145223], '(00)3',
804
+ [255285971, 255285974], '3(00)',
805
+ [256713230, 256713233], '(00)3',
806
+ [257992082, 257992085], '(00)3',
807
+ [258447955, 258447959], '22(00)',
808
+ [259298045, 259298048], '3(00)',
809
+ [262141503, 262141506], '(00)3',
810
+ [263681743, 263681746], '3(00)',
811
+ [266527881, 266527885], '(010)3',
812
+ [266617122, 266617125], '(00)3',
813
+ [266628044, 266628047], '3(00)',
814
+ [267305763, 267305766], '(00)3',
815
+ [267388404, 267388407], '3(00)',
816
+ [267441672, 267441675], '3(00)',
817
+ [267464886, 267464889], '(00)3',
818
+ [267554907, 267554910], '3(00)',
819
+ [269787480, 269787483], '(00)3',
820
+ [270881434, 270881437], '(00)3',
821
+ [270997583, 270997586], '3(00)',
822
+ [272096378, 272096381], '3(00)',
823
+ [272583009, 272583012], '(00)3',
824
+ [274190881, 274190884], '3(00)',
825
+ [274268747, 274268750], '(00)3',
826
+ [275297429, 275297432], '3(00)',
827
+ [275545476, 275545479], '3(00)',
828
+ [275898479, 275898482], '3(00)',
829
+ [275953000, 275953003], '(00)3',
830
+ [277117197, 277117201], '(00)22',
831
+ [277447310, 277447313], '3(00)',
832
+ [279059657, 279059660], '3(00)',
833
+ [279259144, 279259147], '3(00)',
834
+ [279513636, 279513639], '3(00)',
835
+ [279849069, 279849072], '3(00)',
836
+ [280291419, 280291422], '(00)3',
837
+ [281449425, 281449428], '3(00)',
838
+ [281507953, 281507956], '3(00)',
839
+ [281825600, 281825603], '(00)3',
840
+ [282547093, 282547096], '3(00)',
841
+ [283120963, 283120966], '3(00)',
842
+ [283323493, 283323496], '(00)3',
843
+ [284764535, 284764538], '3(00)',
844
+ [286172639, 286172642], '3(00)',
845
+ [286688824, 286688827], '(00)3',
846
+ [287222172, 287222175], '3(00)',
847
+ [287235534, 287235537], '3(00)',
848
+ [287304861, 287304864], '3(00)',
849
+ [287433571, 287433574], '(00)3',
850
+ [287823551, 287823554], '(00)3',
851
+ [287872422, 287872425], '3(00)',
852
+ [288766615, 288766618], '3(00)',
853
+ [290122963, 290122966], '3(00)',
854
+ [290450849, 290450853], '(00)22',
855
+ [291426141, 291426144], '3(00)',
856
+ [292810353, 292810356], '3(00)',
857
+ [293109861, 293109864], '3(00)',
858
+ [293398054, 293398057], '3(00)',
859
+ [294134426, 294134429], '3(00)',
860
+ [294216438, 294216441], '(00)3',
861
+ [295367141, 295367144], '3(00)',
862
+ [297834111, 297834114], '3(00)',
863
+ [299099969, 299099972], '3(00)',
864
+ [300746958, 300746961], '3(00)',
865
+ [301097423, 301097426], '(00)3',
866
+ [301834209, 301834212], '(00)3',
867
+ [302554791, 302554794], '(00)3',
868
+ [303497445, 303497448], '3(00)',
869
+ [304165344, 304165347], '3(00)',
870
+ [304790218, 304790222], '3(010)',
871
+ [305302352, 305302355], '(00)3',
872
+ [306785996, 306785999], '3(00)',
873
+ [307051443, 307051446], '3(00)',
874
+ [307481539, 307481542], '3(00)',
875
+ [308605569, 308605572], '3(00)',
876
+ [309237610, 309237613], '3(00)',
877
+ [310509287, 310509290], '(00)3',
878
+ [310554057, 310554060], '3(00)',
879
+ [310646345, 310646348], '3(00)',
880
+ [311274896, 311274899], '(00)3',
881
+ [311894272, 311894275], '3(00)',
882
+ [312269470, 312269473], '(00)3',
883
+ [312306601, 312306605], '(00)40',
884
+ [312683193, 312683196], '3(00)',
885
+ [314499804, 314499807], '3(00)',
886
+ [314636802, 314636805], '(00)3',
887
+ [314689897, 314689900], '3(00)',
888
+ [314721319, 314721322], '3(00)',
889
+ [316132890, 316132893], '3(00)',
890
+ [316217470, 316217474], '(010)3',
891
+ [316465705, 316465708], '3(00)',
892
+ [316542790, 316542793], '(00)3',
893
+ [320822347, 320822350], '3(00)',
894
+ [321733242, 321733245], '3(00)',
895
+ [324413970, 324413973], '(00)3',
896
+ [325950140, 325950143], '(00)3',
897
+ [326675884, 326675887], '(00)3',
898
+ [326704208, 326704211], '3(00)',
899
+ [327596247, 327596250], '3(00)',
900
+ [328123172, 328123175], '3(00)',
901
+ [328182212, 328182215], '(00)3',
902
+ [328257498, 328257501], '3(00)',
903
+ [328315836, 328315839], '(00)3',
904
+ [328800974, 328800977], '(00)3',
905
+ [328998509, 328998512], '3(00)',
906
+ [329725370, 329725373], '(00)3',
907
+ [332080601, 332080604], '(00)3',
908
+ [332221246, 332221249], '(00)3',
909
+ [332299899, 332299902], '(00)3',
910
+ [332532822, 332532825], '(00)3',
911
+ [333334544, 333334548], '(00)22',
912
+ [333881266, 333881269], '3(00)',
913
+ [334703267, 334703270], '3(00)',
914
+ [334875138, 334875141], '3(00)',
915
+ [336531451, 336531454], '3(00)',
916
+ [336825907, 336825910], '(00)3',
917
+ [336993167, 336993170], '(00)3',
918
+ [337493998, 337494001], '3(00)',
919
+ [337861034, 337861037], '3(00)',
920
+ [337899191, 337899194], '(00)3',
921
+ [337958123, 337958126], '(00)3',
922
+ [342331982, 342331985], '3(00)',
923
+ [342676068, 342676071], '3(00)',
924
+ [347063781, 347063784], '3(00)',
925
+ [347697348, 347697351], '3(00)',
926
+ [347954319, 347954322], '3(00)',
927
+ [348162775, 348162778], '3(00)',
928
+ [349210702, 349210705], '(00)3',
929
+ [349212913, 349212916], '3(00)',
930
+ [349248650, 349248653], '(00)3',
931
+ [349913500, 349913503], '3(00)',
932
+ [350891529, 350891532], '3(00)',
933
+ [351089323, 351089326], '3(00)',
934
+ [351826158, 351826161], '3(00)',
935
+ [352228580, 352228583], '(00)3',
936
+ [352376244, 352376247], '3(00)',
937
+ [352853758, 352853761], '(00)3',
938
+ [355110439, 355110442], '(00)3',
939
+ [355808090, 355808094], '(00)40',
940
+ [355941556, 355941559], '3(00)',
941
+ [356360231, 356360234], '(00)3',
942
+ [356586657, 356586660], '3(00)',
943
+ [356892926, 356892929], '(00)3',
944
+ [356908232, 356908235], '3(00)',
945
+ [357912730, 357912733], '3(00)',
946
+ [358120344, 358120347], '3(00)',
947
+ [359044096, 359044099], '(00)3',
948
+ [360819357, 360819360], '3(00)',
949
+ [361399662, 361399666], '(010)3',
950
+ [362361315, 362361318], '(00)3',
951
+ [363610112, 363610115], '(00)3',
952
+ [363964804, 363964807], '3(00)',
953
+ [364527375, 364527378], '(00)3',
954
+ [365090327, 365090330], '(00)3',
955
+ [365414539, 365414542], '3(00)',
956
+ [366738474, 366738477], '3(00)',
957
+ [368714778, 368714783], '04(010)',
958
+ [368831545, 368831548], '(00)3',
959
+ [368902387, 368902390], '(00)3',
960
+ [370109769, 370109772], '3(00)',
961
+ [370963333, 370963336], '3(00)',
962
+ [372541136, 372541140], '3(010)',
963
+ [372681562, 372681565], '(00)3',
964
+ [373009410, 373009413], '(00)3',
965
+ [373458970, 373458973], '3(00)',
966
+ [375648658, 375648661], '3(00)',
967
+ [376834728, 376834731], '3(00)',
968
+ [377119945, 377119948], '(00)3',
969
+ [377335703, 377335706], '(00)3',
970
+ [378091745, 378091748], '3(00)',
971
+ [379139522, 379139525], '3(00)',
972
+ [380279160, 380279163], '(00)3',
973
+ [380619442, 380619445], '3(00)',
974
+ [381244231, 381244234], '3(00)',
975
+ [382327446, 382327450], '(010)3',
976
+ [382357073, 382357076], '3(00)',
977
+ [383545479, 383545482], '3(00)',
978
+ [384363766, 384363769], '(00)3',
979
+ [384401786, 384401790], '22(00)',
980
+ [385198212, 385198215], '3(00)',
981
+ [385824476, 385824479], '(00)3',
982
+ [385908194, 385908197], '3(00)',
983
+ [386946806, 386946809], '3(00)',
984
+ [387592175, 387592179], '22(00)',
985
+ [388329293, 388329296], '(00)3',
986
+ [388679566, 388679569], '3(00)',
987
+ [388832142, 388832145], '3(00)',
988
+ [390087103, 390087106], '(00)3',
989
+ [390190926, 390190930], '(00)22',
990
+ [390331207, 390331210], '3(00)',
991
+ [391674495, 391674498], '3(00)',
992
+ [391937831, 391937834], '3(00)',
993
+ [391951632, 391951636], '(00)22',
994
+ [392963986, 392963989], '(00)3',
995
+ [393007921, 393007924], '3(00)',
996
+ [393373210, 393373213], '3(00)',
997
+ [393759572, 393759575], '(00)3',
998
+ [394036662, 394036665], '(00)3',
999
+ [395813866, 395813869], '(00)3',
1000
+ [395956690, 395956693], '3(00)',
1001
+ [396031670, 396031673], '3(00)',
1002
+ [397076433, 397076436], '3(00)',
1003
+ [397470601, 397470604], '3(00)',
1004
+ [398289458, 398289461], '3(00)',
1005
+ #
1006
+ [368714778, 368714783], '04(010)',
1007
+ [437953499, 437953504], '04(010)',
1008
+ [526196233, 526196238], '032(00)',
1009
+ [744719566, 744719571], '(010)40',
1010
+ [750375857, 750375862], '032(00)',
1011
+ [958241932, 958241937], '04(010)',
1012
+ [983377342, 983377347], '(00)410',
1013
+ [1003780080, 1003780085], '04(010)',
1014
+ [1070232754, 1070232759], '(00)230',
1015
+ [1209834865, 1209834870], '032(00)',
1016
+ [1257209100, 1257209105], '(00)410',
1017
+ [1368002233, 1368002238], '(00)230'
1018
+ ]
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/tests/__init__.py ADDED
File without changes
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/tests/test_basic_ops.py ADDED
@@ -0,0 +1,451 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import mpmath
2
+ from mpmath import *
3
+ from mpmath.libmp import *
4
+ import random
5
+ import sys
6
+
7
+ try:
8
+ long = long
9
+ except NameError:
10
+ long = int
11
+
12
+ def test_type_compare():
13
+ assert mpf(2) == mpc(2,0)
14
+ assert mpf(0) == mpc(0)
15
+ assert mpf(2) != mpc(2, 0.00001)
16
+ assert mpf(2) == 2.0
17
+ assert mpf(2) != 3.0
18
+ assert mpf(2) == 2
19
+ assert mpf(2) != '2.0'
20
+ assert mpc(2) != '2.0'
21
+
22
+ def test_add():
23
+ assert mpf(2.5) + mpf(3) == 5.5
24
+ assert mpf(2.5) + 3 == 5.5
25
+ assert mpf(2.5) + 3.0 == 5.5
26
+ assert 3 + mpf(2.5) == 5.5
27
+ assert 3.0 + mpf(2.5) == 5.5
28
+ assert (3+0j) + mpf(2.5) == 5.5
29
+ assert mpc(2.5) + mpf(3) == 5.5
30
+ assert mpc(2.5) + 3 == 5.5
31
+ assert mpc(2.5) + 3.0 == 5.5
32
+ assert mpc(2.5) + (3+0j) == 5.5
33
+ assert 3 + mpc(2.5) == 5.5
34
+ assert 3.0 + mpc(2.5) == 5.5
35
+ assert (3+0j) + mpc(2.5) == 5.5
36
+
37
+ def test_sub():
38
+ assert mpf(2.5) - mpf(3) == -0.5
39
+ assert mpf(2.5) - 3 == -0.5
40
+ assert mpf(2.5) - 3.0 == -0.5
41
+ assert 3 - mpf(2.5) == 0.5
42
+ assert 3.0 - mpf(2.5) == 0.5
43
+ assert (3+0j) - mpf(2.5) == 0.5
44
+ assert mpc(2.5) - mpf(3) == -0.5
45
+ assert mpc(2.5) - 3 == -0.5
46
+ assert mpc(2.5) - 3.0 == -0.5
47
+ assert mpc(2.5) - (3+0j) == -0.5
48
+ assert 3 - mpc(2.5) == 0.5
49
+ assert 3.0 - mpc(2.5) == 0.5
50
+ assert (3+0j) - mpc(2.5) == 0.5
51
+
52
+ def test_mul():
53
+ assert mpf(2.5) * mpf(3) == 7.5
54
+ assert mpf(2.5) * 3 == 7.5
55
+ assert mpf(2.5) * 3.0 == 7.5
56
+ assert 3 * mpf(2.5) == 7.5
57
+ assert 3.0 * mpf(2.5) == 7.5
58
+ assert (3+0j) * mpf(2.5) == 7.5
59
+ assert mpc(2.5) * mpf(3) == 7.5
60
+ assert mpc(2.5) * 3 == 7.5
61
+ assert mpc(2.5) * 3.0 == 7.5
62
+ assert mpc(2.5) * (3+0j) == 7.5
63
+ assert 3 * mpc(2.5) == 7.5
64
+ assert 3.0 * mpc(2.5) == 7.5
65
+ assert (3+0j) * mpc(2.5) == 7.5
66
+
67
+ def test_div():
68
+ assert mpf(6) / mpf(3) == 2.0
69
+ assert mpf(6) / 3 == 2.0
70
+ assert mpf(6) / 3.0 == 2.0
71
+ assert 6 / mpf(3) == 2.0
72
+ assert 6.0 / mpf(3) == 2.0
73
+ assert (6+0j) / mpf(3.0) == 2.0
74
+ assert mpc(6) / mpf(3) == 2.0
75
+ assert mpc(6) / 3 == 2.0
76
+ assert mpc(6) / 3.0 == 2.0
77
+ assert mpc(6) / (3+0j) == 2.0
78
+ assert 6 / mpc(3) == 2.0
79
+ assert 6.0 / mpc(3) == 2.0
80
+ assert (6+0j) / mpc(3) == 2.0
81
+
82
+ def test_pow():
83
+ assert mpf(6) ** mpf(3) == 216.0
84
+ assert mpf(6) ** 3 == 216.0
85
+ assert mpf(6) ** 3.0 == 216.0
86
+ assert 6 ** mpf(3) == 216.0
87
+ assert 6.0 ** mpf(3) == 216.0
88
+ assert (6+0j) ** mpf(3.0) == 216.0
89
+ assert mpc(6) ** mpf(3) == 216.0
90
+ assert mpc(6) ** 3 == 216.0
91
+ assert mpc(6) ** 3.0 == 216.0
92
+ assert mpc(6) ** (3+0j) == 216.0
93
+ assert 6 ** mpc(3) == 216.0
94
+ assert 6.0 ** mpc(3) == 216.0
95
+ assert (6+0j) ** mpc(3) == 216.0
96
+
97
+ def test_mixed_misc():
98
+ assert 1 + mpf(3) == mpf(3) + 1 == 4
99
+ assert 1 - mpf(3) == -(mpf(3) - 1) == -2
100
+ assert 3 * mpf(2) == mpf(2) * 3 == 6
101
+ assert 6 / mpf(2) == mpf(6) / 2 == 3
102
+ assert 1.0 + mpf(3) == mpf(3) + 1.0 == 4
103
+ assert 1.0 - mpf(3) == -(mpf(3) - 1.0) == -2
104
+ assert 3.0 * mpf(2) == mpf(2) * 3.0 == 6
105
+ assert 6.0 / mpf(2) == mpf(6) / 2.0 == 3
106
+
107
+ def test_add_misc():
108
+ mp.dps = 15
109
+ assert mpf(4) + mpf(-70) == -66
110
+ assert mpf(1) + mpf(1.1)/80 == 1 + 1.1/80
111
+ assert mpf((1, 10000000000)) + mpf(3) == mpf((1, 10000000000))
112
+ assert mpf(3) + mpf((1, 10000000000)) == mpf((1, 10000000000))
113
+ assert mpf((1, -10000000000)) + mpf(3) == mpf(3)
114
+ assert mpf(3) + mpf((1, -10000000000)) == mpf(3)
115
+ assert mpf(1) + 1e-15 != 1
116
+ assert mpf(1) + 1e-20 == 1
117
+ assert mpf(1.07e-22) + 0 == mpf(1.07e-22)
118
+ assert mpf(0) + mpf(1.07e-22) == mpf(1.07e-22)
119
+
120
+ def test_complex_misc():
121
+ # many more tests needed
122
+ assert 1 + mpc(2) == 3
123
+ assert not mpc(2).ae(2 + 1e-13)
124
+ assert mpc(2+1e-15j).ae(2)
125
+
126
+ def test_complex_zeros():
127
+ for a in [0,2]:
128
+ for b in [0,3]:
129
+ for c in [0,4]:
130
+ for d in [0,5]:
131
+ assert mpc(a,b)*mpc(c,d) == complex(a,b)*complex(c,d)
132
+
133
+ def test_hash():
134
+ for i in range(-256, 256):
135
+ assert hash(mpf(i)) == hash(i)
136
+ assert hash(mpf(0.5)) == hash(0.5)
137
+ assert hash(mpc(2,3)) == hash(2+3j)
138
+ # Check that this doesn't fail
139
+ assert hash(inf)
140
+ # Check that overflow doesn't assign equal hashes to large numbers
141
+ assert hash(mpf('1e1000')) != hash('1e10000')
142
+ assert hash(mpc(100,'1e1000')) != hash(mpc(200,'1e1000'))
143
+ from mpmath.rational import mpq
144
+ assert hash(mp.mpq(1,3))
145
+ assert hash(mp.mpq(0,1)) == 0
146
+ assert hash(mp.mpq(-1,1)) == hash(-1)
147
+ assert hash(mp.mpq(1,1)) == hash(1)
148
+ assert hash(mp.mpq(5,1)) == hash(5)
149
+ assert hash(mp.mpq(1,2)) == hash(0.5)
150
+ if sys.version_info >= (3, 2):
151
+ assert hash(mpf(1)*2**2000) == hash(2**2000)
152
+ assert hash(mpf(1)/2**2000) == hash(mpq(1,2**2000))
153
+
154
+ # Advanced rounding test
155
+ def test_add_rounding():
156
+ mp.dps = 15
157
+ a = from_float(1e-50)
158
+ assert mpf_sub(mpf_add(fone, a, 53, round_up), fone, 53, round_up) == from_float(2.2204460492503131e-16)
159
+ assert mpf_sub(fone, a, 53, round_up) == fone
160
+ assert mpf_sub(fone, mpf_sub(fone, a, 53, round_down), 53, round_down) == from_float(1.1102230246251565e-16)
161
+ assert mpf_add(fone, a, 53, round_down) == fone
162
+
163
+ def test_almost_equal():
164
+ assert mpf(1.2).ae(mpf(1.20000001), 1e-7)
165
+ assert not mpf(1.2).ae(mpf(1.20000001), 1e-9)
166
+ assert not mpf(-0.7818314824680298).ae(mpf(-0.774695868667929))
167
+
168
+ def test_arithmetic_functions():
169
+ import operator
170
+ ops = [(operator.add, fadd), (operator.sub, fsub), (operator.mul, fmul),
171
+ (operator.truediv, fdiv)]
172
+ a = mpf(0.27)
173
+ b = mpf(1.13)
174
+ c = mpc(0.51+2.16j)
175
+ d = mpc(1.08-0.99j)
176
+ for x in [a,b,c,d]:
177
+ for y in [a,b,c,d]:
178
+ for op, fop in ops:
179
+ if fop is not fdiv:
180
+ mp.prec = 200
181
+ z0 = op(x,y)
182
+ mp.prec = 60
183
+ z1 = op(x,y)
184
+ mp.prec = 53
185
+ z2 = op(x,y)
186
+ assert fop(x, y, prec=60) == z1
187
+ assert fop(x, y) == z2
188
+ if fop is not fdiv:
189
+ assert fop(x, y, prec=inf) == z0
190
+ assert fop(x, y, dps=inf) == z0
191
+ assert fop(x, y, exact=True) == z0
192
+ assert fneg(fneg(z1, exact=True), prec=inf) == z1
193
+ assert fneg(z1) == -(+z1)
194
+ mp.dps = 15
195
+
196
+ def test_exact_integer_arithmetic():
197
+ # XXX: re-fix this so that all operations are tested with all rounding modes
198
+ random.seed(0)
199
+ for prec in [6, 10, 25, 40, 100, 250, 725]:
200
+ for rounding in ['d', 'u', 'f', 'c', 'n']:
201
+ mp.dps = prec
202
+ M = 10**(prec-2)
203
+ M2 = 10**(prec//2-2)
204
+ for i in range(10):
205
+ a = random.randint(-M, M)
206
+ b = random.randint(-M, M)
207
+ assert mpf(a, rounding=rounding) == a
208
+ assert int(mpf(a, rounding=rounding)) == a
209
+ assert int(mpf(str(a), rounding=rounding)) == a
210
+ assert mpf(a) + mpf(b) == a + b
211
+ assert mpf(a) - mpf(b) == a - b
212
+ assert -mpf(a) == -a
213
+ a = random.randint(-M2, M2)
214
+ b = random.randint(-M2, M2)
215
+ assert mpf(a) * mpf(b) == a*b
216
+ assert mpf_mul(from_int(a), from_int(b), mp.prec, rounding) == from_int(a*b)
217
+ mp.dps = 15
218
+
219
+ def test_odd_int_bug():
220
+ assert to_int(from_int(3), round_nearest) == 3
221
+
222
+ def test_str_1000_digits():
223
+ mp.dps = 1001
224
+ # last digit may be wrong
225
+ assert str(mpf(2)**0.5)[-10:-1] == '9518488472'[:9]
226
+ assert str(pi)[-10:-1] == '2164201989'[:9]
227
+ mp.dps = 15
228
+
229
+ def test_str_10000_digits():
230
+ mp.dps = 10001
231
+ # last digit may be wrong
232
+ assert str(mpf(2)**0.5)[-10:-1] == '5873258351'[:9]
233
+ assert str(pi)[-10:-1] == '5256375678'[:9]
234
+ mp.dps = 15
235
+
236
+ def test_monitor():
237
+ f = lambda x: x**2
238
+ a = []
239
+ b = []
240
+ g = monitor(f, a.append, b.append)
241
+ assert g(3) == 9
242
+ assert g(4) == 16
243
+ assert a[0] == ((3,), {})
244
+ assert b[0] == 9
245
+
246
+ def test_nint_distance():
247
+ assert nint_distance(mpf(-3)) == (-3, -inf)
248
+ assert nint_distance(mpc(-3)) == (-3, -inf)
249
+ assert nint_distance(mpf(-3.1)) == (-3, -3)
250
+ assert nint_distance(mpf(-3.01)) == (-3, -6)
251
+ assert nint_distance(mpf(-3.001)) == (-3, -9)
252
+ assert nint_distance(mpf(-3.0001)) == (-3, -13)
253
+ assert nint_distance(mpf(-2.9)) == (-3, -3)
254
+ assert nint_distance(mpf(-2.99)) == (-3, -6)
255
+ assert nint_distance(mpf(-2.999)) == (-3, -9)
256
+ assert nint_distance(mpf(-2.9999)) == (-3, -13)
257
+ assert nint_distance(mpc(-3+0.1j)) == (-3, -3)
258
+ assert nint_distance(mpc(-3+0.01j)) == (-3, -6)
259
+ assert nint_distance(mpc(-3.1+0.1j)) == (-3, -3)
260
+ assert nint_distance(mpc(-3.01+0.01j)) == (-3, -6)
261
+ assert nint_distance(mpc(-3.001+0.001j)) == (-3, -9)
262
+ assert nint_distance(mpf(0)) == (0, -inf)
263
+ assert nint_distance(mpf(0.01)) == (0, -6)
264
+ assert nint_distance(mpf('1e-100')) == (0, -332)
265
+
266
+ def test_floor_ceil_nint_frac():
267
+ mp.dps = 15
268
+ for n in range(-10,10):
269
+ assert floor(n) == n
270
+ assert floor(n+0.5) == n
271
+ assert ceil(n) == n
272
+ assert ceil(n+0.5) == n+1
273
+ assert nint(n) == n
274
+ # nint rounds to even
275
+ if n % 2 == 1:
276
+ assert nint(n+0.5) == n+1
277
+ else:
278
+ assert nint(n+0.5) == n
279
+ assert floor(inf) == inf
280
+ assert floor(ninf) == ninf
281
+ assert isnan(floor(nan))
282
+ assert ceil(inf) == inf
283
+ assert ceil(ninf) == ninf
284
+ assert isnan(ceil(nan))
285
+ assert nint(inf) == inf
286
+ assert nint(ninf) == ninf
287
+ assert isnan(nint(nan))
288
+ assert floor(0.1) == 0
289
+ assert floor(0.9) == 0
290
+ assert floor(-0.1) == -1
291
+ assert floor(-0.9) == -1
292
+ assert floor(10000000000.1) == 10000000000
293
+ assert floor(10000000000.9) == 10000000000
294
+ assert floor(-10000000000.1) == -10000000000-1
295
+ assert floor(-10000000000.9) == -10000000000-1
296
+ assert floor(1e-100) == 0
297
+ assert floor(-1e-100) == -1
298
+ assert floor(1e100) == 1e100
299
+ assert floor(-1e100) == -1e100
300
+ assert ceil(0.1) == 1
301
+ assert ceil(0.9) == 1
302
+ assert ceil(-0.1) == 0
303
+ assert ceil(-0.9) == 0
304
+ assert ceil(10000000000.1) == 10000000000+1
305
+ assert ceil(10000000000.9) == 10000000000+1
306
+ assert ceil(-10000000000.1) == -10000000000
307
+ assert ceil(-10000000000.9) == -10000000000
308
+ assert ceil(1e-100) == 1
309
+ assert ceil(-1e-100) == 0
310
+ assert ceil(1e100) == 1e100
311
+ assert ceil(-1e100) == -1e100
312
+ assert nint(0.1) == 0
313
+ assert nint(0.9) == 1
314
+ assert nint(-0.1) == 0
315
+ assert nint(-0.9) == -1
316
+ assert nint(10000000000.1) == 10000000000
317
+ assert nint(10000000000.9) == 10000000000+1
318
+ assert nint(-10000000000.1) == -10000000000
319
+ assert nint(-10000000000.9) == -10000000000-1
320
+ assert nint(1e-100) == 0
321
+ assert nint(-1e-100) == 0
322
+ assert nint(1e100) == 1e100
323
+ assert nint(-1e100) == -1e100
324
+ assert floor(3.2+4.6j) == 3+4j
325
+ assert ceil(3.2+4.6j) == 4+5j
326
+ assert nint(3.2+4.6j) == 3+5j
327
+ for n in range(-10,10):
328
+ assert frac(n) == 0
329
+ assert frac(0.25) == 0.25
330
+ assert frac(1.25) == 0.25
331
+ assert frac(2.25) == 0.25
332
+ assert frac(-0.25) == 0.75
333
+ assert frac(-1.25) == 0.75
334
+ assert frac(-2.25) == 0.75
335
+ assert frac('1e100000000000000') == 0
336
+ u = mpf('1e-100000000000000')
337
+ assert frac(u) == u
338
+ assert frac(-u) == 1 # rounding!
339
+ u = mpf('1e-400')
340
+ assert frac(-u, prec=0) == fsub(1, u, exact=True)
341
+ assert frac(3.25+4.75j) == 0.25+0.75j
342
+
343
+ def test_isnan_etc():
344
+ from mpmath.rational import mpq
345
+ assert isnan(nan) == True
346
+ assert isnan(3) == False
347
+ assert isnan(mpf(3)) == False
348
+ assert isnan(inf) == False
349
+ assert isnan(mpc(2,nan)) == True
350
+ assert isnan(mpc(2,nan)) == True
351
+ assert isnan(mpc(nan,nan)) == True
352
+ assert isnan(mpc(2,2)) == False
353
+ assert isnan(mpc(nan,inf)) == True
354
+ assert isnan(mpc(inf,inf)) == False
355
+ assert isnan(mpq((3,2))) == False
356
+ assert isnan(mpq((0,1))) == False
357
+ assert isinf(inf) == True
358
+ assert isinf(-inf) == True
359
+ assert isinf(3) == False
360
+ assert isinf(nan) == False
361
+ assert isinf(3+4j) == False
362
+ assert isinf(mpc(inf)) == True
363
+ assert isinf(mpc(3,inf)) == True
364
+ assert isinf(mpc(inf,3)) == True
365
+ assert isinf(mpc(inf,inf)) == True
366
+ assert isinf(mpc(nan,inf)) == True
367
+ assert isinf(mpc(inf,nan)) == True
368
+ assert isinf(mpc(nan,nan)) == False
369
+ assert isinf(mpq((3,2))) == False
370
+ assert isinf(mpq((0,1))) == False
371
+ assert isnormal(3) == True
372
+ assert isnormal(3.5) == True
373
+ assert isnormal(mpf(3.5)) == True
374
+ assert isnormal(0) == False
375
+ assert isnormal(mpf(0)) == False
376
+ assert isnormal(0.0) == False
377
+ assert isnormal(inf) == False
378
+ assert isnormal(-inf) == False
379
+ assert isnormal(nan) == False
380
+ assert isnormal(float(inf)) == False
381
+ assert isnormal(mpc(0,0)) == False
382
+ assert isnormal(mpc(3,0)) == True
383
+ assert isnormal(mpc(0,3)) == True
384
+ assert isnormal(mpc(3,3)) == True
385
+ assert isnormal(mpc(0,nan)) == False
386
+ assert isnormal(mpc(0,inf)) == False
387
+ assert isnormal(mpc(3,nan)) == False
388
+ assert isnormal(mpc(3,inf)) == False
389
+ assert isnormal(mpc(3,-inf)) == False
390
+ assert isnormal(mpc(nan,0)) == False
391
+ assert isnormal(mpc(inf,0)) == False
392
+ assert isnormal(mpc(nan,3)) == False
393
+ assert isnormal(mpc(inf,3)) == False
394
+ assert isnormal(mpc(inf,nan)) == False
395
+ assert isnormal(mpc(nan,inf)) == False
396
+ assert isnormal(mpc(nan,nan)) == False
397
+ assert isnormal(mpc(inf,inf)) == False
398
+ assert isnormal(mpq((3,2))) == True
399
+ assert isnormal(mpq((0,1))) == False
400
+ assert isint(3) == True
401
+ assert isint(0) == True
402
+ assert isint(long(3)) == True
403
+ assert isint(long(0)) == True
404
+ assert isint(mpf(3)) == True
405
+ assert isint(mpf(0)) == True
406
+ assert isint(mpf(-3)) == True
407
+ assert isint(mpf(3.2)) == False
408
+ assert isint(3.2) == False
409
+ assert isint(nan) == False
410
+ assert isint(inf) == False
411
+ assert isint(-inf) == False
412
+ assert isint(mpc(0)) == True
413
+ assert isint(mpc(3)) == True
414
+ assert isint(mpc(3.2)) == False
415
+ assert isint(mpc(3,inf)) == False
416
+ assert isint(mpc(inf)) == False
417
+ assert isint(mpc(3,2)) == False
418
+ assert isint(mpc(0,2)) == False
419
+ assert isint(mpc(3,2),gaussian=True) == True
420
+ assert isint(mpc(3,0),gaussian=True) == True
421
+ assert isint(mpc(0,3),gaussian=True) == True
422
+ assert isint(3+4j) == False
423
+ assert isint(3+4j, gaussian=True) == True
424
+ assert isint(3+0j) == True
425
+ assert isint(mpq((3,2))) == False
426
+ assert isint(mpq((3,9))) == False
427
+ assert isint(mpq((9,3))) == True
428
+ assert isint(mpq((0,4))) == True
429
+ assert isint(mpq((1,1))) == True
430
+ assert isint(mpq((-1,1))) == True
431
+ assert mp.isnpint(0) == True
432
+ assert mp.isnpint(1) == False
433
+ assert mp.isnpint(-1) == True
434
+ assert mp.isnpint(-1.1) == False
435
+ assert mp.isnpint(-1.0) == True
436
+ assert mp.isnpint(mp.mpq(1,2)) == False
437
+ assert mp.isnpint(mp.mpq(-1,2)) == False
438
+ assert mp.isnpint(mp.mpq(-3,1)) == True
439
+ assert mp.isnpint(mp.mpq(0,1)) == True
440
+ assert mp.isnpint(mp.mpq(1,1)) == False
441
+ assert mp.isnpint(0+0j) == True
442
+ assert mp.isnpint(-1+0j) == True
443
+ assert mp.isnpint(-1.1+0j) == False
444
+ assert mp.isnpint(-1+0.1j) == False
445
+ assert mp.isnpint(0+0.1j) == False
446
+
447
+
448
+ def test_issue_438():
449
+ assert mpf(finf) == mpf('inf')
450
+ assert mpf(fninf) == mpf('-inf')
451
+ assert mpf(fnan)._mpf_ == mpf('nan')._mpf_
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/tests/test_bitwise.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Test bit-level integer and mpf operations
3
+ """
4
+
5
+ from mpmath import *
6
+ from mpmath.libmp import *
7
+
8
+ def test_bitcount():
9
+ assert bitcount(0) == 0
10
+ assert bitcount(1) == 1
11
+ assert bitcount(7) == 3
12
+ assert bitcount(8) == 4
13
+ assert bitcount(2**100) == 101
14
+ assert bitcount(2**100-1) == 100
15
+
16
+ def test_trailing():
17
+ assert trailing(0) == 0
18
+ assert trailing(1) == 0
19
+ assert trailing(2) == 1
20
+ assert trailing(7) == 0
21
+ assert trailing(8) == 3
22
+ assert trailing(2**100) == 100
23
+ assert trailing(2**100-1) == 0
24
+
25
+ def test_round_down():
26
+ assert from_man_exp(0, -4, 4, round_down)[:3] == (0, 0, 0)
27
+ assert from_man_exp(0xf0, -4, 4, round_down)[:3] == (0, 15, 0)
28
+ assert from_man_exp(0xf1, -4, 4, round_down)[:3] == (0, 15, 0)
29
+ assert from_man_exp(0xff, -4, 4, round_down)[:3] == (0, 15, 0)
30
+ assert from_man_exp(-0xf0, -4, 4, round_down)[:3] == (1, 15, 0)
31
+ assert from_man_exp(-0xf1, -4, 4, round_down)[:3] == (1, 15, 0)
32
+ assert from_man_exp(-0xff, -4, 4, round_down)[:3] == (1, 15, 0)
33
+
34
+ def test_round_up():
35
+ assert from_man_exp(0, -4, 4, round_up)[:3] == (0, 0, 0)
36
+ assert from_man_exp(0xf0, -4, 4, round_up)[:3] == (0, 15, 0)
37
+ assert from_man_exp(0xf1, -4, 4, round_up)[:3] == (0, 1, 4)
38
+ assert from_man_exp(0xff, -4, 4, round_up)[:3] == (0, 1, 4)
39
+ assert from_man_exp(-0xf0, -4, 4, round_up)[:3] == (1, 15, 0)
40
+ assert from_man_exp(-0xf1, -4, 4, round_up)[:3] == (1, 1, 4)
41
+ assert from_man_exp(-0xff, -4, 4, round_up)[:3] == (1, 1, 4)
42
+
43
+ def test_round_floor():
44
+ assert from_man_exp(0, -4, 4, round_floor)[:3] == (0, 0, 0)
45
+ assert from_man_exp(0xf0, -4, 4, round_floor)[:3] == (0, 15, 0)
46
+ assert from_man_exp(0xf1, -4, 4, round_floor)[:3] == (0, 15, 0)
47
+ assert from_man_exp(0xff, -4, 4, round_floor)[:3] == (0, 15, 0)
48
+ assert from_man_exp(-0xf0, -4, 4, round_floor)[:3] == (1, 15, 0)
49
+ assert from_man_exp(-0xf1, -4, 4, round_floor)[:3] == (1, 1, 4)
50
+ assert from_man_exp(-0xff, -4, 4, round_floor)[:3] == (1, 1, 4)
51
+
52
+ def test_round_ceiling():
53
+ assert from_man_exp(0, -4, 4, round_ceiling)[:3] == (0, 0, 0)
54
+ assert from_man_exp(0xf0, -4, 4, round_ceiling)[:3] == (0, 15, 0)
55
+ assert from_man_exp(0xf1, -4, 4, round_ceiling)[:3] == (0, 1, 4)
56
+ assert from_man_exp(0xff, -4, 4, round_ceiling)[:3] == (0, 1, 4)
57
+ assert from_man_exp(-0xf0, -4, 4, round_ceiling)[:3] == (1, 15, 0)
58
+ assert from_man_exp(-0xf1, -4, 4, round_ceiling)[:3] == (1, 15, 0)
59
+ assert from_man_exp(-0xff, -4, 4, round_ceiling)[:3] == (1, 15, 0)
60
+
61
+ def test_round_nearest():
62
+ assert from_man_exp(0, -4, 4, round_nearest)[:3] == (0, 0, 0)
63
+ assert from_man_exp(0xf0, -4, 4, round_nearest)[:3] == (0, 15, 0)
64
+ assert from_man_exp(0xf7, -4, 4, round_nearest)[:3] == (0, 15, 0)
65
+ assert from_man_exp(0xf8, -4, 4, round_nearest)[:3] == (0, 1, 4) # 1111.1000 -> 10000.0
66
+ assert from_man_exp(0xf9, -4, 4, round_nearest)[:3] == (0, 1, 4) # 1111.1001 -> 10000.0
67
+ assert from_man_exp(0xe8, -4, 4, round_nearest)[:3] == (0, 7, 1) # 1110.1000 -> 1110.0
68
+ assert from_man_exp(0xe9, -4, 4, round_nearest)[:3] == (0, 15, 0) # 1110.1001 -> 1111.0
69
+ assert from_man_exp(-0xf0, -4, 4, round_nearest)[:3] == (1, 15, 0)
70
+ assert from_man_exp(-0xf7, -4, 4, round_nearest)[:3] == (1, 15, 0)
71
+ assert from_man_exp(-0xf8, -4, 4, round_nearest)[:3] == (1, 1, 4)
72
+ assert from_man_exp(-0xf9, -4, 4, round_nearest)[:3] == (1, 1, 4)
73
+ assert from_man_exp(-0xe8, -4, 4, round_nearest)[:3] == (1, 7, 1)
74
+ assert from_man_exp(-0xe9, -4, 4, round_nearest)[:3] == (1, 15, 0)
75
+
76
+ def test_rounding_bugs():
77
+ # 1 less than power-of-two cases
78
+ assert from_man_exp(72057594037927935, -56, 53, round_up) == (0, 1, 0, 1)
79
+ assert from_man_exp(73786976294838205979, -65, 53, round_nearest) == (0, 1, 1, 1)
80
+ assert from_man_exp(31, 0, 4, round_up) == (0, 1, 5, 1)
81
+ assert from_man_exp(-31, 0, 4, round_floor) == (1, 1, 5, 1)
82
+ assert from_man_exp(255, 0, 7, round_up) == (0, 1, 8, 1)
83
+ assert from_man_exp(-255, 0, 7, round_floor) == (1, 1, 8, 1)
84
+
85
+ def test_rounding_issue_200():
86
+ a = from_man_exp(9867,-100)
87
+ b = from_man_exp(9867,-200)
88
+ c = from_man_exp(-1,0)
89
+ z = (1, 1023, -10, 10)
90
+ assert mpf_add(a, c, 10, 'd') == z
91
+ assert mpf_add(b, c, 10, 'd') == z
92
+ assert mpf_add(c, a, 10, 'd') == z
93
+ assert mpf_add(c, b, 10, 'd') == z
94
+
95
+ def test_perturb():
96
+ a = fone
97
+ b = from_float(0.99999999999999989)
98
+ c = from_float(1.0000000000000002)
99
+ assert mpf_perturb(a, 0, 53, round_nearest) == a
100
+ assert mpf_perturb(a, 1, 53, round_nearest) == a
101
+ assert mpf_perturb(a, 0, 53, round_up) == c
102
+ assert mpf_perturb(a, 0, 53, round_ceiling) == c
103
+ assert mpf_perturb(a, 0, 53, round_down) == a
104
+ assert mpf_perturb(a, 0, 53, round_floor) == a
105
+ assert mpf_perturb(a, 1, 53, round_up) == a
106
+ assert mpf_perturb(a, 1, 53, round_ceiling) == a
107
+ assert mpf_perturb(a, 1, 53, round_down) == b
108
+ assert mpf_perturb(a, 1, 53, round_floor) == b
109
+ a = mpf_neg(a)
110
+ b = mpf_neg(b)
111
+ c = mpf_neg(c)
112
+ assert mpf_perturb(a, 0, 53, round_nearest) == a
113
+ assert mpf_perturb(a, 1, 53, round_nearest) == a
114
+ assert mpf_perturb(a, 0, 53, round_up) == a
115
+ assert mpf_perturb(a, 0, 53, round_floor) == a
116
+ assert mpf_perturb(a, 0, 53, round_down) == b
117
+ assert mpf_perturb(a, 0, 53, round_ceiling) == b
118
+ assert mpf_perturb(a, 1, 53, round_up) == c
119
+ assert mpf_perturb(a, 1, 53, round_floor) == c
120
+ assert mpf_perturb(a, 1, 53, round_down) == a
121
+ assert mpf_perturb(a, 1, 53, round_ceiling) == a
122
+
123
+ def test_add_exact():
124
+ ff = from_float
125
+ assert mpf_add(ff(3.0), ff(2.5)) == ff(5.5)
126
+ assert mpf_add(ff(3.0), ff(-2.5)) == ff(0.5)
127
+ assert mpf_add(ff(-3.0), ff(2.5)) == ff(-0.5)
128
+ assert mpf_add(ff(-3.0), ff(-2.5)) == ff(-5.5)
129
+ assert mpf_sub(mpf_add(fone, ff(1e-100)), fone) == ff(1e-100)
130
+ assert mpf_sub(mpf_add(ff(1e-100), fone), fone) == ff(1e-100)
131
+ assert mpf_sub(mpf_add(fone, ff(-1e-100)), fone) == ff(-1e-100)
132
+ assert mpf_sub(mpf_add(ff(-1e-100), fone), fone) == ff(-1e-100)
133
+ assert mpf_add(fone, fzero) == fone
134
+ assert mpf_add(fzero, fone) == fone
135
+ assert mpf_add(fzero, fzero) == fzero
136
+
137
+ def test_long_exponent_shifts():
138
+ mp.dps = 15
139
+ # Check for possible bugs due to exponent arithmetic overflow
140
+ # in a C implementation
141
+ x = mpf(1)
142
+ for p in [32, 64]:
143
+ a = ldexp(1,2**(p-1))
144
+ b = ldexp(1,2**p)
145
+ c = ldexp(1,2**(p+1))
146
+ d = ldexp(1,-2**(p-1))
147
+ e = ldexp(1,-2**p)
148
+ f = ldexp(1,-2**(p+1))
149
+ assert (x+a) == a
150
+ assert (x+b) == b
151
+ assert (x+c) == c
152
+ assert (x+d) == x
153
+ assert (x+e) == x
154
+ assert (x+f) == x
155
+ assert (a+x) == a
156
+ assert (b+x) == b
157
+ assert (c+x) == c
158
+ assert (d+x) == x
159
+ assert (e+x) == x
160
+ assert (f+x) == x
161
+ assert (x-a) == -a
162
+ assert (x-b) == -b
163
+ assert (x-c) == -c
164
+ assert (x-d) == x
165
+ assert (x-e) == x
166
+ assert (x-f) == x
167
+ assert (a-x) == a
168
+ assert (b-x) == b
169
+ assert (c-x) == c
170
+ assert (d-x) == -x
171
+ assert (e-x) == -x
172
+ assert (f-x) == -x
173
+
174
+ def test_float_rounding():
175
+ mp.prec = 64
176
+ for x in [mpf(1), mpf(1)+eps, mpf(1)-eps, -mpf(1)+eps, -mpf(1)-eps]:
177
+ fa = float(x)
178
+ fb = float(fadd(x,0,prec=53,rounding='n'))
179
+ assert fa == fb
180
+ z = mpc(x,x)
181
+ ca = complex(z)
182
+ cb = complex(fadd(z,0,prec=53,rounding='n'))
183
+ assert ca == cb
184
+ for rnd in ['n', 'd', 'u', 'f', 'c']:
185
+ fa = to_float(x._mpf_, rnd=rnd)
186
+ fb = to_float(fadd(x,0,prec=53,rounding=rnd)._mpf_, rnd=rnd)
187
+ assert fa == fb
188
+ mp.prec = 53
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/tests/test_convert.py ADDED
@@ -0,0 +1,233 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ from mpmath import *
3
+ from mpmath.libmp import *
4
+
5
+
6
+ def test_basic_string():
7
+ """
8
+ Test basic string conversion
9
+ """
10
+ mp.dps = 15
11
+ assert mpf('3') == mpf('3.0') == mpf('0003.') == mpf('0.03e2') == mpf(3.0)
12
+ assert mpf('30') == mpf('30.0') == mpf('00030.') == mpf(30.0)
13
+ for i in range(10):
14
+ for j in range(10):
15
+ assert mpf('%ie%i' % (i,j)) == i * 10**j
16
+ assert str(mpf('25000.0')) == '25000.0'
17
+ assert str(mpf('2500.0')) == '2500.0'
18
+ assert str(mpf('250.0')) == '250.0'
19
+ assert str(mpf('25.0')) == '25.0'
20
+ assert str(mpf('2.5')) == '2.5'
21
+ assert str(mpf('0.25')) == '0.25'
22
+ assert str(mpf('0.025')) == '0.025'
23
+ assert str(mpf('0.0025')) == '0.0025'
24
+ assert str(mpf('0.00025')) == '0.00025'
25
+ assert str(mpf('0.000025')) == '2.5e-5'
26
+ assert str(mpf(0)) == '0.0'
27
+ assert str(mpf('2.5e1000000000000000000000')) == '2.5e+1000000000000000000000'
28
+ assert str(mpf('2.6e-1000000000000000000000')) == '2.6e-1000000000000000000000'
29
+ assert str(mpf(1.23402834e-15)) == '1.23402834e-15'
30
+ assert str(mpf(-1.23402834e-15)) == '-1.23402834e-15'
31
+ assert str(mpf(-1.2344e-15)) == '-1.2344e-15'
32
+ assert repr(mpf(-1.2344e-15)) == "mpf('-1.2343999999999999e-15')"
33
+ assert str(mpf("2163048125L")) == '2163048125.0'
34
+ assert str(mpf("-2163048125l")) == '-2163048125.0'
35
+ assert str(mpf("-2163048125L/1088391168")) == '-1.98738118113799'
36
+ assert str(mpf("2163048125/1088391168l")) == '1.98738118113799'
37
+
38
+ def test_pretty():
39
+ mp.pretty = True
40
+ assert repr(mpf(2.5)) == '2.5'
41
+ assert repr(mpc(2.5,3.5)) == '(2.5 + 3.5j)'
42
+ mp.pretty = False
43
+ iv.pretty = True
44
+ assert repr(mpi(2.5,3.5)) == '[2.5, 3.5]'
45
+ iv.pretty = False
46
+
47
+ def test_str_whitespace():
48
+ assert mpf('1.26 ') == 1.26
49
+
50
+ def test_unicode():
51
+ mp.dps = 15
52
+ try:
53
+ unicode = unicode
54
+ except NameError:
55
+ unicode = str
56
+ assert mpf(unicode('2.76')) == 2.76
57
+ assert mpf(unicode('inf')) == inf
58
+
59
+ def test_str_format():
60
+ assert to_str(from_float(0.1),15,strip_zeros=False) == '0.100000000000000'
61
+ assert to_str(from_float(0.0),15,show_zero_exponent=True) == '0.0e+0'
62
+ assert to_str(from_float(0.0),0,show_zero_exponent=True) == '.0e+0'
63
+ assert to_str(from_float(0.0),0,show_zero_exponent=False) == '.0'
64
+ assert to_str(from_float(0.0),1,show_zero_exponent=True) == '0.0e+0'
65
+ assert to_str(from_float(0.0),1,show_zero_exponent=False) == '0.0'
66
+ assert to_str(from_float(1.23),3,show_zero_exponent=True) == '1.23e+0'
67
+ assert to_str(from_float(1.23456789000000e-2),15,strip_zeros=False,min_fixed=0,max_fixed=0) == '1.23456789000000e-2'
68
+ assert to_str(from_float(1.23456789000000e+2),15,strip_zeros=False,min_fixed=0,max_fixed=0) == '1.23456789000000e+2'
69
+ assert to_str(from_float(2.1287e14), 15, max_fixed=1000) == '212870000000000.0'
70
+ assert to_str(from_float(2.1287e15), 15, max_fixed=1000) == '2128700000000000.0'
71
+ assert to_str(from_float(2.1287e16), 15, max_fixed=1000) == '21287000000000000.0'
72
+ assert to_str(from_float(2.1287e30), 15, max_fixed=1000) == '2128700000000000000000000000000.0'
73
+
74
+ def test_tight_string_conversion():
75
+ mp.dps = 15
76
+ # In an old version, '0.5' wasn't recognized as representing
77
+ # an exact binary number and was erroneously rounded up or down
78
+ assert from_str('0.5', 10, round_floor) == fhalf
79
+ assert from_str('0.5', 10, round_ceiling) == fhalf
80
+
81
+ def test_eval_repr_invariant():
82
+ """Test that eval(repr(x)) == x"""
83
+ random.seed(123)
84
+ for dps in [10, 15, 20, 50, 100]:
85
+ mp.dps = dps
86
+ for i in range(1000):
87
+ a = mpf(random.random())**0.5 * 10**random.randint(-100, 100)
88
+ assert eval(repr(a)) == a
89
+ mp.dps = 15
90
+
91
+ def test_str_bugs():
92
+ mp.dps = 15
93
+ # Decimal rounding used to give the wrong exponent in some cases
94
+ assert str(mpf('1e600')) == '1.0e+600'
95
+ assert str(mpf('1e10000')) == '1.0e+10000'
96
+
97
+ def test_str_prec0():
98
+ assert to_str(from_float(1.234), 0) == '.0e+0'
99
+ assert to_str(from_float(1e-15), 0) == '.0e-15'
100
+ assert to_str(from_float(1e+15), 0) == '.0e+15'
101
+ assert to_str(from_float(-1e-15), 0) == '-.0e-15'
102
+ assert to_str(from_float(-1e+15), 0) == '-.0e+15'
103
+
104
+ def test_convert_rational():
105
+ mp.dps = 15
106
+ assert from_rational(30, 5, 53, round_nearest) == (0, 3, 1, 2)
107
+ assert from_rational(-7, 4, 53, round_nearest) == (1, 7, -2, 3)
108
+ assert to_rational((0, 1, -1, 1)) == (1, 2)
109
+
110
+ def test_custom_class():
111
+ class mympf:
112
+ @property
113
+ def _mpf_(self):
114
+ return mpf(3.5)._mpf_
115
+ class mympc:
116
+ @property
117
+ def _mpc_(self):
118
+ return mpf(3.5)._mpf_, mpf(2.5)._mpf_
119
+ assert mpf(2) + mympf() == 5.5
120
+ assert mympf() + mpf(2) == 5.5
121
+ assert mpf(mympf()) == 3.5
122
+ assert mympc() + mpc(2) == mpc(5.5, 2.5)
123
+ assert mpc(2) + mympc() == mpc(5.5, 2.5)
124
+ assert mpc(mympc()) == (3.5+2.5j)
125
+
126
+ def test_conversion_methods():
127
+ class SomethingRandom:
128
+ pass
129
+ class SomethingReal:
130
+ def _mpmath_(self, prec, rounding):
131
+ return mp.make_mpf(from_str('1.3', prec, rounding))
132
+ class SomethingComplex:
133
+ def _mpmath_(self, prec, rounding):
134
+ return mp.make_mpc((from_str('1.3', prec, rounding), \
135
+ from_str('1.7', prec, rounding)))
136
+ x = mpf(3)
137
+ z = mpc(3)
138
+ a = SomethingRandom()
139
+ y = SomethingReal()
140
+ w = SomethingComplex()
141
+ for d in [15, 45]:
142
+ mp.dps = d
143
+ assert (x+y).ae(mpf('4.3'))
144
+ assert (y+x).ae(mpf('4.3'))
145
+ assert (x+w).ae(mpc('4.3', '1.7'))
146
+ assert (w+x).ae(mpc('4.3', '1.7'))
147
+ assert (z+y).ae(mpc('4.3'))
148
+ assert (y+z).ae(mpc('4.3'))
149
+ assert (z+w).ae(mpc('4.3', '1.7'))
150
+ assert (w+z).ae(mpc('4.3', '1.7'))
151
+ x-y; y-x; x-w; w-x; z-y; y-z; z-w; w-z
152
+ x*y; y*x; x*w; w*x; z*y; y*z; z*w; w*z
153
+ x/y; y/x; x/w; w/x; z/y; y/z; z/w; w/z
154
+ x**y; y**x; x**w; w**x; z**y; y**z; z**w; w**z
155
+ x==y; y==x; x==w; w==x; z==y; y==z; z==w; w==z
156
+ mp.dps = 15
157
+ assert x.__add__(a) is NotImplemented
158
+ assert x.__radd__(a) is NotImplemented
159
+ assert x.__lt__(a) is NotImplemented
160
+ assert x.__gt__(a) is NotImplemented
161
+ assert x.__le__(a) is NotImplemented
162
+ assert x.__ge__(a) is NotImplemented
163
+ assert x.__eq__(a) is NotImplemented
164
+ assert x.__ne__(a) is NotImplemented
165
+ # implementation detail
166
+ if hasattr(x, "__cmp__"):
167
+ assert x.__cmp__(a) is NotImplemented
168
+ assert x.__sub__(a) is NotImplemented
169
+ assert x.__rsub__(a) is NotImplemented
170
+ assert x.__mul__(a) is NotImplemented
171
+ assert x.__rmul__(a) is NotImplemented
172
+ assert x.__div__(a) is NotImplemented
173
+ assert x.__rdiv__(a) is NotImplemented
174
+ assert x.__mod__(a) is NotImplemented
175
+ assert x.__rmod__(a) is NotImplemented
176
+ assert x.__pow__(a) is NotImplemented
177
+ assert x.__rpow__(a) is NotImplemented
178
+ assert z.__add__(a) is NotImplemented
179
+ assert z.__radd__(a) is NotImplemented
180
+ assert z.__eq__(a) is NotImplemented
181
+ assert z.__ne__(a) is NotImplemented
182
+ assert z.__sub__(a) is NotImplemented
183
+ assert z.__rsub__(a) is NotImplemented
184
+ assert z.__mul__(a) is NotImplemented
185
+ assert z.__rmul__(a) is NotImplemented
186
+ assert z.__div__(a) is NotImplemented
187
+ assert z.__rdiv__(a) is NotImplemented
188
+ assert z.__pow__(a) is NotImplemented
189
+ assert z.__rpow__(a) is NotImplemented
190
+
191
+ def test_mpmathify():
192
+ assert mpmathify('1/2') == 0.5
193
+ assert mpmathify('(1.0+1.0j)') == mpc(1, 1)
194
+ assert mpmathify('(1.2e-10 - 3.4e5j)') == mpc('1.2e-10', '-3.4e5')
195
+ assert mpmathify('1j') == mpc(1j)
196
+
197
+ def test_issue548():
198
+ try:
199
+ # This expression is invalid, but may trigger the ReDOS vulnerability
200
+ # in the regular expression for parsing complex numbers.
201
+ mpmathify('(' + '1' * 5000 + '!j')
202
+ except:
203
+ return
204
+ # The expression is invalid and should raise an exception.
205
+ assert False
206
+
207
+ def test_compatibility():
208
+ try:
209
+ import numpy as np
210
+ from fractions import Fraction
211
+ from decimal import Decimal
212
+ import decimal
213
+ except ImportError:
214
+ return
215
+ # numpy types
216
+ for nptype in np.core.numerictypes.typeDict.values():
217
+ if issubclass(nptype, np.complexfloating):
218
+ x = nptype(complex(0.5, -0.5))
219
+ elif issubclass(nptype, np.floating):
220
+ x = nptype(0.5)
221
+ elif issubclass(nptype, np.integer):
222
+ x = nptype(2)
223
+ # Handle the weird types
224
+ try: diff = np.abs(type(np.sqrt(x))(sqrt(x)) - np.sqrt(x))
225
+ except: continue
226
+ assert diff < 2.0**-53
227
+ #Fraction and Decimal
228
+ oldprec = mp.prec
229
+ mp.prec = 1000
230
+ decimal.getcontext().prec = mp.dps
231
+ assert sqrt(Fraction(2, 3)).ae(sqrt(mpf('2/3')))
232
+ assert sqrt(Decimal(2)/Decimal(3)).ae(sqrt(mpf('2/3')))
233
+ mp.prec = oldprec
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/tests/test_eigen.py ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ from mpmath import mp
5
+ from mpmath import libmp
6
+
7
+ xrange = libmp.backend.xrange
8
+
9
def run_hessenberg(A, verbose = 0):
    """Check ``mp.hessenberg(A)``: the factors must reconstruct ``A`` as
    Q * H * Q^H, and ``H`` must be exactly upper Hessenberg (zero below
    the first subdiagonal).

    A is a square mp.matrix; verbose > 0 prints error norms, > 1 dumps
    the matrices as well.
    """
    if verbose > 1:
        print("original matrix (hessenberg):\n", A)

    n = A.rows

    Q, H = mp.hessenberg(A)

    if verbose > 1:
        print("Q:\n",Q)
        print("H:\n",H)

    # Reconstruct A from the factorization (Q is expected to be unitary).
    B = Q * H * Q.transpose_conj()

    # Tolerance eps^0.8: slightly looser than working precision to absorb
    # accumulated rounding in the reconstruction.
    eps = mp.exp(0.8 * mp.log(mp.eps))

    # err0: mean absolute elementwise reconstruction error |A - B|.
    err0 = 0
    for x in xrange(n):
        for y in xrange(n):
            err0 += abs(A[y,x] - B[y,x])
    err0 /= n * n

    # err1: total mass strictly below the first subdiagonal of H;
    # these entries must be exactly zero by construction.
    err1 = 0
    for x in xrange(n):
        for y in xrange(x + 2, n):
            err1 += abs(H[y,x])

    if verbose > 0:
        print("difference (H):", err0, err1)

    if verbose > 1:
        print("B:\n", B)

    assert err0 < eps
    assert err1 == 0
44
+
45
+
46
def run_schur(A, verbose = 0):
    """Check ``mp.schur(A)``: A == Q * R * Q^H with Q unitary and R
    exactly upper triangular.

    A is a square mp.matrix; verbose > 0 prints error norms, > 1 dumps
    the matrices as well.
    """
    if verbose > 1:
        print("original matrix (schur):\n", A)

    n = A.rows

    Q, R = mp.schur(A)

    if verbose > 1:
        print("Q:\n", Q)
        print("R:\n", R)

    # B reconstructs A; C should be the identity if Q is unitary.
    B = Q * R * Q.transpose_conj()
    C = Q * Q.transpose_conj()

    # Tolerance eps^0.8: headroom above working precision.
    eps = mp.exp(0.8 * mp.log(mp.eps))

    # err0: mean absolute elementwise reconstruction error |A - B|.
    err0 = 0
    for x in xrange(n):
        for y in xrange(n):
            err0 += abs(A[y,x] - B[y,x])
    err0 /= n * n

    # err1: mean deviation of Q * Q^H from the identity (unitarity check).
    err1 = 0
    for x in xrange(n):
        for y in xrange(n):
            if x == y:
                C[y,x] -= 1
            err1 += abs(C[y,x])
    err1 /= n * n

    # err2: mass strictly below the diagonal of R; must be exactly zero.
    err2 = 0
    for x in xrange(n):
        for y in xrange(x + 1, n):
            err2 += abs(R[y,x])

    if verbose > 0:
        print("difference (S):", err0, err1, err2)

    if verbose > 1:
        print("B:\n", B)

    assert err0 < eps
    assert err1 < eps
    assert err2 == 0
91
+
92
def run_eig(A, verbose = 0):
    """Check ``mp.eig(A, left=True, right=True)``: each right eigenpair
    must satisfy A v = lambda v and each left eigenpair u A = lambda u.

    A is a square mp.matrix; verbose > 0 prints the error norm, > 1 dumps
    the eigenvalues and eigenvector matrices.
    """
    if verbose > 1:
        print("original matrix (eig):\n", A)

    n = A.rows

    E, EL, ER = mp.eig(A, left = True, right = True)

    if verbose > 1:
        print("E:\n", E)
        print("EL:\n", EL)
        print("ER:\n", ER)

    # Tolerance eps^0.8: headroom above working precision.
    eps = mp.exp(0.8 * mp.log(mp.eps))

    # err0: worst residual norm over all left and right eigenpairs.
    err0 = 0
    for i in xrange(n):
        # Right eigenvector residual: A * v_i - lambda_i * v_i.
        B = A * ER[:,i] - E[i] * ER[:,i]
        err0 = max(err0, mp.mnorm(B))

        # Left eigenvector residual: u_i * A - lambda_i * u_i.
        B = EL[i,:] * A - EL[i,:] * E[i]
        err0 = max(err0, mp.mnorm(B))

    err0 /= n * n

    if verbose > 0:
        print("difference (E):", err0)

    assert err0 < eps
121
+
122
+ #####################
123
+
124
def test_eig_dyn():
    """Stress the eigen routines on 5 random matrices of size 1..5:
    real or complex (coin flip), and with probability 1/2 scaled by 10
    and rounded to integer entries (exercising repeated eigenvalues).
    """
    v = 0  # verbosity passed through to the run_* checkers
    for i in xrange(5):
        n = 1 + int(mp.rand() * 5)
        if mp.rand() > 0.5:
            # real
            A = 2 * mp.randmatrix(n, n) - 1
            if mp.rand() > 0.5:
                A *= 10
                for x in xrange(n):
                    for y in xrange(n):
                        A[x,y] = int(A[x,y])
        else:
            # complex: independent random real and imaginary parts in (-1, 1)
            A = (2 * mp.randmatrix(n, n) - 1) + 1j * (2 * mp.randmatrix(n, n) - 1)
            if mp.rand() > 0.5:
                A *= 10
                for x in xrange(n):
                    for y in xrange(n):
                        A[x,y] = int(mp.re(A[x,y])) + 1j * int(mp.im(A[x,y]))

        run_hessenberg(A, verbose = v)
        run_schur(A, verbose = v)
        run_eig(A, verbose = v)
147
+
148
def test_eig():
    """Deterministic eigen checks on hard cases: defective (Jordan-block)
    matrices and a cyclic permutation matrix, each also transposed."""
    verbose = 0

    jordan3 = mp.matrix([[2, 1, 0],   # jordan block of size 3
                         [0, 2, 1],
                         [0, 0, 2]])
    jordan2a = mp.matrix([[2, 0, 0],  # jordan block of size 2
                          [0, 2, 1],
                          [0, 0, 2]])
    jordan2b = mp.matrix([[2, 0, 1],  # jordan block of size 2
                          [0, 2, 0],
                          [0, 0, 2]])
    cyclic = mp.matrix([[0, 0, 1],    # cyclic
                        [1, 0, 0],
                        [0, 1, 0]])

    samples = []
    for M in (jordan3, jordan2a, jordan2b, cyclic):
        samples.append(M)
        samples.append(M.transpose())

    for A in samples:
        run_hessenberg(A, verbose=verbose)
        run_schur(A, verbose=verbose)
        run_eig(A, verbose=verbose)
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/tests/test_fp.py ADDED
@@ -0,0 +1,1671 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Easy-to-use test-generating code:
3
+
4
+ cases = '''
5
+ exp 2.25
6
+ log 2.25
7
+ '''
8
+
9
+ from mpmath import *
10
+ mp.dps = 20
11
+ for test in cases.splitlines():
12
+ if not test:
13
+ continue
14
+ words = test.split()
15
+ fname = words[0]
16
+ args = words[1:]
17
+ argstr = ", ".join(args)
18
+ testline = "%s(%s)" % (fname, argstr)
19
+ ans = str(eval(testline))
20
+ print " assert ae(fp.%s, %s)" % (testline, ans)
21
+
22
+ """
23
+
24
+ from mpmath import fp
25
+
26
def ae(x, y, tol=1e-12):
    """Approximately equal: True on exact equality, otherwise when the
    error relative to *y* is within *tol*. Works for real and complex."""
    return True if x == y else abs(x - y) <= tol * abs(y)
30
+
31
def test_conj():
    """fp.conj is the identity on reals and the complex conjugate on
    complex numbers; fp.fdot conjugates its second factors when asked."""
    for value, expected in ((4, 4), (3+4j, 3-4j)):
        assert fp.conj(value) == expected
    assert fp.fdot([1,2],[3,2+1j], conjugate=True) == 7-2j
35
+
36
def test_fp_number_parts():
    """fp.arg / re / im / floor / ceil on real and complex arguments.

    For complex input, floor and ceil act componentwise on the real and
    imaginary parts (see the -1.5-0.5j cases).
    """
    assert ae(fp.arg(3), 0.0)
    assert ae(fp.arg(-3), 3.1415926535897932385)
    assert ae(fp.arg(3j), 1.5707963267948966192)
    assert ae(fp.arg(-3j), -1.5707963267948966192)
    assert ae(fp.arg(2+3j), 0.98279372324732906799)
    assert ae(fp.arg(-1-1j), -2.3561944901923449288)
    assert ae(fp.re(2.5), 2.5)
    assert ae(fp.re(2.5+3j), 2.5)
    assert ae(fp.im(2.5), 0.0)
    assert ae(fp.im(2.5+3j), 3.0)
    assert ae(fp.floor(2.5), 2.0)
    assert ae(fp.floor(2), 2.0)
    assert ae(fp.floor(2.0+0j), (2.0 + 0.0j))
    assert ae(fp.floor(-1.5-0.5j), (-2.0 - 1.0j))
    assert ae(fp.ceil(2.5), 3.0)
    assert ae(fp.ceil(2), 2.0)
    assert ae(fp.ceil(2.0+0j), (2.0 + 0.0j))
    assert ae(fp.ceil(-1.5-0.5j), (-1.0 + 0.0j))
55
+
56
def test_fp_cospi_sinpi():
    """fp.sinpi / fp.cospi (sin(pi*x), cos(pi*x)) at special rational
    multiples, complex arguments, and tiny arguments near zero where
    naive sin(pi*x) would lose accuracy.
    """
    # sinpi on real quarter-period points (exact zeros and +-1, +-sqrt(2)/2).
    assert ae(fp.sinpi(0), 0.0)
    assert ae(fp.sinpi(0.25), 0.7071067811865475244)
    assert ae(fp.sinpi(0.5), 1.0)
    assert ae(fp.sinpi(0.75), 0.7071067811865475244)
    assert ae(fp.sinpi(1), 0.0)
    assert ae(fp.sinpi(1.25), -0.7071067811865475244)
    assert ae(fp.sinpi(1.5), -1.0)
    assert ae(fp.sinpi(1.75), -0.7071067811865475244)
    assert ae(fp.sinpi(2), 0.0)
    assert ae(fp.sinpi(2.25), 0.7071067811865475244)
    # sinpi with imaginary offset +3j (large hyperbolic magnitudes).
    assert ae(fp.sinpi(0+3j), (0.0 + 6195.8238636085899556j))
    assert ae(fp.sinpi(0.25+3j), (4381.1091260582448033 + 4381.1090689950686908j))
    assert ae(fp.sinpi(0.5+3j), (6195.8239443081075259 + 0.0j))
    assert ae(fp.sinpi(0.75+3j), (4381.1091260582448033 - 4381.1090689950686908j))
    assert ae(fp.sinpi(1+3j), (0.0 - 6195.8238636085899556j))
    assert ae(fp.sinpi(1.25+3j), (-4381.1091260582448033 - 4381.1090689950686908j))
    assert ae(fp.sinpi(1.5+3j), (-6195.8239443081075259 + 0.0j))
    assert ae(fp.sinpi(1.75+3j), (-4381.1091260582448033 + 4381.1090689950686908j))
    assert ae(fp.sinpi(2+3j), (0.0 + 6195.8238636085899556j))
    assert ae(fp.sinpi(2.25+3j), (4381.1091260582448033 + 4381.1090689950686908j))
    # Negative and tiny arguments (sinpi(x) ~= pi*x near 0, all quadrants).
    assert ae(fp.sinpi(-0.75), -0.7071067811865475244)
    assert ae(fp.sinpi(-1e-10), -3.1415926535897933529e-10)
    assert ae(fp.sinpi(1e-10), 3.1415926535897933529e-10)
    assert ae(fp.sinpi(1e-10+1e-10j), (3.141592653589793353e-10 + 3.1415926535897933528e-10j))
    assert ae(fp.sinpi(1e-10-1e-10j), (3.141592653589793353e-10 - 3.1415926535897933528e-10j))
    assert ae(fp.sinpi(-1e-10+1e-10j), (-3.141592653589793353e-10 + 3.1415926535897933528e-10j))
    assert ae(fp.sinpi(-1e-10-1e-10j), (-3.141592653589793353e-10 - 3.1415926535897933528e-10j))
    # cospi on real quarter-period points.
    assert ae(fp.cospi(0), 1.0)
    assert ae(fp.cospi(0.25), 0.7071067811865475244)
    assert ae(fp.cospi(0.5), 0.0)
    assert ae(fp.cospi(0.75), -0.7071067811865475244)
    assert ae(fp.cospi(1), -1.0)
    assert ae(fp.cospi(1.25), -0.7071067811865475244)
    assert ae(fp.cospi(1.5), 0.0)
    assert ae(fp.cospi(1.75), 0.7071067811865475244)
    assert ae(fp.cospi(2), 1.0)
    assert ae(fp.cospi(2.25), 0.7071067811865475244)
    # cospi with imaginary offset +3j.
    assert ae(fp.cospi(0+3j), (6195.8239443081075259 + 0.0j))
    assert ae(fp.cospi(0.25+3j), (4381.1091260582448033 - 4381.1090689950686908j))
    assert ae(fp.cospi(0.5+3j), (0.0 - 6195.8238636085899556j))
    assert ae(fp.cospi(0.75+3j), (-4381.1091260582448033 - 4381.1090689950686908j))
    assert ae(fp.cospi(1+3j), (-6195.8239443081075259 + 0.0j))
    assert ae(fp.cospi(1.25+3j), (-4381.1091260582448033 + 4381.1090689950686908j))
    assert ae(fp.cospi(1.5+3j), (0.0 + 6195.8238636085899556j))
    assert ae(fp.cospi(1.75+3j), (4381.1091260582448033 + 4381.1090689950686908j))
    assert ae(fp.cospi(2+3j), (6195.8239443081075259 + 0.0j))
    assert ae(fp.cospi(2.25+3j), (4381.1091260582448033 - 4381.1090689950686908j))
    # Miscellaneous negative / mixed complex arguments.
    assert ae(fp.cospi(-0.75), -0.7071067811865475244)
    assert ae(fp.sinpi(-0.7), -0.80901699437494750611)
    assert ae(fp.cospi(-0.7), -0.5877852522924730163)
    assert ae(fp.cospi(-3+2j), (-267.74676148374822225 + 0.0j))
    assert ae(fp.sinpi(-3+2j), (0.0 - 267.74489404101651426j))
    assert ae(fp.sinpi(-0.7+2j), (-216.6116802292079471 - 157.37650009392034693j))
    assert ae(fp.cospi(-0.7+2j), (-157.37759774921754565 + 216.61016943630197336j))
111
+
112
def test_fp_expj():
    """fp.expj (exp(i*x)) and fp.expjpi (exp(i*pi*x)) for real and
    complex arguments; results are always complex."""
    assert ae(fp.expj(0), (1.0 + 0.0j))
    assert ae(fp.expj(1), (0.5403023058681397174 + 0.84147098480789650665j))
    assert ae(fp.expj(2), (-0.416146836547142387 + 0.9092974268256816954j))
    assert ae(fp.expj(0.75), (0.73168886887382088631 + 0.68163876002333416673j))
    assert ae(fp.expj(2+3j), (-0.020718731002242879378 + 0.045271253156092975488j))
    assert ae(fp.expjpi(0), (1.0 + 0.0j))
    assert ae(fp.expjpi(1), (-1.0 + 0.0j))
    assert ae(fp.expjpi(2), (1.0 + 0.0j))
    assert ae(fp.expjpi(0.75), (-0.7071067811865475244 + 0.7071067811865475244j))
    assert ae(fp.expjpi(2+3j), (0.000080699517570304599239 + 0.0j))
123
+
124
def test_fp_bernoulli():
    """fp.bernoulli: first Bernoulli numbers, including B1 = -1/2 and the
    zero value at an odd index > 1."""
    assert ae(fp.bernoulli(0), 1.0)
    assert ae(fp.bernoulli(1), -0.5)
    assert ae(fp.bernoulli(2), 0.16666666666666666667)
    assert ae(fp.bernoulli(10), 0.075757575757575757576)
    assert ae(fp.bernoulli(11), 0.0)
130
+
131
def test_fp_gamma():
    """fp.gamma, fp.fac (x!) and fp.loggamma on real, negative and complex
    arguments, including the branch behaviour of loggamma on the negative
    real axis (imaginary part in multiples of -pi).
    """
    assert ae(fp.gamma(1), 1.0)
    assert ae(fp.gamma(1.5), 0.88622692545275801365)
    assert ae(fp.gamma(10), 362880.0)
    assert ae(fp.gamma(-0.5), -3.5449077018110320546)
    assert ae(fp.gamma(-7.1), 0.0016478244570263333622)
    assert ae(fp.gamma(12.3), 83385367.899970000963)
    assert ae(fp.gamma(2+0j), (1.0 + 0.0j))
    assert ae(fp.gamma(-2.5+0j), (-0.94530872048294188123 + 0.0j))
    assert ae(fp.gamma(3+4j), (0.0052255384713692141947 - 0.17254707929430018772j))
    assert ae(fp.gamma(-3-4j), (0.00001460997305874775607 - 0.000020760733311509070396j))
    # fac(x) == gamma(x+1)
    assert ae(fp.fac(0), 1.0)
    assert ae(fp.fac(1), 1.0)
    assert ae(fp.fac(20), 2432902008176640000.0)
    assert ae(fp.fac(-3.5), -0.94530872048294188123)
    assert ae(fp.fac(2+3j), (-0.44011340763700171113 - 0.06363724312631702183j))
    # loggamma: real on the positive axis, complex (branch tracked) elsewhere.
    assert ae(fp.loggamma(1.0), 0.0)
    assert ae(fp.loggamma(2.0), 0.0)
    assert ae(fp.loggamma(3.0), 0.69314718055994530942)
    assert ae(fp.loggamma(7.25), 7.0521854507385394449)
    assert ae(fp.loggamma(1000.0), 5905.2204232091812118)
    assert ae(fp.loggamma(1e50), 1.1412925464970229298e+52)
    assert ae(fp.loggamma(1e25+1e25j), (5.6125802751733671621e+26 + 5.7696599078528568383e+26j))
    assert ae(fp.loggamma(3+4j), (-1.7566267846037841105 + 4.7426644380346579282j))
    assert ae(fp.loggamma(-0.5), (1.2655121234846453965 - 3.1415926535897932385j))
    assert ae(fp.loggamma(-1.25), (1.3664317612369762346 - 6.2831853071795864769j))
    assert ae(fp.loggamma(-2.75), (0.0044878975359557733115 - 9.4247779607693797154j))
    assert ae(fp.loggamma(-3.5), (-1.3090066849930420464 - 12.566370614359172954j))
    assert ae(fp.loggamma(-4.5), (-2.8130840817693161197 - 15.707963267948966192j))
    assert ae(fp.loggamma(-2+3j), (-6.776523813485657093 - 4.568791367260286402j))
    assert ae(fp.loggamma(-1000.3), (-5912.8440347785205041 - 3144.7342462433830317j))
    assert ae(fp.loggamma(-100-100j), (-632.35117666833135562 - 158.37641469650352462j))
    assert ae(fp.loggamma(1e-10), 23.025850929882735237)
    assert ae(fp.loggamma(-1e-10), (23.02585092999817837 - 3.1415926535897932385j))
    assert ae(fp.loggamma(1e-10j), (23.025850929940456804 - 1.5707963268526181857j))
    assert ae(fp.loggamma(1e-10j-1e-10), (22.679277339718205716 - 2.3561944902500664954j))
167
+
168
def test_fp_psi():
    """fp.psi (polygamma of order m) and fp.harmonic (harmonic numbers,
    analytically continued) on real, huge, negative and complex args."""
    assert ae(fp.psi(0, 3.7), 1.1671535393615114409)
    assert ae(fp.psi(0, 0.5), -1.9635100260214234794)
    assert ae(fp.psi(0, 1), -0.57721566490153286061)
    assert ae(fp.psi(0, -2.5), 1.1031566406452431872)
    assert ae(fp.psi(0, 12.9), 2.5179671503279156347)
    assert ae(fp.psi(0, 100), 4.6001618527380874002)
    assert ae(fp.psi(0, 2500.3), 7.8239660143238547877)
    assert ae(fp.psi(0, 1e40), 92.103403719761827391)
    assert ae(fp.psi(0, 1e200), 460.51701859880913677)
    assert ae(fp.psi(0, 3.7+0j), (1.1671535393615114409 + 0.0j))
    assert ae(fp.psi(1, 3), 0.39493406684822643647)
    assert ae(fp.psi(3, 2+3j), (-0.05383196209159972116 + 0.0076890935247364805218j))
    assert ae(fp.psi(4, -0.5+1j), (1.2719531355492328195 - 18.211833410936276774j))
    assert ae(fp.harmonic(0), 0.0)
    assert ae(fp.harmonic(1), 1.0)
    assert ae(fp.harmonic(2), 1.5)
    assert ae(fp.harmonic(100), 5.1873775176396202608)
    assert ae(fp.harmonic(-2.5), 1.2803723055467760478)
    assert ae(fp.harmonic(2+3j), (1.9390425294578375875 + 0.87336044981834544043j))
    assert ae(fp.harmonic(-5-4j), (2.3725754822349437733 - 2.4160904444801621j))
189
+
190
def test_fp_zeta():
    """fp.zeta (including Hurwitz zeta with a second argument and values
    very close to the pole at s=1), fp.altzeta, fp.primezeta and
    fp.riemannr."""
    assert ae(fp.zeta(1e100), 1.0)
    assert ae(fp.zeta(3), 1.2020569031595942854)
    assert ae(fp.zeta(2+0j), (1.6449340668482264365 + 0.0j))
    assert ae(fp.zeta(0.93), -13.713619351638164784)
    assert ae(fp.zeta(1.74), 1.9796863545771774095)
    # Trivial zeros and negative axis values.
    assert ae(fp.zeta(0.0), -0.5)
    assert ae(fp.zeta(-1.0), -0.083333333333333333333)
    assert ae(fp.zeta(-2.0), 0.0)
    assert ae(fp.zeta(-3.0), 0.0083333333333333333333)
    assert ae(fp.zeta(-500.0), 0.0)
    assert ae(fp.zeta(-7.4), 0.0036537321227995882447)
    # Region just above 1 where zeta(s) -> 1 rapidly.
    assert ae(fp.zeta(2.1), 1.5602165335033620158)
    assert ae(fp.zeta(26.9), 1.0000000079854809935)
    assert ae(fp.zeta(26), 1.0000000149015548284)
    assert ae(fp.zeta(27), 1.0000000074507117898)
    assert ae(fp.zeta(28), 1.0000000037253340248)
    assert ae(fp.zeta(27.1), 1.000000006951755045)
    assert ae(fp.zeta(32.7), 1.0000000001433243232)
    assert ae(fp.zeta(100), 1.0)
    assert ae(fp.altzeta(3.5), 0.92755357777394803511)
    assert ae(fp.altzeta(1), 0.69314718055994530942)
    assert ae(fp.altzeta(2), 0.82246703342411321824)
    assert ae(fp.altzeta(0), 0.5)
    # Hurwitz zeta (second argument is the shift a).
    assert ae(fp.zeta(-2+3j, 1), (0.13297115587929864827 + 0.12305330040458776494j))
    assert ae(fp.zeta(-2+3j, 5), (18.384866151867576927 - 11.377015110597711009j))
    # Very close to the simple pole at s = 1.
    assert ae(fp.zeta(1.0000000001), 9999999173.1735741337)
    assert ae(fp.zeta(0.9999999999), -9999999172.0191428039)
    assert ae(fp.zeta(1+0.000000001j), (0.57721566490153286061 - 999999999.99999993765j))
    assert ae(fp.primezeta(2.5+4j), (-0.16922458243438033385 - 0.010847965298387727811j))
    assert ae(fp.primezeta(4), 0.076993139764246844943)
    assert ae(fp.riemannr(3.7), 2.3034079839110855717)
    assert ae(fp.riemannr(8), 3.9011860449341499474)
    assert ae(fp.riemannr(3+4j), (2.2369653314259991796 + 1.6339943856990281694j))
224
+
225
def test_fp_hyp2f1():
    """fp.hyp2f1 with a rational parameter given as a (p, q) tuple and an
    argument outside the unit disk (forcing analytic continuation)."""
    assert ae(fp.hyp2f1(1, (3,2), 3.25, 5.0), (-0.46600275923108143059 - 0.74393667908854842325j))
    assert ae(fp.hyp2f1(1+1j, (3,2), 3.25, 5.0), (-5.9208875603806515987 - 2.3813557707889590686j))
    assert ae(fp.hyp2f1(1+1j, (3,2), 3.25, 2+3j), (0.17174552030925080445 + 0.19589781970539389999j))
229
+
230
def test_fp_erf():
    """fp.erf and fp.erfc over the real line (including saturation to
    +-1 / 0..2 for large |x|), at infinities, and for complex arguments
    where the functions grow rapidly."""
    # int, float and complex inputs on the real axis agree exactly.
    assert fp.erf(2) == fp.erf(2.0) == fp.erf(2.0+0.0j)
    assert fp.erf(fp.inf) == 1.0
    assert fp.erf(fp.ninf) == -1.0
    # erf is odd: values checked in +- pairs.
    assert ae(fp.erf(0), 0.0)
    assert ae(fp.erf(-0), -0.0)
    assert ae(fp.erf(0.3), 0.32862675945912741619)
    assert ae(fp.erf(-0.3), -0.32862675945912741619)
    assert ae(fp.erf(0.9), 0.79690821242283213966)
    assert ae(fp.erf(-0.9), -0.79690821242283213966)
    assert ae(fp.erf(1.0), 0.84270079294971486934)
    assert ae(fp.erf(-1.0), -0.84270079294971486934)
    assert ae(fp.erf(1.1), 0.88020506957408172966)
    assert ae(fp.erf(-1.1), -0.88020506957408172966)
    assert ae(fp.erf(8.5), 1.0)
    assert ae(fp.erf(-8.5), -1.0)
    assert ae(fp.erf(9.1), 1.0)
    assert ae(fp.erf(-9.1), -1.0)
    assert ae(fp.erf(20.0), 1.0)
    assert ae(fp.erf(-20.0), -1.0)
    assert ae(fp.erf(10000.0), 1.0)
    assert ae(fp.erf(-10000.0), -1.0)
    assert ae(fp.erf(1e+50), 1.0)
    assert ae(fp.erf(-1e+50), -1.0)
    # Complex arguments (erf grows like exp(x^2) off the real axis).
    assert ae(fp.erf(1j), 1.650425758797542876j)
    assert ae(fp.erf(-1j), -1.650425758797542876j)
    assert ae(fp.erf((2+3j)), (-20.829461427614568389 + 8.6873182714701631444j))
    assert ae(fp.erf(-(2+3j)), -(-20.829461427614568389 + 8.6873182714701631444j))
    assert ae(fp.erf((8+9j)), (-1072004.2525062051158 + 364149.91954310255423j))
    assert ae(fp.erf(-(8+9j)), -(-1072004.2525062051158 + 364149.91954310255423j))
    # erfc = 1 - erf; exact endpoint values first.
    assert fp.erfc(fp.inf) == 0.0
    assert fp.erfc(fp.ninf) == 2.0
    assert fp.erfc(0) == 1
    assert fp.erfc(-0.0) == 1
    assert fp.erfc(0+0j) == 1
    assert ae(fp.erfc(0.3), 0.67137324054087258381)
    assert ae(fp.erfc(-0.3), 1.3286267594591274162)
    assert ae(fp.erfc(0.9), 0.20309178757716786034)
    assert ae(fp.erfc(-0.9), 1.7969082124228321397)
    assert ae(fp.erfc(1.0), 0.15729920705028513066)
    assert ae(fp.erfc(-1.0), 1.8427007929497148693)
    assert ae(fp.erfc(1.1), 0.11979493042591827034)
    assert ae(fp.erfc(-1.1), 1.8802050695740817297)
    # Large positive arguments: tiny tail values (no cancellation in erfc).
    assert ae(fp.erfc(8.5), 2.7623240713337714461e-33)
    assert ae(fp.erfc(-8.5), 2.0)
    assert ae(fp.erfc(9.1), 6.6969004279886077452e-38)
    assert ae(fp.erfc(-9.1), 2.0)
    assert ae(fp.erfc(20.0), 5.3958656116079009289e-176)
    assert ae(fp.erfc(-20.0), 2.0)
    assert ae(fp.erfc(10000.0), 0.0)
    assert ae(fp.erfc(-10000.0), 2.0)
    assert ae(fp.erfc(1e+50), 0.0)
    assert ae(fp.erfc(-1e+50), 2.0)
    assert ae(fp.erfc(1j), (1.0 - 1.650425758797542876j))
    assert ae(fp.erfc(-1j), (1.0 + 1.650425758797542876j))
    # Looser tolerance: cancellation of the large erf value against 1.
    assert ae(fp.erfc((2+3j)), (21.829461427614568389 - 8.6873182714701631444j), 1e-13)
    assert ae(fp.erfc(-(2+3j)), (-19.829461427614568389 + 8.6873182714701631444j), 1e-13)
    assert ae(fp.erfc((8+9j)), (1072005.2525062051158 - 364149.91954310255423j))
    assert ae(fp.erfc(-(8+9j)), (-1072003.2525062051158 + 364149.91954310255423j))
    assert ae(fp.erfc(20+0j), (5.3958656116079009289e-176 + 0.0j))
290
+
291
def test_fp_lambertw():
    """fp.lambertw on branches k=0 (default), k=-1 and k=2, covering the
    branch point region (-1/e < x < 0), huge/tiny magnitudes and complex
    arguments. On non-principal branches W(0) is -inf."""
    # Principal branch k=0.
    assert ae(fp.lambertw(0.0), 0.0)
    assert ae(fp.lambertw(1.0), 0.567143290409783873)
    assert ae(fp.lambertw(7.5), 1.5662309537823875394)
    assert ae(fp.lambertw(-0.25), -0.35740295618138890307)
    assert ae(fp.lambertw(-10.0), (1.3699809685212708156 + 2.140194527074713196j))
    assert ae(fp.lambertw(0+0j), (0.0 + 0.0j))
    assert ae(fp.lambertw(4+0j), (1.2021678731970429392 + 0.0j))
    assert ae(fp.lambertw(1000.5), 5.2500227450408980127)
    assert ae(fp.lambertw(1e100), 224.84310644511850156)
    assert ae(fp.lambertw(-1000.0), (5.1501630246362515223 + 2.6641981432905204596j))
    assert ae(fp.lambertw(1e-10), 9.9999999990000003645e-11)
    assert ae(fp.lambertw(1e-10j), (1.0000000000000000728e-20 + 1.0000000000000000364e-10j))
    assert ae(fp.lambertw(3+4j), (1.2815618061237758782 + 0.53309522202097107131j))
    assert ae(fp.lambertw(-3-4j), (1.0750730665692549276 - 1.3251023817343588823j))
    assert ae(fp.lambertw(10000+1000j), (7.2361526563371602186 + 0.087567810943839352034j))
    # Branch k=-1.
    assert ae(fp.lambertw(0.0, -1), -fp.inf)
    assert ae(fp.lambertw(1.0, -1), (-1.5339133197935745079 - 4.3751851530618983855j))
    assert ae(fp.lambertw(7.5, -1), (0.44125668415098614999 - 4.8039842008452390179j))
    assert ae(fp.lambertw(-0.25, -1), -2.1532923641103496492)
    assert ae(fp.lambertw(-10.0, -1), (1.3699809685212708156 - 2.140194527074713196j))
    assert ae(fp.lambertw(0+0j, -1), -fp.inf)
    assert ae(fp.lambertw(4+0j, -1), (-0.15730793189620765317 - 4.6787800704666656212j))
    assert ae(fp.lambertw(1000.5, -1), (4.9153765415404024736 - 5.4465682700815159569j))
    assert ae(fp.lambertw(1e100, -1), (224.84272130101601052 - 6.2553713838167244141j))
    assert ae(fp.lambertw(-1000.0, -1), (5.1501630246362515223 - 2.6641981432905204596j))
    assert ae(fp.lambertw(1e-10, -1), (-26.303186778379041521 - 3.2650939117038283975j))
    assert ae(fp.lambertw(1e-10j, -1), (-26.297238779529035028 - 1.6328071613455765135j))
    assert ae(fp.lambertw(3+4j, -1), (0.25856740686699741676 - 3.8521166861614355895j))
    assert ae(fp.lambertw(-3-4j, -1), (-0.32028750204310768396 - 6.8801677192091972343j))
    assert ae(fp.lambertw(10000+1000j, -1), (7.0255308742285435567 - 5.5177506835734067601j))
    # Branch k=2.
    assert ae(fp.lambertw(0.0, 2), -fp.inf)
    assert ae(fp.lambertw(1.0, 2), (-2.4015851048680028842 + 10.776299516115070898j))
    assert ae(fp.lambertw(7.5, 2), (-0.38003357962843791529 + 10.960916473368746184j))
    assert ae(fp.lambertw(-0.25, 2), (-4.0558735269061511898 + 13.852334658567271386j))
    assert ae(fp.lambertw(-10.0, 2), (-0.34479123764318858696 + 14.112740596763592363j))
    assert ae(fp.lambertw(0+0j, 2), -fp.inf)
    assert ae(fp.lambertw(4+0j, 2), (-1.0070343323804262788 + 10.903476551861683082j))
    assert ae(fp.lambertw(1000.5, 2), (4.4076185165459395295 + 11.365524591091402177j))
    assert ae(fp.lambertw(1e100, 2), (224.84156762724875878 + 12.510785262632255672j))
    assert ae(fp.lambertw(-1000.0, 2), (4.1984245610246530756 + 14.420478573754313845j))
    assert ae(fp.lambertw(1e-10, 2), (-26.362258095445866488 + 9.7800247407031482519j))
    assert ae(fp.lambertw(1e-10j, 2), (-26.384250801683084252 + 11.403535950607739763j))
    assert ae(fp.lambertw(3+4j, 2), (-0.86554679943333993562 + 11.849956798331992027j))
    assert ae(fp.lambertw(-3-4j, 2), (-0.55792273874679112639 + 8.7173627024159324811j))
    assert ae(fp.lambertw(10000+1000j, 2), (6.6223802254585662734 + 11.61348646825020766j))
337
+
338
+ def test_fp_stress_ei_e1():
339
+ # Can be tightened on recent Pythons with more accurate math/cmath
340
+ ATOL = 1e-13
341
+ PTOL = 1e-12
342
+ v = fp.e1(1.1641532182693481445e-10)
343
+ assert ae(v, 22.296641293693077672, tol=ATOL)
344
+ assert type(v) is float
345
+ v = fp.e1(0.25)
346
+ assert ae(v, 1.0442826344437381945, tol=ATOL)
347
+ assert type(v) is float
348
+ v = fp.e1(1.0)
349
+ assert ae(v, 0.21938393439552027368, tol=ATOL)
350
+ assert type(v) is float
351
+ v = fp.e1(2.0)
352
+ assert ae(v, 0.048900510708061119567, tol=ATOL)
353
+ assert type(v) is float
354
+ v = fp.e1(5.0)
355
+ assert ae(v, 0.0011482955912753257973, tol=ATOL)
356
+ assert type(v) is float
357
+ v = fp.e1(20.0)
358
+ assert ae(v, 9.8355252906498816904e-11, tol=ATOL)
359
+ assert type(v) is float
360
+ v = fp.e1(30.0)
361
+ assert ae(v, 3.0215520106888125448e-15, tol=ATOL)
362
+ assert type(v) is float
363
+ v = fp.e1(40.0)
364
+ assert ae(v, 1.0367732614516569722e-19, tol=ATOL)
365
+ assert type(v) is float
366
+ v = fp.e1(50.0)
367
+ assert ae(v, 3.7832640295504590187e-24, tol=ATOL)
368
+ assert type(v) is float
369
+ v = fp.e1(80.0)
370
+ assert ae(v, 2.2285432586884729112e-37, tol=ATOL)
371
+ assert type(v) is float
372
+ v = fp.e1((1.1641532182693481445e-10 + 0.0j))
373
+ assert ae(v, (22.296641293693077672 + 0.0j), tol=ATOL)
374
+ assert ae(v.real, 22.296641293693077672, tol=PTOL)
375
+ assert v.imag == 0
376
+ v = fp.e1((0.25 + 0.0j))
377
+ assert ae(v, (1.0442826344437381945 + 0.0j), tol=ATOL)
378
+ assert ae(v.real, 1.0442826344437381945, tol=PTOL)
379
+ assert v.imag == 0
380
+ v = fp.e1((1.0 + 0.0j))
381
+ assert ae(v, (0.21938393439552027368 + 0.0j), tol=ATOL)
382
+ assert ae(v.real, 0.21938393439552027368, tol=PTOL)
383
+ assert v.imag == 0
384
+ v = fp.e1((2.0 + 0.0j))
385
+ assert ae(v, (0.048900510708061119567 + 0.0j), tol=ATOL)
386
+ assert ae(v.real, 0.048900510708061119567, tol=PTOL)
387
+ assert v.imag == 0
388
+ v = fp.e1((5.0 + 0.0j))
389
+ assert ae(v, (0.0011482955912753257973 + 0.0j), tol=ATOL)
390
+ assert ae(v.real, 0.0011482955912753257973, tol=PTOL)
391
+ assert v.imag == 0
392
+ v = fp.e1((20.0 + 0.0j))
393
+ assert ae(v, (9.8355252906498816904e-11 + 0.0j), tol=ATOL)
394
+ assert ae(v.real, 9.8355252906498816904e-11, tol=PTOL)
395
+ assert v.imag == 0
396
+ v = fp.e1((30.0 + 0.0j))
397
+ assert ae(v, (3.0215520106888125448e-15 + 0.0j), tol=ATOL)
398
+ assert ae(v.real, 3.0215520106888125448e-15, tol=PTOL)
399
+ assert v.imag == 0
400
+ v = fp.e1((40.0 + 0.0j))
401
+ assert ae(v, (1.0367732614516569722e-19 + 0.0j), tol=ATOL)
402
+ assert ae(v.real, 1.0367732614516569722e-19, tol=PTOL)
403
+ assert v.imag == 0
404
+ v = fp.e1((50.0 + 0.0j))
405
+ assert ae(v, (3.7832640295504590187e-24 + 0.0j), tol=ATOL)
406
+ assert ae(v.real, 3.7832640295504590187e-24, tol=PTOL)
407
+ assert v.imag == 0
408
+ v = fp.e1((80.0 + 0.0j))
409
+ assert ae(v, (2.2285432586884729112e-37 + 0.0j), tol=ATOL)
410
+ assert ae(v.real, 2.2285432586884729112e-37, tol=PTOL)
411
+ assert v.imag == 0
412
+ v = fp.e1((4.6566128730773925781e-10 + 1.1641532182693481445e-10j))
413
+ assert ae(v, (20.880034622014215597 - 0.24497866301044883237j), tol=ATOL)
414
+ assert ae(v.real, 20.880034622014215597, tol=PTOL)
415
+ assert ae(v.imag, -0.24497866301044883237, tol=PTOL)
416
+ v = fp.e1((1.0 + 0.25j))
417
+ assert ae(v, (0.19731063945004229095 - 0.087366045774299963672j), tol=ATOL)
418
+ assert ae(v.real, 0.19731063945004229095, tol=PTOL)
419
+ assert ae(v.imag, -0.087366045774299963672, tol=PTOL)
420
+ v = fp.e1((4.0 + 1.0j))
421
+ assert ae(v, (0.0013106173980145506944 - 0.0034542480199350626699j), tol=ATOL)
422
+ assert ae(v.real, 0.0013106173980145506944, tol=PTOL)
423
+ assert ae(v.imag, -0.0034542480199350626699, tol=PTOL)
424
+ v = fp.e1((8.0 + 2.0j))
425
+ assert ae(v, (-0.000022278049065270225945 - 0.000029191940456521555288j), tol=ATOL)
426
+ assert ae(v.real, -0.000022278049065270225945, tol=PTOL)
427
+ assert ae(v.imag, -0.000029191940456521555288, tol=PTOL)
428
+ v = fp.e1((20.0 + 5.0j))
429
+ assert ae(v, (4.7711374515765346894e-11 + 8.2902652405126947359e-11j), tol=ATOL)
430
+ assert ae(v.real, 4.7711374515765346894e-11, tol=PTOL)
431
+ assert ae(v.imag, 8.2902652405126947359e-11, tol=PTOL)
432
+ v = fp.e1((80.0 + 20.0j))
433
+ assert ae(v, (3.8353473865788235787e-38 - 2.129247592349605139e-37j), tol=ATOL)
434
+ assert ae(v.real, 3.8353473865788235787e-38, tol=PTOL)
435
+ assert ae(v.imag, -2.129247592349605139e-37, tol=PTOL)
436
+ v = fp.e1((120.0 + 30.0j))
437
+ assert ae(v, (2.3836002337480334716e-55 + 5.6704043587126198306e-55j), tol=ATOL)
438
+ assert ae(v.real, 2.3836002337480334716e-55, tol=PTOL)
439
+ assert ae(v.imag, 5.6704043587126198306e-55, tol=PTOL)
440
+ v = fp.e1((160.0 + 40.0j))
441
+ assert ae(v, (-1.6238022898654510661e-72 - 1.104172355572287367e-72j), tol=ATOL)
442
+ assert ae(v.real, -1.6238022898654510661e-72, tol=PTOL)
443
+ assert ae(v.imag, -1.104172355572287367e-72, tol=PTOL)
444
+ v = fp.e1((200.0 + 50.0j))
445
+ assert ae(v, (6.6800061461666228487e-90 + 1.4473816083541016115e-91j), tol=ATOL)
446
+ assert ae(v.real, 6.6800061461666228487e-90, tol=PTOL)
447
+ assert ae(v.imag, 1.4473816083541016115e-91, tol=PTOL)
448
+ v = fp.e1((320.0 + 80.0j))
449
+ assert ae(v, (4.2737871527778786157e-143 + 3.1789935525785660314e-142j), tol=ATOL)
450
+ assert ae(v.real, 4.2737871527778786157e-143, tol=PTOL)
451
+ assert ae(v.imag, 3.1789935525785660314e-142, tol=PTOL)
452
+ v = fp.e1((1.1641532182693481445e-10 + 1.1641532182693481445e-10j))
453
+ assert ae(v, (21.950067703413105017 - 0.7853981632810329878j), tol=ATOL)
454
+ assert ae(v.real, 21.950067703413105017, tol=PTOL)
455
+ assert ae(v.imag, -0.7853981632810329878, tol=PTOL)
456
+ v = fp.e1((0.25 + 0.25j))
457
+ assert ae(v, (0.71092525792923287894 - 0.56491812441304194711j), tol=ATOL)
458
+ assert ae(v.real, 0.71092525792923287894, tol=PTOL)
459
+ assert ae(v.imag, -0.56491812441304194711, tol=PTOL)
460
+ v = fp.e1((1.0 + 1.0j))
461
+ assert ae(v, (0.00028162445198141832551 - 0.17932453503935894015j), tol=ATOL)
462
+ assert ae(v.real, 0.00028162445198141832551, tol=PTOL)
463
+ assert ae(v.imag, -0.17932453503935894015, tol=PTOL)
464
+ v = fp.e1((2.0 + 2.0j))
465
+ assert ae(v, (-0.033767089606562004246 - 0.018599414169750541925j), tol=ATOL)
466
+ assert ae(v.real, -0.033767089606562004246, tol=PTOL)
467
+ assert ae(v.imag, -0.018599414169750541925, tol=PTOL)
468
+ v = fp.e1((5.0 + 5.0j))
469
+ assert ae(v, (0.0007266506660356393891 + 0.00047102780163522245054j), tol=ATOL)
470
+ assert ae(v.real, 0.0007266506660356393891, tol=PTOL)
471
+ assert ae(v.imag, 0.00047102780163522245054, tol=PTOL)
472
+ v = fp.e1((20.0 + 20.0j))
473
+ assert ae(v, (-2.3824537449367396579e-11 - 6.6969873156525615158e-11j), tol=ATOL)
474
+ assert ae(v.real, -2.3824537449367396579e-11, tol=PTOL)
475
+ assert ae(v.imag, -6.6969873156525615158e-11, tol=PTOL)
476
+ v = fp.e1((30.0 + 30.0j))
477
+ assert ae(v, (1.7316045841744061617e-15 + 1.3065678019487308689e-15j), tol=ATOL)
478
+ assert ae(v.real, 1.7316045841744061617e-15, tol=PTOL)
479
+ assert ae(v.imag, 1.3065678019487308689e-15, tol=PTOL)
480
+ v = fp.e1((40.0 + 40.0j))
481
+ assert ae(v, (-7.4001043002899232182e-20 - 4.991847855336816304e-21j), tol=ATOL)
482
+ assert ae(v.real, -7.4001043002899232182e-20, tol=PTOL)
483
+ assert ae(v.imag, -4.991847855336816304e-21, tol=PTOL)
484
+ v = fp.e1((50.0 + 50.0j))
485
+ assert ae(v, (2.3566128324644641219e-24 - 1.3188326726201614778e-24j), tol=ATOL)
486
+ assert ae(v.real, 2.3566128324644641219e-24, tol=PTOL)
487
+ assert ae(v.imag, -1.3188326726201614778e-24, tol=PTOL)
488
+ v = fp.e1((80.0 + 80.0j))
489
+ assert ae(v, (9.8279750572186526673e-38 + 1.243952841288868831e-37j), tol=ATOL)
490
+ assert ae(v.real, 9.8279750572186526673e-38, tol=PTOL)
491
+ assert ae(v.imag, 1.243952841288868831e-37, tol=PTOL)
492
+ v = fp.e1((1.1641532182693481445e-10 + 4.6566128730773925781e-10j))
493
+ assert ae(v, (20.880034621664969632 - 1.3258176632023711778j), tol=ATOL)
494
+ assert ae(v.real, 20.880034621664969632, tol=PTOL)
495
+ assert ae(v.imag, -1.3258176632023711778, tol=PTOL)
496
+ v = fp.e1((0.25 + 1.0j))
497
+ assert ae(v, (-0.16868306393667788761 - 0.4858011885947426971j), tol=ATOL)
498
+ assert ae(v.real, -0.16868306393667788761, tol=PTOL)
499
+ assert ae(v.imag, -0.4858011885947426971, tol=PTOL)
500
+ v = fp.e1((1.0 + 4.0j))
501
+ assert ae(v, (0.03373591813926547318 + 0.073523452241083821877j), tol=ATOL)
502
+ assert ae(v.real, 0.03373591813926547318, tol=PTOL)
503
+ assert ae(v.imag, 0.073523452241083821877, tol=PTOL)
504
+ v = fp.e1((2.0 + 8.0j))
505
+ assert ae(v, (-0.015392833434733785143 - 0.0031747121557605415914j), tol=ATOL)
506
+ assert ae(v.real, -0.015392833434733785143, tol=PTOL)
507
+ assert ae(v.imag, -0.0031747121557605415914, tol=PTOL)
508
+ v = fp.e1((5.0 + 20.0j))
509
+ assert ae(v, (-0.00024419662286542966525 - 0.00021008322966152755674j), tol=ATOL)
510
+ assert ae(v.real, -0.00024419662286542966525, tol=PTOL)
511
+ assert ae(v.imag, -0.00021008322966152755674, tol=PTOL)
512
+ v = fp.e1((20.0 + 80.0j))
513
+ assert ae(v, (2.3255552781051330088e-11 + 8.9463918891349438007e-12j), tol=ATOL)
514
+ assert ae(v.real, 2.3255552781051330088e-11, tol=PTOL)
515
+ assert ae(v.imag, 8.9463918891349438007e-12, tol=PTOL)
516
+ v = fp.e1((30.0 + 120.0j))
517
+ assert ae(v, (-2.7068919097124652332e-16 - 7.0477762411705130239e-16j), tol=ATOL)
518
+ assert ae(v.real, -2.7068919097124652332e-16, tol=PTOL)
519
+ assert ae(v.imag, -7.0477762411705130239e-16, tol=PTOL)
520
+ v = fp.e1((40.0 + 160.0j))
521
+ assert ae(v, (-1.1695597827678024687e-20 + 2.2907401455645736661e-20j), tol=ATOL)
522
+ assert ae(v.real, -1.1695597827678024687e-20, tol=PTOL)
523
+ assert ae(v.imag, 2.2907401455645736661e-20, tol=PTOL)
524
+ v = fp.e1((50.0 + 200.0j))
525
+ assert ae(v, (9.0323746914410162531e-25 - 2.3950601790033530935e-25j), tol=ATOL)
526
+ assert ae(v.real, 9.0323746914410162531e-25, tol=PTOL)
527
+ assert ae(v.imag, -2.3950601790033530935e-25, tol=PTOL)
528
+ v = fp.e1((80.0 + 320.0j))
529
+ assert ae(v, (3.4819106748728063576e-38 - 4.215653005615772724e-38j), tol=ATOL)
530
+ assert ae(v.real, 3.4819106748728063576e-38, tol=PTOL)
531
+ assert ae(v.imag, -4.215653005615772724e-38, tol=PTOL)
532
+ v = fp.e1((0.0 + 1.1641532182693481445e-10j))
533
+ assert ae(v, (22.29664129357666235 - 1.5707963266784812974j), tol=ATOL)
534
+ assert ae(v.real, 22.29664129357666235, tol=PTOL)
535
+ assert ae(v.imag, -1.5707963266784812974, tol=PTOL)
536
+ v = fp.e1((0.0 + 0.25j))
537
+ assert ae(v, (0.82466306258094565309 - 1.3216627564751394551j), tol=ATOL)
538
+ assert ae(v.real, 0.82466306258094565309, tol=PTOL)
539
+ assert ae(v.imag, -1.3216627564751394551, tol=PTOL)
540
+ v = fp.e1((0.0 + 1.0j))
541
+ assert ae(v, (-0.33740392290096813466 - 0.62471325642771360429j), tol=ATOL)
542
+ assert ae(v.real, -0.33740392290096813466, tol=PTOL)
543
+ assert ae(v.imag, -0.62471325642771360429, tol=PTOL)
544
+ v = fp.e1((0.0 + 2.0j))
545
+ assert ae(v, (-0.4229808287748649957 + 0.034616650007798229345j), tol=ATOL)
546
+ assert ae(v.real, -0.4229808287748649957, tol=PTOL)
547
+ assert ae(v.imag, 0.034616650007798229345, tol=PTOL)
548
+ v = fp.e1((0.0 + 5.0j))
549
+ assert ae(v, (0.19002974965664387862 - 0.020865081850222481957j), tol=ATOL)
550
+ assert ae(v.real, 0.19002974965664387862, tol=PTOL)
551
+ assert ae(v.imag, -0.020865081850222481957, tol=PTOL)
552
+ v = fp.e1((0.0 + 20.0j))
553
+ assert ae(v, (-0.04441982084535331654 - 0.022554625751456779068j), tol=ATOL)
554
+ assert ae(v.real, -0.04441982084535331654, tol=PTOL)
555
+ assert ae(v.imag, -0.022554625751456779068, tol=PTOL)
556
+ v = fp.e1((0.0 + 30.0j))
557
+ assert ae(v, (0.033032417282071143779 - 0.0040397867645455082476j), tol=ATOL)
558
+ assert ae(v.real, 0.033032417282071143779, tol=PTOL)
559
+ assert ae(v.imag, -0.0040397867645455082476, tol=PTOL)
560
+ v = fp.e1((0.0 + 40.0j))
561
+ assert ae(v, (-0.019020007896208766962 + 0.016188792559887887544j), tol=ATOL)
562
+ assert ae(v.real, -0.019020007896208766962, tol=PTOL)
563
+ assert ae(v.imag, 0.016188792559887887544, tol=PTOL)
564
+ v = fp.e1((0.0 + 50.0j))
565
+ assert ae(v, (0.0056283863241163054402 - 0.019179254308960724503j), tol=ATOL)
566
+ assert ae(v.real, 0.0056283863241163054402, tol=PTOL)
567
+ assert ae(v.imag, -0.019179254308960724503, tol=PTOL)
568
+ v = fp.e1((0.0 + 80.0j))
569
+ assert ae(v, (0.012402501155070958192 + 0.0015345601175906961199j), tol=ATOL)
570
+ assert ae(v.real, 0.012402501155070958192, tol=PTOL)
571
+ assert ae(v.imag, 0.0015345601175906961199, tol=PTOL)
572
+ v = fp.e1((-1.1641532182693481445e-10 + 4.6566128730773925781e-10j))
573
+ assert ae(v, (20.880034621432138988 - 1.8157749894560994861j), tol=ATOL)
574
+ assert ae(v.real, 20.880034621432138988, tol=PTOL)
575
+ assert ae(v.imag, -1.8157749894560994861, tol=PTOL)
576
+ v = fp.e1((-0.25 + 1.0j))
577
+ assert ae(v, (-0.59066621214766308594 - 0.74474454765205036972j), tol=ATOL)
578
+ assert ae(v.real, -0.59066621214766308594, tol=PTOL)
579
+ assert ae(v.imag, -0.74474454765205036972, tol=PTOL)
580
+ v = fp.e1((-1.0 + 4.0j))
581
+ assert ae(v, (0.49739047283060471093 + 0.41543605404038863174j), tol=ATOL)
582
+ assert ae(v.real, 0.49739047283060471093, tol=PTOL)
583
+ assert ae(v.imag, 0.41543605404038863174, tol=PTOL)
584
+ v = fp.e1((-2.0 + 8.0j))
585
+ assert ae(v, (-0.8705211147733730969 + 0.24099328498605539667j), tol=ATOL)
586
+ assert ae(v.real, -0.8705211147733730969, tol=PTOL)
587
+ assert ae(v.imag, 0.24099328498605539667, tol=PTOL)
588
+ v = fp.e1((-5.0 + 20.0j))
589
+ assert ae(v, (-7.0789514293925893007 - 1.6102177171960790536j), tol=ATOL)
590
+ assert ae(v.real, -7.0789514293925893007, tol=PTOL)
591
+ assert ae(v.imag, -1.6102177171960790536, tol=PTOL)
592
+ v = fp.e1((-20.0 + 80.0j))
593
+ assert ae(v, (5855431.4907298084434 - 720920.93315409165707j), tol=ATOL)
594
+ assert ae(v.real, 5855431.4907298084434, tol=PTOL)
595
+ assert ae(v.imag, -720920.93315409165707, tol=PTOL)
596
+ v = fp.e1((-30.0 + 120.0j))
597
+ assert ae(v, (-65402491644.703470747 - 56697658399.657460294j), tol=ATOL)
598
+ assert ae(v.real, -65402491644.703470747, tol=PTOL)
599
+ assert ae(v.imag, -56697658399.657460294, tol=PTOL)
600
+ v = fp.e1((-40.0 + 160.0j))
601
+ assert ae(v, (25504929379604.776769 + 1429035198630573.2463j), tol=ATOL)
602
+ assert ae(v.real, 25504929379604.776769, tol=PTOL)
603
+ assert ae(v.imag, 1429035198630573.2463, tol=PTOL)
604
+ v = fp.e1((-50.0 + 200.0j))
605
+ assert ae(v, (18437746526988116954.0 - 17146362239046152345.0j), tol=ATOL)
606
+ assert ae(v.real, 18437746526988116954.0, tol=PTOL)
607
+ assert ae(v.imag, -17146362239046152345.0, tol=PTOL)
608
+ v = fp.e1((-80.0 + 320.0j))
609
+ assert ae(v, (3.3464697299634526706e+31 - 1.6473152633843023919e+32j), tol=ATOL)
610
+ assert ae(v.real, 3.3464697299634526706e+31, tol=PTOL)
611
+ assert ae(v.imag, -1.6473152633843023919e+32, tol=PTOL)
612
+ v = fp.e1((-4.6566128730773925781e-10 + 1.1641532182693481445e-10j))
613
+ assert ae(v, (20.880034621082893023 - 2.8966139903465137624j), tol=ATOL)
614
+ assert ae(v.real, 20.880034621082893023, tol=PTOL)
615
+ assert ae(v.imag, -2.8966139903465137624, tol=PTOL)
616
+ v = fp.e1((-1.0 + 0.25j))
617
+ assert ae(v, (-1.8942716983721074932 - 2.4689102827070540799j), tol=ATOL)
618
+ assert ae(v.real, -1.8942716983721074932, tol=PTOL)
619
+ assert ae(v.imag, -2.4689102827070540799, tol=PTOL)
620
+ v = fp.e1((-4.0 + 1.0j))
621
+ assert ae(v, (-14.806699492675420438 + 9.1384225230837893776j), tol=ATOL)
622
+ assert ae(v.real, -14.806699492675420438, tol=PTOL)
623
+ assert ae(v.imag, 9.1384225230837893776, tol=PTOL)
624
+ v = fp.e1((-8.0 + 2.0j))
625
+ assert ae(v, (54.633252667426386294 + 413.20318163814670688j), tol=ATOL)
626
+ assert ae(v.real, 54.633252667426386294, tol=PTOL)
627
+ assert ae(v.imag, 413.20318163814670688, tol=PTOL)
628
+ v = fp.e1((-20.0 + 5.0j))
629
+ assert ae(v, (-711836.97165402624643 - 24745250.939695900956j), tol=ATOL)
630
+ assert ae(v.real, -711836.97165402624643, tol=PTOL)
631
+ assert ae(v.imag, -24745250.939695900956, tol=PTOL)
632
+ v = fp.e1((-80.0 + 20.0j))
633
+ assert ae(v, (-4.2139911108612653091e+32 + 5.3367124741918251637e+32j), tol=ATOL)
634
+ assert ae(v.real, -4.2139911108612653091e+32, tol=PTOL)
635
+ assert ae(v.imag, 5.3367124741918251637e+32, tol=PTOL)
636
+ v = fp.e1((-120.0 + 30.0j))
637
+ assert ae(v, (9.7760616203707508892e+48 - 1.058257682317195792e+50j), tol=ATOL)
638
+ assert ae(v.real, 9.7760616203707508892e+48, tol=PTOL)
639
+ assert ae(v.imag, -1.058257682317195792e+50, tol=PTOL)
640
+ v = fp.e1((-160.0 + 40.0j))
641
+ assert ae(v, (8.7065541466623638861e+66 + 1.6577106725141739889e+67j), tol=ATOL)
642
+ assert ae(v.real, 8.7065541466623638861e+66, tol=PTOL)
643
+ assert ae(v.imag, 1.6577106725141739889e+67, tol=PTOL)
644
+ v = fp.e1((-200.0 + 50.0j))
645
+ assert ae(v, (-3.070744996327018106e+84 - 1.7243244846769415903e+84j), tol=ATOL)
646
+ assert ae(v.real, -3.070744996327018106e+84, tol=PTOL)
647
+ assert ae(v.imag, -1.7243244846769415903e+84, tol=PTOL)
648
+ v = fp.e1((-320.0 + 80.0j))
649
+ assert ae(v, (9.9960598637998647276e+135 - 2.6855081527595608863e+136j), tol=ATOL)
650
+ assert ae(v.real, 9.9960598637998647276e+135, tol=PTOL)
651
+ assert ae(v.imag, -2.6855081527595608863e+136, tol=PTOL)
652
+ v = fp.e1(-1.1641532182693481445e-10)
653
+ assert ae(v, (22.296641293460247028 - 3.1415926535897932385j), tol=ATOL)
654
+ assert ae(v.real, 22.296641293460247028, tol=PTOL)
655
+ assert ae(v.imag, -3.1415926535897932385, tol=PTOL)
656
+ v = fp.e1(-0.25)
657
+ assert ae(v, (0.54254326466191372953 - 3.1415926535897932385j), tol=ATOL)
658
+ assert ae(v.real, 0.54254326466191372953, tol=PTOL)
659
+ assert ae(v.imag, -3.1415926535897932385, tol=PTOL)
660
+ v = fp.e1(-1.0)
661
+ assert ae(v, (-1.8951178163559367555 - 3.1415926535897932385j), tol=ATOL)
662
+ assert ae(v.real, -1.8951178163559367555, tol=PTOL)
663
+ assert ae(v.imag, -3.1415926535897932385, tol=PTOL)
664
+ v = fp.e1(-2.0)
665
+ assert ae(v, (-4.9542343560018901634 - 3.1415926535897932385j), tol=ATOL)
666
+ assert ae(v.real, -4.9542343560018901634, tol=PTOL)
667
+ assert ae(v.imag, -3.1415926535897932385, tol=PTOL)
668
+ v = fp.e1(-5.0)
669
+ assert ae(v, (-40.185275355803177455 - 3.1415926535897932385j), tol=ATOL)
670
+ assert ae(v.real, -40.185275355803177455, tol=PTOL)
671
+ assert ae(v.imag, -3.1415926535897932385, tol=PTOL)
672
+ v = fp.e1(-20.0)
673
+ assert ae(v, (-25615652.66405658882 - 3.1415926535897932385j), tol=ATOL)
674
+ assert ae(v.real, -25615652.66405658882, tol=PTOL)
675
+ assert ae(v.imag, -3.1415926535897932385, tol=PTOL)
676
+ v = fp.e1(-30.0)
677
+ assert ae(v, (-368973209407.27419706 - 3.1415926535897932385j), tol=ATOL)
678
+ assert ae(v.real, -368973209407.27419706, tol=PTOL)
679
+ assert ae(v.imag, -3.1415926535897932385, tol=PTOL)
680
+ v = fp.e1(-40.0)
681
+ assert ae(v, (-6039718263611241.5784 - 3.1415926535897932385j), tol=ATOL)
682
+ assert ae(v.real, -6039718263611241.5784, tol=PTOL)
683
+ assert ae(v.imag, -3.1415926535897932385, tol=PTOL)
684
+ v = fp.e1(-50.0)
685
+ assert ae(v, (-1.0585636897131690963e+20 - 3.1415926535897932385j), tol=ATOL)
686
+ assert ae(v.real, -1.0585636897131690963e+20, tol=PTOL)
687
+ assert ae(v.imag, -3.1415926535897932385, tol=PTOL)
688
+ v = fp.e1(-80.0)
689
+ assert ae(v, (-7.0146000049047999696e+32 - 3.1415926535897932385j), tol=ATOL)
690
+ assert ae(v.real, -7.0146000049047999696e+32, tol=PTOL)
691
+ assert ae(v.imag, -3.1415926535897932385, tol=PTOL)
692
+ v = fp.e1((-1.1641532182693481445e-10 + 0.0j))
693
+ assert ae(v, (22.296641293460247028 - 3.1415926535897932385j), tol=ATOL)
694
+ assert ae(v.real, 22.296641293460247028, tol=PTOL)
695
+ assert ae(v.imag, -3.1415926535897932385, tol=PTOL)
696
+ v = fp.e1((-0.25 + 0.0j))
697
+ assert ae(v, (0.54254326466191372953 - 3.1415926535897932385j), tol=ATOL)
698
+ assert ae(v.real, 0.54254326466191372953, tol=PTOL)
699
+ assert ae(v.imag, -3.1415926535897932385, tol=PTOL)
700
+ v = fp.e1((-1.0 + 0.0j))
701
+ assert ae(v, (-1.8951178163559367555 - 3.1415926535897932385j), tol=ATOL)
702
+ assert ae(v.real, -1.8951178163559367555, tol=PTOL)
703
+ assert ae(v.imag, -3.1415926535897932385, tol=PTOL)
704
+ v = fp.e1((-2.0 + 0.0j))
705
+ assert ae(v, (-4.9542343560018901634 - 3.1415926535897932385j), tol=ATOL)
706
+ assert ae(v.real, -4.9542343560018901634, tol=PTOL)
707
+ assert ae(v.imag, -3.1415926535897932385, tol=PTOL)
708
+ v = fp.e1((-5.0 + 0.0j))
709
+ assert ae(v, (-40.185275355803177455 - 3.1415926535897932385j), tol=ATOL)
710
+ assert ae(v.real, -40.185275355803177455, tol=PTOL)
711
+ assert ae(v.imag, -3.1415926535897932385, tol=PTOL)
712
+ v = fp.e1((-20.0 + 0.0j))
713
+ assert ae(v, (-25615652.66405658882 - 3.1415926535897932385j), tol=ATOL)
714
+ assert ae(v.real, -25615652.66405658882, tol=PTOL)
715
+ assert ae(v.imag, -3.1415926535897932385, tol=PTOL)
716
+ v = fp.e1((-30.0 + 0.0j))
717
+ assert ae(v, (-368973209407.27419706 - 3.1415926535897932385j), tol=ATOL)
718
+ assert ae(v.real, -368973209407.27419706, tol=PTOL)
719
+ assert ae(v.imag, -3.1415926535897932385, tol=PTOL)
720
+ v = fp.e1((-40.0 + 0.0j))
721
+ assert ae(v, (-6039718263611241.5784 - 3.1415926535897932385j), tol=ATOL)
722
+ assert ae(v.real, -6039718263611241.5784, tol=PTOL)
723
+ assert ae(v.imag, -3.1415926535897932385, tol=PTOL)
724
+ v = fp.e1((-50.0 + 0.0j))
725
+ assert ae(v, (-1.0585636897131690963e+20 - 3.1415926535897932385j), tol=ATOL)
726
+ assert ae(v.real, -1.0585636897131690963e+20, tol=PTOL)
727
+ assert ae(v.imag, -3.1415926535897932385, tol=PTOL)
728
+ v = fp.e1((-80.0 + 0.0j))
729
+ assert ae(v, (-7.0146000049047999696e+32 - 3.1415926535897932385j), tol=ATOL)
730
+ assert ae(v.real, -7.0146000049047999696e+32, tol=PTOL)
731
+ assert ae(v.imag, -3.1415926535897932385, tol=PTOL)
732
+ v = fp.e1((-4.6566128730773925781e-10 - 1.1641532182693481445e-10j))
733
+ assert ae(v, (20.880034621082893023 + 2.8966139903465137624j), tol=ATOL)
734
+ assert ae(v.real, 20.880034621082893023, tol=PTOL)
735
+ assert ae(v.imag, 2.8966139903465137624, tol=PTOL)
736
+ v = fp.e1((-1.0 - 0.25j))
737
+ assert ae(v, (-1.8942716983721074932 + 2.4689102827070540799j), tol=ATOL)
738
+ assert ae(v.real, -1.8942716983721074932, tol=PTOL)
739
+ assert ae(v.imag, 2.4689102827070540799, tol=PTOL)
740
+ v = fp.e1((-4.0 - 1.0j))
741
+ assert ae(v, (-14.806699492675420438 - 9.1384225230837893776j), tol=ATOL)
742
+ assert ae(v.real, -14.806699492675420438, tol=PTOL)
743
+ assert ae(v.imag, -9.1384225230837893776, tol=PTOL)
744
+ v = fp.e1((-8.0 - 2.0j))
745
+ assert ae(v, (54.633252667426386294 - 413.20318163814670688j), tol=ATOL)
746
+ assert ae(v.real, 54.633252667426386294, tol=PTOL)
747
+ assert ae(v.imag, -413.20318163814670688, tol=PTOL)
748
+ v = fp.e1((-20.0 - 5.0j))
749
+ assert ae(v, (-711836.97165402624643 + 24745250.939695900956j), tol=ATOL)
750
+ assert ae(v.real, -711836.97165402624643, tol=PTOL)
751
+ assert ae(v.imag, 24745250.939695900956, tol=PTOL)
752
+ v = fp.e1((-80.0 - 20.0j))
753
+ assert ae(v, (-4.2139911108612653091e+32 - 5.3367124741918251637e+32j), tol=ATOL)
754
+ assert ae(v.real, -4.2139911108612653091e+32, tol=PTOL)
755
+ assert ae(v.imag, -5.3367124741918251637e+32, tol=PTOL)
756
+ v = fp.e1((-120.0 - 30.0j))
757
+ assert ae(v, (9.7760616203707508892e+48 + 1.058257682317195792e+50j), tol=ATOL)
758
+ assert ae(v.real, 9.7760616203707508892e+48, tol=PTOL)
759
+ assert ae(v.imag, 1.058257682317195792e+50, tol=PTOL)
760
+ v = fp.e1((-160.0 - 40.0j))
761
+ assert ae(v, (8.7065541466623638861e+66 - 1.6577106725141739889e+67j), tol=ATOL)
762
+ assert ae(v.real, 8.7065541466623638861e+66, tol=PTOL)
763
+ assert ae(v.imag, -1.6577106725141739889e+67, tol=PTOL)
764
+ v = fp.e1((-200.0 - 50.0j))
765
+ assert ae(v, (-3.070744996327018106e+84 + 1.7243244846769415903e+84j), tol=ATOL)
766
+ assert ae(v.real, -3.070744996327018106e+84, tol=PTOL)
767
+ assert ae(v.imag, 1.7243244846769415903e+84, tol=PTOL)
768
+ v = fp.e1((-320.0 - 80.0j))
769
+ assert ae(v, (9.9960598637998647276e+135 + 2.6855081527595608863e+136j), tol=ATOL)
770
+ assert ae(v.real, 9.9960598637998647276e+135, tol=PTOL)
771
+ assert ae(v.imag, 2.6855081527595608863e+136, tol=PTOL)
772
+ v = fp.e1((-1.1641532182693481445e-10 - 1.1641532182693481445e-10j))
773
+ assert ae(v, (21.950067703180274374 + 2.356194490075929607j), tol=ATOL)
774
+ assert ae(v.real, 21.950067703180274374, tol=PTOL)
775
+ assert ae(v.imag, 2.356194490075929607, tol=PTOL)
776
+ v = fp.e1((-0.25 - 0.25j))
777
+ assert ae(v, (0.21441047326710323254 + 2.0732153554307936389j), tol=ATOL)
778
+ assert ae(v.real, 0.21441047326710323254, tol=PTOL)
779
+ assert ae(v.imag, 2.0732153554307936389, tol=PTOL)
780
+ v = fp.e1((-1.0 - 1.0j))
781
+ assert ae(v, (-1.7646259855638540684 + 0.7538228020792708192j), tol=ATOL)
782
+ assert ae(v.real, -1.7646259855638540684, tol=PTOL)
783
+ assert ae(v.imag, 0.7538228020792708192, tol=PTOL)
784
+ v = fp.e1((-2.0 - 2.0j))
785
+ assert ae(v, (-1.8920781621855474089 - 2.1753697842428647236j), tol=ATOL)
786
+ assert ae(v.real, -1.8920781621855474089, tol=PTOL)
787
+ assert ae(v.imag, -2.1753697842428647236, tol=PTOL)
788
+ v = fp.e1((-5.0 - 5.0j))
789
+ assert ae(v, (13.470936071475245856 + 18.464085049321024206j), tol=ATOL)
790
+ assert ae(v.real, 13.470936071475245856, tol=PTOL)
791
+ assert ae(v.imag, 18.464085049321024206, tol=PTOL)
792
+ v = fp.e1((-20.0 - 20.0j))
793
+ assert ae(v, (-16589317.398788971896 - 5831702.3296441771206j), tol=ATOL)
794
+ assert ae(v.real, -16589317.398788971896, tol=PTOL)
795
+ assert ae(v.imag, -5831702.3296441771206, tol=PTOL)
796
+ v = fp.e1((-30.0 - 30.0j))
797
+ assert ae(v, (154596484273.69322527 + 204179357837.41389696j), tol=ATOL)
798
+ assert ae(v.real, 154596484273.69322527, tol=PTOL)
799
+ assert ae(v.imag, 204179357837.41389696, tol=PTOL)
800
+ v = fp.e1((-40.0 - 40.0j))
801
+ assert ae(v, (-287512180321448.45408 - 4203502407932314.974j), tol=ATOL)
802
+ assert ae(v.real, -287512180321448.45408, tol=PTOL)
803
+ assert ae(v.imag, -4203502407932314.974, tol=PTOL)
804
+ v = fp.e1((-50.0 - 50.0j))
805
+ assert ae(v, (-36128528616649268826.0 + 64648801861338741963.0j), tol=ATOL)
806
+ assert ae(v.real, -36128528616649268826.0, tol=PTOL)
807
+ assert ae(v.imag, 64648801861338741963.0, tol=PTOL)
808
+ v = fp.e1((-80.0 - 80.0j))
809
+ assert ae(v, (3.8674816337930010217e+32 + 3.0540709639658071041e+32j), tol=ATOL)
810
+ assert ae(v.real, 3.8674816337930010217e+32, tol=PTOL)
811
+ assert ae(v.imag, 3.0540709639658071041e+32, tol=PTOL)
812
+ v = fp.e1((-1.1641532182693481445e-10 - 4.6566128730773925781e-10j))
813
+ assert ae(v, (20.880034621432138988 + 1.8157749894560994861j), tol=ATOL)
814
+ assert ae(v.real, 20.880034621432138988, tol=PTOL)
815
+ assert ae(v.imag, 1.8157749894560994861, tol=PTOL)
816
+ v = fp.e1((-0.25 - 1.0j))
817
+ assert ae(v, (-0.59066621214766308594 + 0.74474454765205036972j), tol=ATOL)
818
+ assert ae(v.real, -0.59066621214766308594, tol=PTOL)
819
+ assert ae(v.imag, 0.74474454765205036972, tol=PTOL)
820
+ v = fp.e1((-1.0 - 4.0j))
821
+ assert ae(v, (0.49739047283060471093 - 0.41543605404038863174j), tol=ATOL)
822
+ assert ae(v.real, 0.49739047283060471093, tol=PTOL)
823
+ assert ae(v.imag, -0.41543605404038863174, tol=PTOL)
824
+ v = fp.e1((-2.0 - 8.0j))
825
+ assert ae(v, (-0.8705211147733730969 - 0.24099328498605539667j), tol=ATOL)
826
+ assert ae(v.real, -0.8705211147733730969, tol=PTOL)
827
+ assert ae(v.imag, -0.24099328498605539667, tol=PTOL)
828
+ v = fp.e1((-5.0 - 20.0j))
829
+ assert ae(v, (-7.0789514293925893007 + 1.6102177171960790536j), tol=ATOL)
830
+ assert ae(v.real, -7.0789514293925893007, tol=PTOL)
831
+ assert ae(v.imag, 1.6102177171960790536, tol=PTOL)
832
+ v = fp.e1((-20.0 - 80.0j))
833
+ assert ae(v, (5855431.4907298084434 + 720920.93315409165707j), tol=ATOL)
834
+ assert ae(v.real, 5855431.4907298084434, tol=PTOL)
835
+ assert ae(v.imag, 720920.93315409165707, tol=PTOL)
836
+ v = fp.e1((-30.0 - 120.0j))
837
+ assert ae(v, (-65402491644.703470747 + 56697658399.657460294j), tol=ATOL)
838
+ assert ae(v.real, -65402491644.703470747, tol=PTOL)
839
+ assert ae(v.imag, 56697658399.657460294, tol=PTOL)
840
+ v = fp.e1((-40.0 - 160.0j))
841
+ assert ae(v, (25504929379604.776769 - 1429035198630573.2463j), tol=ATOL)
842
+ assert ae(v.real, 25504929379604.776769, tol=PTOL)
843
+ assert ae(v.imag, -1429035198630573.2463, tol=PTOL)
844
+ v = fp.e1((-50.0 - 200.0j))
845
+ assert ae(v, (18437746526988116954.0 + 17146362239046152345.0j), tol=ATOL)
846
+ assert ae(v.real, 18437746526988116954.0, tol=PTOL)
847
+ assert ae(v.imag, 17146362239046152345.0, tol=PTOL)
848
+ v = fp.e1((-80.0 - 320.0j))
849
+ assert ae(v, (3.3464697299634526706e+31 + 1.6473152633843023919e+32j), tol=ATOL)
850
+ assert ae(v.real, 3.3464697299634526706e+31, tol=PTOL)
851
+ assert ae(v.imag, 1.6473152633843023919e+32, tol=PTOL)
852
+ v = fp.e1((0.0 - 1.1641532182693481445e-10j))
853
+ assert ae(v, (22.29664129357666235 + 1.5707963266784812974j), tol=ATOL)
854
+ assert ae(v.real, 22.29664129357666235, tol=PTOL)
855
+ assert ae(v.imag, 1.5707963266784812974, tol=PTOL)
856
+ v = fp.e1((0.0 - 0.25j))
857
+ assert ae(v, (0.82466306258094565309 + 1.3216627564751394551j), tol=ATOL)
858
+ assert ae(v.real, 0.82466306258094565309, tol=PTOL)
859
+ assert ae(v.imag, 1.3216627564751394551, tol=PTOL)
860
+ v = fp.e1((0.0 - 1.0j))
861
+ assert ae(v, (-0.33740392290096813466 + 0.62471325642771360429j), tol=ATOL)
862
+ assert ae(v.real, -0.33740392290096813466, tol=PTOL)
863
+ assert ae(v.imag, 0.62471325642771360429, tol=PTOL)
864
+ v = fp.e1((0.0 - 2.0j))
865
+ assert ae(v, (-0.4229808287748649957 - 0.034616650007798229345j), tol=ATOL)
866
+ assert ae(v.real, -0.4229808287748649957, tol=PTOL)
867
+ assert ae(v.imag, -0.034616650007798229345, tol=PTOL)
868
+ v = fp.e1((0.0 - 5.0j))
869
+ assert ae(v, (0.19002974965664387862 + 0.020865081850222481957j), tol=ATOL)
870
+ assert ae(v.real, 0.19002974965664387862, tol=PTOL)
871
+ assert ae(v.imag, 0.020865081850222481957, tol=PTOL)
872
+ v = fp.e1((0.0 - 20.0j))
873
+ assert ae(v, (-0.04441982084535331654 + 0.022554625751456779068j), tol=ATOL)
874
+ assert ae(v.real, -0.04441982084535331654, tol=PTOL)
875
+ assert ae(v.imag, 0.022554625751456779068, tol=PTOL)
876
+ v = fp.e1((0.0 - 30.0j))
877
+ assert ae(v, (0.033032417282071143779 + 0.0040397867645455082476j), tol=ATOL)
878
+ assert ae(v.real, 0.033032417282071143779, tol=PTOL)
879
+ assert ae(v.imag, 0.0040397867645455082476, tol=PTOL)
880
+ v = fp.e1((0.0 - 40.0j))
881
+ assert ae(v, (-0.019020007896208766962 - 0.016188792559887887544j), tol=ATOL)
882
+ assert ae(v.real, -0.019020007896208766962, tol=PTOL)
883
+ assert ae(v.imag, -0.016188792559887887544, tol=PTOL)
884
+ v = fp.e1((0.0 - 50.0j))
885
+ assert ae(v, (0.0056283863241163054402 + 0.019179254308960724503j), tol=ATOL)
886
+ assert ae(v.real, 0.0056283863241163054402, tol=PTOL)
887
+ assert ae(v.imag, 0.019179254308960724503, tol=PTOL)
888
+ v = fp.e1((0.0 - 80.0j))
889
+ assert ae(v, (0.012402501155070958192 - 0.0015345601175906961199j), tol=ATOL)
890
+ assert ae(v.real, 0.012402501155070958192, tol=PTOL)
891
+ assert ae(v.imag, -0.0015345601175906961199, tol=PTOL)
892
+ v = fp.e1((1.1641532182693481445e-10 - 4.6566128730773925781e-10j))
893
+ assert ae(v, (20.880034621664969632 + 1.3258176632023711778j), tol=ATOL)
894
+ assert ae(v.real, 20.880034621664969632, tol=PTOL)
895
+ assert ae(v.imag, 1.3258176632023711778, tol=PTOL)
896
+ v = fp.e1((0.25 - 1.0j))
897
+ assert ae(v, (-0.16868306393667788761 + 0.4858011885947426971j), tol=ATOL)
898
+ assert ae(v.real, -0.16868306393667788761, tol=PTOL)
899
+ assert ae(v.imag, 0.4858011885947426971, tol=PTOL)
900
+ v = fp.e1((1.0 - 4.0j))
901
+ assert ae(v, (0.03373591813926547318 - 0.073523452241083821877j), tol=ATOL)
902
+ assert ae(v.real, 0.03373591813926547318, tol=PTOL)
903
+ assert ae(v.imag, -0.073523452241083821877, tol=PTOL)
904
+ v = fp.e1((2.0 - 8.0j))
905
+ assert ae(v, (-0.015392833434733785143 + 0.0031747121557605415914j), tol=ATOL)
906
+ assert ae(v.real, -0.015392833434733785143, tol=PTOL)
907
+ assert ae(v.imag, 0.0031747121557605415914, tol=PTOL)
908
+ v = fp.e1((5.0 - 20.0j))
909
+ assert ae(v, (-0.00024419662286542966525 + 0.00021008322966152755674j), tol=ATOL)
910
+ assert ae(v.real, -0.00024419662286542966525, tol=PTOL)
911
+ assert ae(v.imag, 0.00021008322966152755674, tol=PTOL)
912
+ v = fp.e1((20.0 - 80.0j))
913
+ assert ae(v, (2.3255552781051330088e-11 - 8.9463918891349438007e-12j), tol=ATOL)
914
+ assert ae(v.real, 2.3255552781051330088e-11, tol=PTOL)
915
+ assert ae(v.imag, -8.9463918891349438007e-12, tol=PTOL)
916
+ v = fp.e1((30.0 - 120.0j))
917
+ assert ae(v, (-2.7068919097124652332e-16 + 7.0477762411705130239e-16j), tol=ATOL)
918
+ assert ae(v.real, -2.7068919097124652332e-16, tol=PTOL)
919
+ assert ae(v.imag, 7.0477762411705130239e-16, tol=PTOL)
920
+ v = fp.e1((40.0 - 160.0j))
921
+ assert ae(v, (-1.1695597827678024687e-20 - 2.2907401455645736661e-20j), tol=ATOL)
922
+ assert ae(v.real, -1.1695597827678024687e-20, tol=PTOL)
923
+ assert ae(v.imag, -2.2907401455645736661e-20, tol=PTOL)
924
+ v = fp.e1((50.0 - 200.0j))
925
+ assert ae(v, (9.0323746914410162531e-25 + 2.3950601790033530935e-25j), tol=ATOL)
926
+ assert ae(v.real, 9.0323746914410162531e-25, tol=PTOL)
927
+ assert ae(v.imag, 2.3950601790033530935e-25, tol=PTOL)
928
+ v = fp.e1((80.0 - 320.0j))
929
+ assert ae(v, (3.4819106748728063576e-38 + 4.215653005615772724e-38j), tol=ATOL)
930
+ assert ae(v.real, 3.4819106748728063576e-38, tol=PTOL)
931
+ assert ae(v.imag, 4.215653005615772724e-38, tol=PTOL)
932
+ v = fp.e1((1.1641532182693481445e-10 - 1.1641532182693481445e-10j))
933
+ assert ae(v, (21.950067703413105017 + 0.7853981632810329878j), tol=ATOL)
934
+ assert ae(v.real, 21.950067703413105017, tol=PTOL)
935
+ assert ae(v.imag, 0.7853981632810329878, tol=PTOL)
936
+ v = fp.e1((0.25 - 0.25j))
937
+ assert ae(v, (0.71092525792923287894 + 0.56491812441304194711j), tol=ATOL)
938
+ assert ae(v.real, 0.71092525792923287894, tol=PTOL)
939
+ assert ae(v.imag, 0.56491812441304194711, tol=PTOL)
940
+ v = fp.e1((1.0 - 1.0j))
941
+ assert ae(v, (0.00028162445198141832551 + 0.17932453503935894015j), tol=ATOL)
942
+ assert ae(v.real, 0.00028162445198141832551, tol=PTOL)
943
+ assert ae(v.imag, 0.17932453503935894015, tol=PTOL)
944
+ v = fp.e1((2.0 - 2.0j))
945
+ assert ae(v, (-0.033767089606562004246 + 0.018599414169750541925j), tol=ATOL)
946
+ assert ae(v.real, -0.033767089606562004246, tol=PTOL)
947
+ assert ae(v.imag, 0.018599414169750541925, tol=PTOL)
948
+ v = fp.e1((5.0 - 5.0j))
949
+ assert ae(v, (0.0007266506660356393891 - 0.00047102780163522245054j), tol=ATOL)
950
+ assert ae(v.real, 0.0007266506660356393891, tol=PTOL)
951
+ assert ae(v.imag, -0.00047102780163522245054, tol=PTOL)
952
+ v = fp.e1((20.0 - 20.0j))
953
+ assert ae(v, (-2.3824537449367396579e-11 + 6.6969873156525615158e-11j), tol=ATOL)
954
+ assert ae(v.real, -2.3824537449367396579e-11, tol=PTOL)
955
+ assert ae(v.imag, 6.6969873156525615158e-11, tol=PTOL)
956
+ v = fp.e1((30.0 - 30.0j))
957
+ assert ae(v, (1.7316045841744061617e-15 - 1.3065678019487308689e-15j), tol=ATOL)
958
+ assert ae(v.real, 1.7316045841744061617e-15, tol=PTOL)
959
+ assert ae(v.imag, -1.3065678019487308689e-15, tol=PTOL)
960
+ v = fp.e1((40.0 - 40.0j))
961
+ assert ae(v, (-7.4001043002899232182e-20 + 4.991847855336816304e-21j), tol=ATOL)
962
+ assert ae(v.real, -7.4001043002899232182e-20, tol=PTOL)
963
+ assert ae(v.imag, 4.991847855336816304e-21, tol=PTOL)
964
+ v = fp.e1((50.0 - 50.0j))
965
+ assert ae(v, (2.3566128324644641219e-24 + 1.3188326726201614778e-24j), tol=ATOL)
966
+ assert ae(v.real, 2.3566128324644641219e-24, tol=PTOL)
967
+ assert ae(v.imag, 1.3188326726201614778e-24, tol=PTOL)
968
+ v = fp.e1((80.0 - 80.0j))
969
+ assert ae(v, (9.8279750572186526673e-38 - 1.243952841288868831e-37j), tol=ATOL)
970
+ assert ae(v.real, 9.8279750572186526673e-38, tol=PTOL)
971
+ assert ae(v.imag, -1.243952841288868831e-37, tol=PTOL)
972
+ v = fp.e1((4.6566128730773925781e-10 - 1.1641532182693481445e-10j))
973
+ assert ae(v, (20.880034622014215597 + 0.24497866301044883237j), tol=ATOL)
974
+ assert ae(v.real, 20.880034622014215597, tol=PTOL)
975
+ assert ae(v.imag, 0.24497866301044883237, tol=PTOL)
976
+ v = fp.e1((1.0 - 0.25j))
977
+ assert ae(v, (0.19731063945004229095 + 0.087366045774299963672j), tol=ATOL)
978
+ assert ae(v.real, 0.19731063945004229095, tol=PTOL)
979
+ assert ae(v.imag, 0.087366045774299963672, tol=PTOL)
980
+ v = fp.e1((4.0 - 1.0j))
981
+ assert ae(v, (0.0013106173980145506944 + 0.0034542480199350626699j), tol=ATOL)
982
+ assert ae(v.real, 0.0013106173980145506944, tol=PTOL)
983
+ assert ae(v.imag, 0.0034542480199350626699, tol=PTOL)
984
+ v = fp.e1((8.0 - 2.0j))
985
+ assert ae(v, (-0.000022278049065270225945 + 0.000029191940456521555288j), tol=ATOL)
986
+ assert ae(v.real, -0.000022278049065270225945, tol=PTOL)
987
+ assert ae(v.imag, 0.000029191940456521555288, tol=PTOL)
988
+ v = fp.e1((20.0 - 5.0j))
989
+ assert ae(v, (4.7711374515765346894e-11 - 8.2902652405126947359e-11j), tol=ATOL)
990
+ assert ae(v.real, 4.7711374515765346894e-11, tol=PTOL)
991
+ assert ae(v.imag, -8.2902652405126947359e-11, tol=PTOL)
992
+ v = fp.e1((80.0 - 20.0j))
993
+ assert ae(v, (3.8353473865788235787e-38 + 2.129247592349605139e-37j), tol=ATOL)
994
+ assert ae(v.real, 3.8353473865788235787e-38, tol=PTOL)
995
+ assert ae(v.imag, 2.129247592349605139e-37, tol=PTOL)
996
+ v = fp.e1((120.0 - 30.0j))
997
+ assert ae(v, (2.3836002337480334716e-55 - 5.6704043587126198306e-55j), tol=ATOL)
998
+ assert ae(v.real, 2.3836002337480334716e-55, tol=PTOL)
999
+ assert ae(v.imag, -5.6704043587126198306e-55, tol=PTOL)
1000
+ v = fp.e1((160.0 - 40.0j))
1001
+ assert ae(v, (-1.6238022898654510661e-72 + 1.104172355572287367e-72j), tol=ATOL)
1002
+ assert ae(v.real, -1.6238022898654510661e-72, tol=PTOL)
1003
+ assert ae(v.imag, 1.104172355572287367e-72, tol=PTOL)
1004
+ v = fp.e1((200.0 - 50.0j))
1005
+ assert ae(v, (6.6800061461666228487e-90 - 1.4473816083541016115e-91j), tol=ATOL)
1006
+ assert ae(v.real, 6.6800061461666228487e-90, tol=PTOL)
1007
+ assert ae(v.imag, -1.4473816083541016115e-91, tol=PTOL)
1008
+ v = fp.e1((320.0 - 80.0j))
1009
+ assert ae(v, (4.2737871527778786157e-143 - 3.1789935525785660314e-142j), tol=ATOL)
1010
+ assert ae(v.real, 4.2737871527778786157e-143, tol=PTOL)
1011
+ assert ae(v.imag, -3.1789935525785660314e-142, tol=PTOL)
1012
+ v = fp.ei(1.1641532182693481445e-10)
1013
+ assert ae(v, -22.296641293460247028, tol=ATOL)
1014
+ assert type(v) is float
1015
+ v = fp.ei(0.25)
1016
+ assert ae(v, -0.54254326466191372953, tol=ATOL)
1017
+ assert type(v) is float
1018
+ v = fp.ei(1.0)
1019
+ assert ae(v, 1.8951178163559367555, tol=ATOL)
1020
+ assert type(v) is float
1021
+ v = fp.ei(2.0)
1022
+ assert ae(v, 4.9542343560018901634, tol=ATOL)
1023
+ assert type(v) is float
1024
+ v = fp.ei(5.0)
1025
+ assert ae(v, 40.185275355803177455, tol=ATOL)
1026
+ assert type(v) is float
1027
+ v = fp.ei(20.0)
1028
+ assert ae(v, 25615652.66405658882, tol=ATOL)
1029
+ assert type(v) is float
1030
+ v = fp.ei(30.0)
1031
+ assert ae(v, 368973209407.27419706, tol=ATOL)
1032
+ assert type(v) is float
1033
+ v = fp.ei(40.0)
1034
+ assert ae(v, 6039718263611241.5784, tol=ATOL)
1035
+ assert type(v) is float
1036
+ v = fp.ei(50.0)
1037
+ assert ae(v, 1.0585636897131690963e+20, tol=ATOL)
1038
+ assert type(v) is float
1039
+ v = fp.ei(80.0)
1040
+ assert ae(v, 7.0146000049047999696e+32, tol=ATOL)
1041
+ assert type(v) is float
1042
+ v = fp.ei((1.1641532182693481445e-10 + 0.0j))
1043
+ assert ae(v, (-22.296641293460247028 + 0.0j), tol=ATOL)
1044
+ assert ae(v.real, -22.296641293460247028, tol=PTOL)
1045
+ assert v.imag == 0
1046
+ v = fp.ei((0.25 + 0.0j))
1047
+ assert ae(v, (-0.54254326466191372953 + 0.0j), tol=ATOL)
1048
+ assert ae(v.real, -0.54254326466191372953, tol=PTOL)
1049
+ assert v.imag == 0
1050
+ v = fp.ei((1.0 + 0.0j))
1051
+ assert ae(v, (1.8951178163559367555 + 0.0j), tol=ATOL)
1052
+ assert ae(v.real, 1.8951178163559367555, tol=PTOL)
1053
+ assert v.imag == 0
1054
+ v = fp.ei((2.0 + 0.0j))
1055
+ assert ae(v, (4.9542343560018901634 + 0.0j), tol=ATOL)
1056
+ assert ae(v.real, 4.9542343560018901634, tol=PTOL)
1057
+ assert v.imag == 0
1058
+ v = fp.ei((5.0 + 0.0j))
1059
+ assert ae(v, (40.185275355803177455 + 0.0j), tol=ATOL)
1060
+ assert ae(v.real, 40.185275355803177455, tol=PTOL)
1061
+ assert v.imag == 0
1062
+ v = fp.ei((20.0 + 0.0j))
1063
+ assert ae(v, (25615652.66405658882 + 0.0j), tol=ATOL)
1064
+ assert ae(v.real, 25615652.66405658882, tol=PTOL)
1065
+ assert v.imag == 0
1066
+ v = fp.ei((30.0 + 0.0j))
1067
+ assert ae(v, (368973209407.27419706 + 0.0j), tol=ATOL)
1068
+ assert ae(v.real, 368973209407.27419706, tol=PTOL)
1069
+ assert v.imag == 0
1070
+ v = fp.ei((40.0 + 0.0j))
1071
+ assert ae(v, (6039718263611241.5784 + 0.0j), tol=ATOL)
1072
+ assert ae(v.real, 6039718263611241.5784, tol=PTOL)
1073
+ assert v.imag == 0
1074
+ v = fp.ei((50.0 + 0.0j))
1075
+ assert ae(v, (1.0585636897131690963e+20 + 0.0j), tol=ATOL)
1076
+ assert ae(v.real, 1.0585636897131690963e+20, tol=PTOL)
1077
+ assert v.imag == 0
1078
+ v = fp.ei((80.0 + 0.0j))
1079
+ assert ae(v, (7.0146000049047999696e+32 + 0.0j), tol=ATOL)
1080
+ assert ae(v.real, 7.0146000049047999696e+32, tol=PTOL)
1081
+ assert v.imag == 0
1082
+ v = fp.ei((4.6566128730773925781e-10 + 1.1641532182693481445e-10j))
1083
+ assert ae(v, (-20.880034621082893023 + 0.24497866324327947603j), tol=ATOL)
1084
+ assert ae(v.real, -20.880034621082893023, tol=PTOL)
1085
+ assert ae(v.imag, 0.24497866324327947603, tol=PTOL)
1086
+ v = fp.ei((1.0 + 0.25j))
1087
+ assert ae(v, (1.8942716983721074932 + 0.67268237088273915854j), tol=ATOL)
1088
+ assert ae(v.real, 1.8942716983721074932, tol=PTOL)
1089
+ assert ae(v.imag, 0.67268237088273915854, tol=PTOL)
1090
+ v = fp.ei((4.0 + 1.0j))
1091
+ assert ae(v, (14.806699492675420438 + 12.280015176673582616j), tol=ATOL)
1092
+ assert ae(v.real, 14.806699492675420438, tol=PTOL)
1093
+ assert ae(v.imag, 12.280015176673582616, tol=PTOL)
1094
+ v = fp.ei((8.0 + 2.0j))
1095
+ assert ae(v, (-54.633252667426386294 + 416.34477429173650012j), tol=ATOL)
1096
+ assert ae(v.real, -54.633252667426386294, tol=PTOL)
1097
+ assert ae(v.imag, 416.34477429173650012, tol=PTOL)
1098
+ v = fp.ei((20.0 + 5.0j))
1099
+ assert ae(v, (711836.97165402624643 - 24745247.798103247366j), tol=ATOL)
1100
+ assert ae(v.real, 711836.97165402624643, tol=PTOL)
1101
+ assert ae(v.imag, -24745247.798103247366, tol=PTOL)
1102
+ v = fp.ei((80.0 + 20.0j))
1103
+ assert ae(v, (4.2139911108612653091e+32 + 5.3367124741918251637e+32j), tol=ATOL)
1104
+ assert ae(v.real, 4.2139911108612653091e+32, tol=PTOL)
1105
+ assert ae(v.imag, 5.3367124741918251637e+32, tol=PTOL)
1106
+ v = fp.ei((120.0 + 30.0j))
1107
+ assert ae(v, (-9.7760616203707508892e+48 - 1.058257682317195792e+50j), tol=ATOL)
1108
+ assert ae(v.real, -9.7760616203707508892e+48, tol=PTOL)
1109
+ assert ae(v.imag, -1.058257682317195792e+50, tol=PTOL)
1110
+ v = fp.ei((160.0 + 40.0j))
1111
+ assert ae(v, (-8.7065541466623638861e+66 + 1.6577106725141739889e+67j), tol=ATOL)
1112
+ assert ae(v.real, -8.7065541466623638861e+66, tol=PTOL)
1113
+ assert ae(v.imag, 1.6577106725141739889e+67, tol=PTOL)
1114
+ v = fp.ei((200.0 + 50.0j))
1115
+ assert ae(v, (3.070744996327018106e+84 - 1.7243244846769415903e+84j), tol=ATOL)
1116
+ assert ae(v.real, 3.070744996327018106e+84, tol=PTOL)
1117
+ assert ae(v.imag, -1.7243244846769415903e+84, tol=PTOL)
1118
+ v = fp.ei((320.0 + 80.0j))
1119
+ assert ae(v, (-9.9960598637998647276e+135 - 2.6855081527595608863e+136j), tol=ATOL)
1120
+ assert ae(v.real, -9.9960598637998647276e+135, tol=PTOL)
1121
+ assert ae(v.imag, -2.6855081527595608863e+136, tol=PTOL)
1122
+ v = fp.ei((1.1641532182693481445e-10 + 1.1641532182693481445e-10j))
1123
+ assert ae(v, (-21.950067703180274374 + 0.78539816351386363145j), tol=ATOL)
1124
+ assert ae(v.real, -21.950067703180274374, tol=PTOL)
1125
+ assert ae(v.imag, 0.78539816351386363145, tol=PTOL)
1126
+ v = fp.ei((0.25 + 0.25j))
1127
+ assert ae(v, (-0.21441047326710323254 + 1.0683772981589995996j), tol=ATOL)
1128
+ assert ae(v.real, -0.21441047326710323254, tol=PTOL)
1129
+ assert ae(v.imag, 1.0683772981589995996, tol=PTOL)
1130
+ v = fp.ei((1.0 + 1.0j))
1131
+ assert ae(v, (1.7646259855638540684 + 2.3877698515105224193j), tol=ATOL)
1132
+ assert ae(v.real, 1.7646259855638540684, tol=PTOL)
1133
+ assert ae(v.imag, 2.3877698515105224193, tol=PTOL)
1134
+ v = fp.ei((2.0 + 2.0j))
1135
+ assert ae(v, (1.8920781621855474089 + 5.3169624378326579621j), tol=ATOL)
1136
+ assert ae(v.real, 1.8920781621855474089, tol=PTOL)
1137
+ assert ae(v.imag, 5.3169624378326579621, tol=PTOL)
1138
+ v = fp.ei((5.0 + 5.0j))
1139
+ assert ae(v, (-13.470936071475245856 - 15.322492395731230968j), tol=ATOL)
1140
+ assert ae(v.real, -13.470936071475245856, tol=PTOL)
1141
+ assert ae(v.imag, -15.322492395731230968, tol=PTOL)
1142
+ v = fp.ei((20.0 + 20.0j))
1143
+ assert ae(v, (16589317.398788971896 + 5831705.4712368307104j), tol=ATOL)
1144
+ assert ae(v.real, 16589317.398788971896, tol=PTOL)
1145
+ assert ae(v.imag, 5831705.4712368307104, tol=PTOL)
1146
+ v = fp.ei((30.0 + 30.0j))
1147
+ assert ae(v, (-154596484273.69322527 - 204179357834.2723043j), tol=ATOL)
1148
+ assert ae(v.real, -154596484273.69322527, tol=PTOL)
1149
+ assert ae(v.imag, -204179357834.2723043, tol=PTOL)
1150
+ v = fp.ei((40.0 + 40.0j))
1151
+ assert ae(v, (287512180321448.45408 + 4203502407932318.1156j), tol=ATOL)
1152
+ assert ae(v.real, 287512180321448.45408, tol=PTOL)
1153
+ assert ae(v.imag, 4203502407932318.1156, tol=PTOL)
1154
+ v = fp.ei((50.0 + 50.0j))
1155
+ assert ae(v, (36128528616649268826.0 - 64648801861338741960.0j), tol=ATOL)
1156
+ assert ae(v.real, 36128528616649268826.0, tol=PTOL)
1157
+ assert ae(v.imag, -64648801861338741960.0, tol=PTOL)
1158
+ v = fp.ei((80.0 + 80.0j))
1159
+ assert ae(v, (-3.8674816337930010217e+32 - 3.0540709639658071041e+32j), tol=ATOL)
1160
+ assert ae(v.real, -3.8674816337930010217e+32, tol=PTOL)
1161
+ assert ae(v.imag, -3.0540709639658071041e+32, tol=PTOL)
1162
+ v = fp.ei((1.1641532182693481445e-10 + 4.6566128730773925781e-10j))
1163
+ assert ae(v, (-20.880034621432138988 + 1.3258176641336937524j), tol=ATOL)
1164
+ assert ae(v.real, -20.880034621432138988, tol=PTOL)
1165
+ assert ae(v.imag, 1.3258176641336937524, tol=PTOL)
1166
+ v = fp.ei((0.25 + 1.0j))
1167
+ assert ae(v, (0.59066621214766308594 + 2.3968481059377428687j), tol=ATOL)
1168
+ assert ae(v.real, 0.59066621214766308594, tol=PTOL)
1169
+ assert ae(v.imag, 2.3968481059377428687, tol=PTOL)
1170
+ v = fp.ei((1.0 + 4.0j))
1171
+ assert ae(v, (-0.49739047283060471093 + 3.5570287076301818702j), tol=ATOL)
1172
+ assert ae(v.real, -0.49739047283060471093, tol=PTOL)
1173
+ assert ae(v.imag, 3.5570287076301818702, tol=PTOL)
1174
+ v = fp.ei((2.0 + 8.0j))
1175
+ assert ae(v, (0.8705211147733730969 + 3.3825859385758486351j), tol=ATOL)
1176
+ assert ae(v.real, 0.8705211147733730969, tol=PTOL)
1177
+ assert ae(v.imag, 3.3825859385758486351, tol=PTOL)
1178
+ v = fp.ei((5.0 + 20.0j))
1179
+ assert ae(v, (7.0789514293925893007 + 1.5313749363937141849j), tol=ATOL)
1180
+ assert ae(v.real, 7.0789514293925893007, tol=PTOL)
1181
+ assert ae(v.imag, 1.5313749363937141849, tol=PTOL)
1182
+ v = fp.ei((20.0 + 80.0j))
1183
+ assert ae(v, (-5855431.4907298084434 - 720917.79156143806727j), tol=ATOL)
1184
+ assert ae(v.real, -5855431.4907298084434, tol=PTOL)
1185
+ assert ae(v.imag, -720917.79156143806727, tol=PTOL)
1186
+ v = fp.ei((30.0 + 120.0j))
1187
+ assert ae(v, (65402491644.703470747 - 56697658396.51586764j), tol=ATOL)
1188
+ assert ae(v.real, 65402491644.703470747, tol=PTOL)
1189
+ assert ae(v.imag, -56697658396.51586764, tol=PTOL)
1190
+ v = fp.ei((40.0 + 160.0j))
1191
+ assert ae(v, (-25504929379604.776769 + 1429035198630576.3879j), tol=ATOL)
1192
+ assert ae(v.real, -25504929379604.776769, tol=PTOL)
1193
+ assert ae(v.imag, 1429035198630576.3879, tol=PTOL)
1194
+ v = fp.ei((50.0 + 200.0j))
1195
+ assert ae(v, (-18437746526988116954.0 - 17146362239046152342.0j), tol=ATOL)
1196
+ assert ae(v.real, -18437746526988116954.0, tol=PTOL)
1197
+ assert ae(v.imag, -17146362239046152342.0, tol=PTOL)
1198
+ v = fp.ei((80.0 + 320.0j))
1199
+ assert ae(v, (-3.3464697299634526706e+31 - 1.6473152633843023919e+32j), tol=ATOL)
1200
+ assert ae(v.real, -3.3464697299634526706e+31, tol=PTOL)
1201
+ assert ae(v.imag, -1.6473152633843023919e+32, tol=PTOL)
1202
+ v = fp.ei((0.0 + 1.1641532182693481445e-10j))
1203
+ assert ae(v, (-22.29664129357666235 + 1.5707963269113119411j), tol=ATOL)
1204
+ assert ae(v.real, -22.29664129357666235, tol=PTOL)
1205
+ assert ae(v.imag, 1.5707963269113119411, tol=PTOL)
1206
+ v = fp.ei((0.0 + 0.25j))
1207
+ assert ae(v, (-0.82466306258094565309 + 1.8199298971146537833j), tol=ATOL)
1208
+ assert ae(v.real, -0.82466306258094565309, tol=PTOL)
1209
+ assert ae(v.imag, 1.8199298971146537833, tol=PTOL)
1210
+ v = fp.ei((0.0 + 1.0j))
1211
+ assert ae(v, (0.33740392290096813466 + 2.5168793971620796342j), tol=ATOL)
1212
+ assert ae(v.real, 0.33740392290096813466, tol=PTOL)
1213
+ assert ae(v.imag, 2.5168793971620796342, tol=PTOL)
1214
+ v = fp.ei((0.0 + 2.0j))
1215
+ assert ae(v, (0.4229808287748649957 + 3.1762093035975914678j), tol=ATOL)
1216
+ assert ae(v.real, 0.4229808287748649957, tol=PTOL)
1217
+ assert ae(v.imag, 3.1762093035975914678, tol=PTOL)
1218
+ v = fp.ei((0.0 + 5.0j))
1219
+ assert ae(v, (-0.19002974965664387862 + 3.1207275717395707565j), tol=ATOL)
1220
+ assert ae(v.real, -0.19002974965664387862, tol=PTOL)
1221
+ assert ae(v.imag, 3.1207275717395707565, tol=PTOL)
1222
+ v = fp.ei((0.0 + 20.0j))
1223
+ assert ae(v, (0.04441982084535331654 + 3.1190380278383364594j), tol=ATOL)
1224
+ assert ae(v.real, 0.04441982084535331654, tol=PTOL)
1225
+ assert ae(v.imag, 3.1190380278383364594, tol=PTOL)
1226
+ v = fp.ei((0.0 + 30.0j))
1227
+ assert ae(v, (-0.033032417282071143779 + 3.1375528668252477302j), tol=ATOL)
1228
+ assert ae(v.real, -0.033032417282071143779, tol=PTOL)
1229
+ assert ae(v.imag, 3.1375528668252477302, tol=PTOL)
1230
+ v = fp.ei((0.0 + 40.0j))
1231
+ assert ae(v, (0.019020007896208766962 + 3.157781446149681126j), tol=ATOL)
1232
+ assert ae(v.real, 0.019020007896208766962, tol=PTOL)
1233
+ assert ae(v.imag, 3.157781446149681126, tol=PTOL)
1234
+ v = fp.ei((0.0 + 50.0j))
1235
+ assert ae(v, (-0.0056283863241163054402 + 3.122413399280832514j), tol=ATOL)
1236
+ assert ae(v.real, -0.0056283863241163054402, tol=PTOL)
1237
+ assert ae(v.imag, 3.122413399280832514, tol=PTOL)
1238
+ v = fp.ei((0.0 + 80.0j))
1239
+ assert ae(v, (-0.012402501155070958192 + 3.1431272137073839346j), tol=ATOL)
1240
+ assert ae(v.real, -0.012402501155070958192, tol=PTOL)
1241
+ assert ae(v.imag, 3.1431272137073839346, tol=PTOL)
1242
+ v = fp.ei((-1.1641532182693481445e-10 + 4.6566128730773925781e-10j))
1243
+ assert ae(v, (-20.880034621664969632 + 1.8157749903874220607j), tol=ATOL)
1244
+ assert ae(v.real, -20.880034621664969632, tol=PTOL)
1245
+ assert ae(v.imag, 1.8157749903874220607, tol=PTOL)
1246
+ v = fp.ei((-0.25 + 1.0j))
1247
+ assert ae(v, (0.16868306393667788761 + 2.6557914649950505414j), tol=ATOL)
1248
+ assert ae(v.real, 0.16868306393667788761, tol=PTOL)
1249
+ assert ae(v.imag, 2.6557914649950505414, tol=PTOL)
1250
+ v = fp.ei((-1.0 + 4.0j))
1251
+ assert ae(v, (-0.03373591813926547318 + 3.2151161058308770603j), tol=ATOL)
1252
+ assert ae(v.real, -0.03373591813926547318, tol=PTOL)
1253
+ assert ae(v.imag, 3.2151161058308770603, tol=PTOL)
1254
+ v = fp.ei((-2.0 + 8.0j))
1255
+ assert ae(v, (0.015392833434733785143 + 3.1384179414340326969j), tol=ATOL)
1256
+ assert ae(v.real, 0.015392833434733785143, tol=PTOL)
1257
+ assert ae(v.imag, 3.1384179414340326969, tol=PTOL)
1258
+ v = fp.ei((-5.0 + 20.0j))
1259
+ assert ae(v, (0.00024419662286542966525 + 3.1413825703601317109j), tol=ATOL)
1260
+ assert ae(v.real, 0.00024419662286542966525, tol=PTOL)
1261
+ assert ae(v.imag, 3.1413825703601317109, tol=PTOL)
1262
+ v = fp.ei((-20.0 + 80.0j))
1263
+ assert ae(v, (-2.3255552781051330088e-11 + 3.1415926535987396304j), tol=ATOL)
1264
+ assert ae(v.real, -2.3255552781051330088e-11, tol=PTOL)
1265
+ assert ae(v.imag, 3.1415926535987396304, tol=PTOL)
1266
+ v = fp.ei((-30.0 + 120.0j))
1267
+ assert ae(v, (2.7068919097124652332e-16 + 3.1415926535897925337j), tol=ATOL)
1268
+ assert ae(v.real, 2.7068919097124652332e-16, tol=PTOL)
1269
+ assert ae(v.imag, 3.1415926535897925337, tol=PTOL)
1270
+ v = fp.ei((-40.0 + 160.0j))
1271
+ assert ae(v, (1.1695597827678024687e-20 + 3.1415926535897932385j), tol=ATOL)
1272
+ assert ae(v.real, 1.1695597827678024687e-20, tol=PTOL)
1273
+ assert ae(v.imag, 3.1415926535897932385, tol=PTOL)
1274
+ v = fp.ei((-50.0 + 200.0j))
1275
+ assert ae(v, (-9.0323746914410162531e-25 + 3.1415926535897932385j), tol=ATOL)
1276
+ assert ae(v.real, -9.0323746914410162531e-25, tol=PTOL)
1277
+ assert ae(v.imag, 3.1415926535897932385, tol=PTOL)
1278
+ v = fp.ei((-80.0 + 320.0j))
1279
+ assert ae(v, (-3.4819106748728063576e-38 + 3.1415926535897932385j), tol=ATOL)
1280
+ assert ae(v.real, -3.4819106748728063576e-38, tol=PTOL)
1281
+ assert ae(v.imag, 3.1415926535897932385, tol=PTOL)
1282
+ v = fp.ei((-4.6566128730773925781e-10 + 1.1641532182693481445e-10j))
1283
+ assert ae(v, (-20.880034622014215597 + 2.8966139905793444061j), tol=ATOL)
1284
+ assert ae(v.real, -20.880034622014215597, tol=PTOL)
1285
+ assert ae(v.imag, 2.8966139905793444061, tol=PTOL)
1286
+ v = fp.ei((-1.0 + 0.25j))
1287
+ assert ae(v, (-0.19731063945004229095 + 3.0542266078154932748j), tol=ATOL)
1288
+ assert ae(v.real, -0.19731063945004229095, tol=PTOL)
1289
+ assert ae(v.imag, 3.0542266078154932748, tol=PTOL)
1290
+ v = fp.ei((-4.0 + 1.0j))
1291
+ assert ae(v, (-0.0013106173980145506944 + 3.1381384055698581758j), tol=ATOL)
1292
+ assert ae(v.real, -0.0013106173980145506944, tol=PTOL)
1293
+ assert ae(v.imag, 3.1381384055698581758, tol=PTOL)
1294
+ v = fp.ei((-8.0 + 2.0j))
1295
+ assert ae(v, (0.000022278049065270225945 + 3.1415634616493367169j), tol=ATOL)
1296
+ assert ae(v.real, 0.000022278049065270225945, tol=PTOL)
1297
+ assert ae(v.imag, 3.1415634616493367169, tol=PTOL)
1298
+ v = fp.ei((-20.0 + 5.0j))
1299
+ assert ae(v, (-4.7711374515765346894e-11 + 3.1415926536726958909j), tol=ATOL)
1300
+ assert ae(v.real, -4.7711374515765346894e-11, tol=PTOL)
1301
+ assert ae(v.imag, 3.1415926536726958909, tol=PTOL)
1302
+ v = fp.ei((-80.0 + 20.0j))
1303
+ assert ae(v, (-3.8353473865788235787e-38 + 3.1415926535897932385j), tol=ATOL)
1304
+ assert ae(v.real, -3.8353473865788235787e-38, tol=PTOL)
1305
+ assert ae(v.imag, 3.1415926535897932385, tol=PTOL)
1306
+ v = fp.ei((-120.0 + 30.0j))
1307
+ assert ae(v, (-2.3836002337480334716e-55 + 3.1415926535897932385j), tol=ATOL)
1308
+ assert ae(v.real, -2.3836002337480334716e-55, tol=PTOL)
1309
+ assert ae(v.imag, 3.1415926535897932385, tol=PTOL)
1310
+ v = fp.ei((-160.0 + 40.0j))
1311
+ assert ae(v, (1.6238022898654510661e-72 + 3.1415926535897932385j), tol=ATOL)
1312
+ assert ae(v.real, 1.6238022898654510661e-72, tol=PTOL)
1313
+ assert ae(v.imag, 3.1415926535897932385, tol=PTOL)
1314
+ v = fp.ei((-200.0 + 50.0j))
1315
+ assert ae(v, (-6.6800061461666228487e-90 + 3.1415926535897932385j), tol=ATOL)
1316
+ assert ae(v.real, -6.6800061461666228487e-90, tol=PTOL)
1317
+ assert ae(v.imag, 3.1415926535897932385, tol=PTOL)
1318
+ v = fp.ei((-320.0 + 80.0j))
1319
+ assert ae(v, (-4.2737871527778786157e-143 + 3.1415926535897932385j), tol=ATOL)
1320
+ assert ae(v.real, -4.2737871527778786157e-143, tol=PTOL)
1321
+ assert ae(v.imag, 3.1415926535897932385, tol=PTOL)
1322
+ v = fp.ei(-1.1641532182693481445e-10)
1323
+ assert ae(v, -22.296641293693077672, tol=ATOL)
1324
+ assert type(v) is float
1325
+ v = fp.ei(-0.25)
1326
+ assert ae(v, -1.0442826344437381945, tol=ATOL)
1327
+ assert type(v) is float
1328
+ v = fp.ei(-1.0)
1329
+ assert ae(v, -0.21938393439552027368, tol=ATOL)
1330
+ assert type(v) is float
1331
+ v = fp.ei(-2.0)
1332
+ assert ae(v, -0.048900510708061119567, tol=ATOL)
1333
+ assert type(v) is float
1334
+ v = fp.ei(-5.0)
1335
+ assert ae(v, -0.0011482955912753257973, tol=ATOL)
1336
+ assert type(v) is float
1337
+ v = fp.ei(-20.0)
1338
+ assert ae(v, -9.8355252906498816904e-11, tol=ATOL)
1339
+ assert type(v) is float
1340
+ v = fp.ei(-30.0)
1341
+ assert ae(v, -3.0215520106888125448e-15, tol=ATOL)
1342
+ assert type(v) is float
1343
+ v = fp.ei(-40.0)
1344
+ assert ae(v, -1.0367732614516569722e-19, tol=ATOL)
1345
+ assert type(v) is float
1346
+ v = fp.ei(-50.0)
1347
+ assert ae(v, -3.7832640295504590187e-24, tol=ATOL)
1348
+ assert type(v) is float
1349
+ v = fp.ei(-80.0)
1350
+ assert ae(v, -2.2285432586884729112e-37, tol=ATOL)
1351
+ assert type(v) is float
1352
+ v = fp.ei((-1.1641532182693481445e-10 + 0.0j))
1353
+ assert ae(v, (-22.296641293693077672 + 0.0j), tol=ATOL)
1354
+ assert ae(v.real, -22.296641293693077672, tol=PTOL)
1355
+ assert v.imag == 0
1356
+ v = fp.ei((-0.25 + 0.0j))
1357
+ assert ae(v, (-1.0442826344437381945 + 0.0j), tol=ATOL)
1358
+ assert ae(v.real, -1.0442826344437381945, tol=PTOL)
1359
+ assert v.imag == 0
1360
+ v = fp.ei((-1.0 + 0.0j))
1361
+ assert ae(v, (-0.21938393439552027368 + 0.0j), tol=ATOL)
1362
+ assert ae(v.real, -0.21938393439552027368, tol=PTOL)
1363
+ assert v.imag == 0
1364
+ v = fp.ei((-2.0 + 0.0j))
1365
+ assert ae(v, (-0.048900510708061119567 + 0.0j), tol=ATOL)
1366
+ assert ae(v.real, -0.048900510708061119567, tol=PTOL)
1367
+ assert v.imag == 0
1368
+ v = fp.ei((-5.0 + 0.0j))
1369
+ assert ae(v, (-0.0011482955912753257973 + 0.0j), tol=ATOL)
1370
+ assert ae(v.real, -0.0011482955912753257973, tol=PTOL)
1371
+ assert v.imag == 0
1372
+ v = fp.ei((-20.0 + 0.0j))
1373
+ assert ae(v, (-9.8355252906498816904e-11 + 0.0j), tol=ATOL)
1374
+ assert ae(v.real, -9.8355252906498816904e-11, tol=PTOL)
1375
+ assert v.imag == 0
1376
+ v = fp.ei((-30.0 + 0.0j))
1377
+ assert ae(v, (-3.0215520106888125448e-15 + 0.0j), tol=ATOL)
1378
+ assert ae(v.real, -3.0215520106888125448e-15, tol=PTOL)
1379
+ assert v.imag == 0
1380
+ v = fp.ei((-40.0 + 0.0j))
1381
+ assert ae(v, (-1.0367732614516569722e-19 + 0.0j), tol=ATOL)
1382
+ assert ae(v.real, -1.0367732614516569722e-19, tol=PTOL)
1383
+ assert v.imag == 0
1384
+ v = fp.ei((-50.0 + 0.0j))
1385
+ assert ae(v, (-3.7832640295504590187e-24 + 0.0j), tol=ATOL)
1386
+ assert ae(v.real, -3.7832640295504590187e-24, tol=PTOL)
1387
+ assert v.imag == 0
1388
+ v = fp.ei((-80.0 + 0.0j))
1389
+ assert ae(v, (-2.2285432586884729112e-37 + 0.0j), tol=ATOL)
1390
+ assert ae(v.real, -2.2285432586884729112e-37, tol=PTOL)
1391
+ assert v.imag == 0
1392
+ v = fp.ei((-4.6566128730773925781e-10 - 1.1641532182693481445e-10j))
1393
+ assert ae(v, (-20.880034622014215597 - 2.8966139905793444061j), tol=ATOL)
1394
+ assert ae(v.real, -20.880034622014215597, tol=PTOL)
1395
+ assert ae(v.imag, -2.8966139905793444061, tol=PTOL)
1396
+ v = fp.ei((-1.0 - 0.25j))
1397
+ assert ae(v, (-0.19731063945004229095 - 3.0542266078154932748j), tol=ATOL)
1398
+ assert ae(v.real, -0.19731063945004229095, tol=PTOL)
1399
+ assert ae(v.imag, -3.0542266078154932748, tol=PTOL)
1400
+ v = fp.ei((-4.0 - 1.0j))
1401
+ assert ae(v, (-0.0013106173980145506944 - 3.1381384055698581758j), tol=ATOL)
1402
+ assert ae(v.real, -0.0013106173980145506944, tol=PTOL)
1403
+ assert ae(v.imag, -3.1381384055698581758, tol=PTOL)
1404
+ v = fp.ei((-8.0 - 2.0j))
1405
+ assert ae(v, (0.000022278049065270225945 - 3.1415634616493367169j), tol=ATOL)
1406
+ assert ae(v.real, 0.000022278049065270225945, tol=PTOL)
1407
+ assert ae(v.imag, -3.1415634616493367169, tol=PTOL)
1408
+ v = fp.ei((-20.0 - 5.0j))
1409
+ assert ae(v, (-4.7711374515765346894e-11 - 3.1415926536726958909j), tol=ATOL)
1410
+ assert ae(v.real, -4.7711374515765346894e-11, tol=PTOL)
1411
+ assert ae(v.imag, -3.1415926536726958909, tol=PTOL)
1412
+ v = fp.ei((-80.0 - 20.0j))
1413
+ assert ae(v, (-3.8353473865788235787e-38 - 3.1415926535897932385j), tol=ATOL)
1414
+ assert ae(v.real, -3.8353473865788235787e-38, tol=PTOL)
1415
+ assert ae(v.imag, -3.1415926535897932385, tol=PTOL)
1416
+ v = fp.ei((-120.0 - 30.0j))
1417
+ assert ae(v, (-2.3836002337480334716e-55 - 3.1415926535897932385j), tol=ATOL)
1418
+ assert ae(v.real, -2.3836002337480334716e-55, tol=PTOL)
1419
+ assert ae(v.imag, -3.1415926535897932385, tol=PTOL)
1420
+ v = fp.ei((-160.0 - 40.0j))
1421
+ assert ae(v, (1.6238022898654510661e-72 - 3.1415926535897932385j), tol=ATOL)
1422
+ assert ae(v.real, 1.6238022898654510661e-72, tol=PTOL)
1423
+ assert ae(v.imag, -3.1415926535897932385, tol=PTOL)
1424
+ v = fp.ei((-200.0 - 50.0j))
1425
+ assert ae(v, (-6.6800061461666228487e-90 - 3.1415926535897932385j), tol=ATOL)
1426
+ assert ae(v.real, -6.6800061461666228487e-90, tol=PTOL)
1427
+ assert ae(v.imag, -3.1415926535897932385, tol=PTOL)
1428
+ v = fp.ei((-320.0 - 80.0j))
1429
+ assert ae(v, (-4.2737871527778786157e-143 - 3.1415926535897932385j), tol=ATOL)
1430
+ assert ae(v.real, -4.2737871527778786157e-143, tol=PTOL)
1431
+ assert ae(v.imag, -3.1415926535897932385, tol=PTOL)
1432
+ v = fp.ei((-1.1641532182693481445e-10 - 1.1641532182693481445e-10j))
1433
+ assert ae(v, (-21.950067703413105017 - 2.3561944903087602507j), tol=ATOL)
1434
+ assert ae(v.real, -21.950067703413105017, tol=PTOL)
1435
+ assert ae(v.imag, -2.3561944903087602507, tol=PTOL)
1436
+ v = fp.ei((-0.25 - 0.25j))
1437
+ assert ae(v, (-0.71092525792923287894 - 2.5766745291767512913j), tol=ATOL)
1438
+ assert ae(v.real, -0.71092525792923287894, tol=PTOL)
1439
+ assert ae(v.imag, -2.5766745291767512913, tol=PTOL)
1440
+ v = fp.ei((-1.0 - 1.0j))
1441
+ assert ae(v, (-0.00028162445198141832551 - 2.9622681185504342983j), tol=ATOL)
1442
+ assert ae(v.real, -0.00028162445198141832551, tol=PTOL)
1443
+ assert ae(v.imag, -2.9622681185504342983, tol=PTOL)
1444
+ v = fp.ei((-2.0 - 2.0j))
1445
+ assert ae(v, (0.033767089606562004246 - 3.1229932394200426965j), tol=ATOL)
1446
+ assert ae(v.real, 0.033767089606562004246, tol=PTOL)
1447
+ assert ae(v.imag, -3.1229932394200426965, tol=PTOL)
1448
+ v = fp.ei((-5.0 - 5.0j))
1449
+ assert ae(v, (-0.0007266506660356393891 - 3.1420636813914284609j), tol=ATOL)
1450
+ assert ae(v.real, -0.0007266506660356393891, tol=PTOL)
1451
+ assert ae(v.imag, -3.1420636813914284609, tol=PTOL)
1452
+ v = fp.ei((-20.0 - 20.0j))
1453
+ assert ae(v, (2.3824537449367396579e-11 - 3.1415926535228233653j), tol=ATOL)
1454
+ assert ae(v.real, 2.3824537449367396579e-11, tol=PTOL)
1455
+ assert ae(v.imag, -3.1415926535228233653, tol=PTOL)
1456
+ v = fp.ei((-30.0 - 30.0j))
1457
+ assert ae(v, (-1.7316045841744061617e-15 - 3.141592653589794545j), tol=ATOL)
1458
+ assert ae(v.real, -1.7316045841744061617e-15, tol=PTOL)
1459
+ assert ae(v.imag, -3.141592653589794545, tol=PTOL)
1460
+ v = fp.ei((-40.0 - 40.0j))
1461
+ assert ae(v, (7.4001043002899232182e-20 - 3.1415926535897932385j), tol=ATOL)
1462
+ assert ae(v.real, 7.4001043002899232182e-20, tol=PTOL)
1463
+ assert ae(v.imag, -3.1415926535897932385, tol=PTOL)
1464
+ v = fp.ei((-50.0 - 50.0j))
1465
+ assert ae(v, (-2.3566128324644641219e-24 - 3.1415926535897932385j), tol=ATOL)
1466
+ assert ae(v.real, -2.3566128324644641219e-24, tol=PTOL)
1467
+ assert ae(v.imag, -3.1415926535897932385, tol=PTOL)
1468
+ v = fp.ei((-80.0 - 80.0j))
1469
+ assert ae(v, (-9.8279750572186526673e-38 - 3.1415926535897932385j), tol=ATOL)
1470
+ assert ae(v.real, -9.8279750572186526673e-38, tol=PTOL)
1471
+ assert ae(v.imag, -3.1415926535897932385, tol=PTOL)
1472
+ v = fp.ei((-1.1641532182693481445e-10 - 4.6566128730773925781e-10j))
1473
+ assert ae(v, (-20.880034621664969632 - 1.8157749903874220607j), tol=ATOL)
1474
+ assert ae(v.real, -20.880034621664969632, tol=PTOL)
1475
+ assert ae(v.imag, -1.8157749903874220607, tol=PTOL)
1476
+ v = fp.ei((-0.25 - 1.0j))
1477
+ assert ae(v, (0.16868306393667788761 - 2.6557914649950505414j), tol=ATOL)
1478
+ assert ae(v.real, 0.16868306393667788761, tol=PTOL)
1479
+ assert ae(v.imag, -2.6557914649950505414, tol=PTOL)
1480
+ v = fp.ei((-1.0 - 4.0j))
1481
+ assert ae(v, (-0.03373591813926547318 - 3.2151161058308770603j), tol=ATOL)
1482
+ assert ae(v.real, -0.03373591813926547318, tol=PTOL)
1483
+ assert ae(v.imag, -3.2151161058308770603, tol=PTOL)
1484
+ v = fp.ei((-2.0 - 8.0j))
1485
+ assert ae(v, (0.015392833434733785143 - 3.1384179414340326969j), tol=ATOL)
1486
+ assert ae(v.real, 0.015392833434733785143, tol=PTOL)
1487
+ assert ae(v.imag, -3.1384179414340326969, tol=PTOL)
1488
+ v = fp.ei((-5.0 - 20.0j))
1489
+ assert ae(v, (0.00024419662286542966525 - 3.1413825703601317109j), tol=ATOL)
1490
+ assert ae(v.real, 0.00024419662286542966525, tol=PTOL)
1491
+ assert ae(v.imag, -3.1413825703601317109, tol=PTOL)
1492
+ v = fp.ei((-20.0 - 80.0j))
1493
+ assert ae(v, (-2.3255552781051330088e-11 - 3.1415926535987396304j), tol=ATOL)
1494
+ assert ae(v.real, -2.3255552781051330088e-11, tol=PTOL)
1495
+ assert ae(v.imag, -3.1415926535987396304, tol=PTOL)
1496
+ v = fp.ei((-30.0 - 120.0j))
1497
+ assert ae(v, (2.7068919097124652332e-16 - 3.1415926535897925337j), tol=ATOL)
1498
+ assert ae(v.real, 2.7068919097124652332e-16, tol=PTOL)
1499
+ assert ae(v.imag, -3.1415926535897925337, tol=PTOL)
1500
+ v = fp.ei((-40.0 - 160.0j))
1501
+ assert ae(v, (1.1695597827678024687e-20 - 3.1415926535897932385j), tol=ATOL)
1502
+ assert ae(v.real, 1.1695597827678024687e-20, tol=PTOL)
1503
+ assert ae(v.imag, -3.1415926535897932385, tol=PTOL)
1504
+ v = fp.ei((-50.0 - 200.0j))
1505
+ assert ae(v, (-9.0323746914410162531e-25 - 3.1415926535897932385j), tol=ATOL)
1506
+ assert ae(v.real, -9.0323746914410162531e-25, tol=PTOL)
1507
+ assert ae(v.imag, -3.1415926535897932385, tol=PTOL)
1508
+ v = fp.ei((-80.0 - 320.0j))
1509
+ assert ae(v, (-3.4819106748728063576e-38 - 3.1415926535897932385j), tol=ATOL)
1510
+ assert ae(v.real, -3.4819106748728063576e-38, tol=PTOL)
1511
+ assert ae(v.imag, -3.1415926535897932385, tol=PTOL)
1512
+ v = fp.ei((0.0 - 1.1641532182693481445e-10j))
1513
+ assert ae(v, (-22.29664129357666235 - 1.5707963269113119411j), tol=ATOL)
1514
+ assert ae(v.real, -22.29664129357666235, tol=PTOL)
1515
+ assert ae(v.imag, -1.5707963269113119411, tol=PTOL)
1516
+ v = fp.ei((0.0 - 0.25j))
1517
+ assert ae(v, (-0.82466306258094565309 - 1.8199298971146537833j), tol=ATOL)
1518
+ assert ae(v.real, -0.82466306258094565309, tol=PTOL)
1519
+ assert ae(v.imag, -1.8199298971146537833, tol=PTOL)
1520
+ v = fp.ei((0.0 - 1.0j))
1521
+ assert ae(v, (0.33740392290096813466 - 2.5168793971620796342j), tol=ATOL)
1522
+ assert ae(v.real, 0.33740392290096813466, tol=PTOL)
1523
+ assert ae(v.imag, -2.5168793971620796342, tol=PTOL)
1524
+ v = fp.ei((0.0 - 2.0j))
1525
+ assert ae(v, (0.4229808287748649957 - 3.1762093035975914678j), tol=ATOL)
1526
+ assert ae(v.real, 0.4229808287748649957, tol=PTOL)
1527
+ assert ae(v.imag, -3.1762093035975914678, tol=PTOL)
1528
+ v = fp.ei((0.0 - 5.0j))
1529
+ assert ae(v, (-0.19002974965664387862 - 3.1207275717395707565j), tol=ATOL)
1530
+ assert ae(v.real, -0.19002974965664387862, tol=PTOL)
1531
+ assert ae(v.imag, -3.1207275717395707565, tol=PTOL)
1532
+ v = fp.ei((0.0 - 20.0j))
1533
+ assert ae(v, (0.04441982084535331654 - 3.1190380278383364594j), tol=ATOL)
1534
+ assert ae(v.real, 0.04441982084535331654, tol=PTOL)
1535
+ assert ae(v.imag, -3.1190380278383364594, tol=PTOL)
1536
+ v = fp.ei((0.0 - 30.0j))
1537
+ assert ae(v, (-0.033032417282071143779 - 3.1375528668252477302j), tol=ATOL)
1538
+ assert ae(v.real, -0.033032417282071143779, tol=PTOL)
1539
+ assert ae(v.imag, -3.1375528668252477302, tol=PTOL)
1540
+ v = fp.ei((0.0 - 40.0j))
1541
+ assert ae(v, (0.019020007896208766962 - 3.157781446149681126j), tol=ATOL)
1542
+ assert ae(v.real, 0.019020007896208766962, tol=PTOL)
1543
+ assert ae(v.imag, -3.157781446149681126, tol=PTOL)
1544
+ v = fp.ei((0.0 - 50.0j))
1545
+ assert ae(v, (-0.0056283863241163054402 - 3.122413399280832514j), tol=ATOL)
1546
+ assert ae(v.real, -0.0056283863241163054402, tol=PTOL)
1547
+ assert ae(v.imag, -3.122413399280832514, tol=PTOL)
1548
+ v = fp.ei((0.0 - 80.0j))
1549
+ assert ae(v, (-0.012402501155070958192 - 3.1431272137073839346j), tol=ATOL)
1550
+ assert ae(v.real, -0.012402501155070958192, tol=PTOL)
1551
+ assert ae(v.imag, -3.1431272137073839346, tol=PTOL)
1552
+ v = fp.ei((1.1641532182693481445e-10 - 4.6566128730773925781e-10j))
1553
+ assert ae(v, (-20.880034621432138988 - 1.3258176641336937524j), tol=ATOL)
1554
+ assert ae(v.real, -20.880034621432138988, tol=PTOL)
1555
+ assert ae(v.imag, -1.3258176641336937524, tol=PTOL)
1556
+ v = fp.ei((0.25 - 1.0j))
1557
+ assert ae(v, (0.59066621214766308594 - 2.3968481059377428687j), tol=ATOL)
1558
+ assert ae(v.real, 0.59066621214766308594, tol=PTOL)
1559
+ assert ae(v.imag, -2.3968481059377428687, tol=PTOL)
1560
+ v = fp.ei((1.0 - 4.0j))
1561
+ assert ae(v, (-0.49739047283060471093 - 3.5570287076301818702j), tol=ATOL)
1562
+ assert ae(v.real, -0.49739047283060471093, tol=PTOL)
1563
+ assert ae(v.imag, -3.5570287076301818702, tol=PTOL)
1564
+ v = fp.ei((2.0 - 8.0j))
1565
+ assert ae(v, (0.8705211147733730969 - 3.3825859385758486351j), tol=ATOL)
1566
+ assert ae(v.real, 0.8705211147733730969, tol=PTOL)
1567
+ assert ae(v.imag, -3.3825859385758486351, tol=PTOL)
1568
+ v = fp.ei((5.0 - 20.0j))
1569
+ assert ae(v, (7.0789514293925893007 - 1.5313749363937141849j), tol=ATOL)
1570
+ assert ae(v.real, 7.0789514293925893007, tol=PTOL)
1571
+ assert ae(v.imag, -1.5313749363937141849, tol=PTOL)
1572
+ v = fp.ei((20.0 - 80.0j))
1573
+ assert ae(v, (-5855431.4907298084434 + 720917.79156143806727j), tol=ATOL)
1574
+ assert ae(v.real, -5855431.4907298084434, tol=PTOL)
1575
+ assert ae(v.imag, 720917.79156143806727, tol=PTOL)
1576
+ v = fp.ei((30.0 - 120.0j))
1577
+ assert ae(v, (65402491644.703470747 + 56697658396.51586764j), tol=ATOL)
1578
+ assert ae(v.real, 65402491644.703470747, tol=PTOL)
1579
+ assert ae(v.imag, 56697658396.51586764, tol=PTOL)
1580
+ v = fp.ei((40.0 - 160.0j))
1581
+ assert ae(v, (-25504929379604.776769 - 1429035198630576.3879j), tol=ATOL)
1582
+ assert ae(v.real, -25504929379604.776769, tol=PTOL)
1583
+ assert ae(v.imag, -1429035198630576.3879, tol=PTOL)
1584
+ v = fp.ei((50.0 - 200.0j))
1585
+ assert ae(v, (-18437746526988116954.0 + 17146362239046152342.0j), tol=ATOL)
1586
+ assert ae(v.real, -18437746526988116954.0, tol=PTOL)
1587
+ assert ae(v.imag, 17146362239046152342.0, tol=PTOL)
1588
+ v = fp.ei((80.0 - 320.0j))
1589
+ assert ae(v, (-3.3464697299634526706e+31 + 1.6473152633843023919e+32j), tol=ATOL)
1590
+ assert ae(v.real, -3.3464697299634526706e+31, tol=PTOL)
1591
+ assert ae(v.imag, 1.6473152633843023919e+32, tol=PTOL)
1592
+ v = fp.ei((1.1641532182693481445e-10 - 1.1641532182693481445e-10j))
1593
+ assert ae(v, (-21.950067703180274374 - 0.78539816351386363145j), tol=ATOL)
1594
+ assert ae(v.real, -21.950067703180274374, tol=PTOL)
1595
+ assert ae(v.imag, -0.78539816351386363145, tol=PTOL)
1596
+ v = fp.ei((0.25 - 0.25j))
1597
+ assert ae(v, (-0.21441047326710323254 - 1.0683772981589995996j), tol=ATOL)
1598
+ assert ae(v.real, -0.21441047326710323254, tol=PTOL)
1599
+ assert ae(v.imag, -1.0683772981589995996, tol=PTOL)
1600
+ v = fp.ei((1.0 - 1.0j))
1601
+ assert ae(v, (1.7646259855638540684 - 2.3877698515105224193j), tol=ATOL)
1602
+ assert ae(v.real, 1.7646259855638540684, tol=PTOL)
1603
+ assert ae(v.imag, -2.3877698515105224193, tol=PTOL)
1604
+ v = fp.ei((2.0 - 2.0j))
1605
+ assert ae(v, (1.8920781621855474089 - 5.3169624378326579621j), tol=ATOL)
1606
+ assert ae(v.real, 1.8920781621855474089, tol=PTOL)
1607
+ assert ae(v.imag, -5.3169624378326579621, tol=PTOL)
1608
+ v = fp.ei((5.0 - 5.0j))
1609
+ assert ae(v, (-13.470936071475245856 + 15.322492395731230968j), tol=ATOL)
1610
+ assert ae(v.real, -13.470936071475245856, tol=PTOL)
1611
+ assert ae(v.imag, 15.322492395731230968, tol=PTOL)
1612
+ v = fp.ei((20.0 - 20.0j))
1613
+ assert ae(v, (16589317.398788971896 - 5831705.4712368307104j), tol=ATOL)
1614
+ assert ae(v.real, 16589317.398788971896, tol=PTOL)
1615
+ assert ae(v.imag, -5831705.4712368307104, tol=PTOL)
1616
+ v = fp.ei((30.0 - 30.0j))
1617
+ assert ae(v, (-154596484273.69322527 + 204179357834.2723043j), tol=ATOL)
1618
+ assert ae(v.real, -154596484273.69322527, tol=PTOL)
1619
+ assert ae(v.imag, 204179357834.2723043, tol=PTOL)
1620
+ v = fp.ei((40.0 - 40.0j))
1621
+ assert ae(v, (287512180321448.45408 - 4203502407932318.1156j), tol=ATOL)
1622
+ assert ae(v.real, 287512180321448.45408, tol=PTOL)
1623
+ assert ae(v.imag, -4203502407932318.1156, tol=PTOL)
1624
+ v = fp.ei((50.0 - 50.0j))
1625
+ assert ae(v, (36128528616649268826.0 + 64648801861338741960.0j), tol=ATOL)
1626
+ assert ae(v.real, 36128528616649268826.0, tol=PTOL)
1627
+ assert ae(v.imag, 64648801861338741960.0, tol=PTOL)
1628
+ v = fp.ei((80.0 - 80.0j))
1629
+ assert ae(v, (-3.8674816337930010217e+32 + 3.0540709639658071041e+32j), tol=ATOL)
1630
+ assert ae(v.real, -3.8674816337930010217e+32, tol=PTOL)
1631
+ assert ae(v.imag, 3.0540709639658071041e+32, tol=PTOL)
1632
+ v = fp.ei((4.6566128730773925781e-10 - 1.1641532182693481445e-10j))
1633
+ assert ae(v, (-20.880034621082893023 - 0.24497866324327947603j), tol=ATOL)
1634
+ assert ae(v.real, -20.880034621082893023, tol=PTOL)
1635
+ assert ae(v.imag, -0.24497866324327947603, tol=PTOL)
1636
+ v = fp.ei((1.0 - 0.25j))
1637
+ assert ae(v, (1.8942716983721074932 - 0.67268237088273915854j), tol=ATOL)
1638
+ assert ae(v.real, 1.8942716983721074932, tol=PTOL)
1639
+ assert ae(v.imag, -0.67268237088273915854, tol=PTOL)
1640
+ v = fp.ei((4.0 - 1.0j))
1641
+ assert ae(v, (14.806699492675420438 - 12.280015176673582616j), tol=ATOL)
1642
+ assert ae(v.real, 14.806699492675420438, tol=PTOL)
1643
+ assert ae(v.imag, -12.280015176673582616, tol=PTOL)
1644
+ v = fp.ei((8.0 - 2.0j))
1645
+ assert ae(v, (-54.633252667426386294 - 416.34477429173650012j), tol=ATOL)
1646
+ assert ae(v.real, -54.633252667426386294, tol=PTOL)
1647
+ assert ae(v.imag, -416.34477429173650012, tol=PTOL)
1648
+ v = fp.ei((20.0 - 5.0j))
1649
+ assert ae(v, (711836.97165402624643 + 24745247.798103247366j), tol=ATOL)
1650
+ assert ae(v.real, 711836.97165402624643, tol=PTOL)
1651
+ assert ae(v.imag, 24745247.798103247366, tol=PTOL)
1652
+ v = fp.ei((80.0 - 20.0j))
1653
+ assert ae(v, (4.2139911108612653091e+32 - 5.3367124741918251637e+32j), tol=ATOL)
1654
+ assert ae(v.real, 4.2139911108612653091e+32, tol=PTOL)
1655
+ assert ae(v.imag, -5.3367124741918251637e+32, tol=PTOL)
1656
+ v = fp.ei((120.0 - 30.0j))
1657
+ assert ae(v, (-9.7760616203707508892e+48 + 1.058257682317195792e+50j), tol=ATOL)
1658
+ assert ae(v.real, -9.7760616203707508892e+48, tol=PTOL)
1659
+ assert ae(v.imag, 1.058257682317195792e+50, tol=PTOL)
1660
+ v = fp.ei((160.0 - 40.0j))
1661
+ assert ae(v, (-8.7065541466623638861e+66 - 1.6577106725141739889e+67j), tol=ATOL)
1662
+ assert ae(v.real, -8.7065541466623638861e+66, tol=PTOL)
1663
+ assert ae(v.imag, -1.6577106725141739889e+67, tol=PTOL)
1664
+ v = fp.ei((200.0 - 50.0j))
1665
+ assert ae(v, (3.070744996327018106e+84 + 1.7243244846769415903e+84j), tol=ATOL)
1666
+ assert ae(v.real, 3.070744996327018106e+84, tol=PTOL)
1667
+ assert ae(v.imag, 1.7243244846769415903e+84, tol=PTOL)
1668
+ v = fp.ei((320.0 - 80.0j))
1669
+ assert ae(v, (-9.9960598637998647276e+135 + 2.6855081527595608863e+136j), tol=ATOL)
1670
+ assert ae(v.real, -9.9960598637998647276e+135, tol=PTOL)
1671
+ assert ae(v.imag, 2.6855081527595608863e+136, tol=PTOL)
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/tests/test_functions.py ADDED
@@ -0,0 +1,920 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from mpmath.libmp import *
2
+ from mpmath import *
3
+ import random
4
+ import time
5
+ import math
6
+ import cmath
7
+
8
def mpc_ae(a, b, eps=eps):
    """Return True iff the real and imaginary parts of a and b each agree to within eps.

    Helper predicate used throughout this module for approximate complex
    comparison; short-circuits on the real part like the chained `and`.
    """
    return a.real.ae(b.real, eps) and a.imag.ae(b.imag, eps)
13
+
14
#----------------------------------------------------------------------------
# Constants and functions
#

# Reference decimal expansions (roughly 100 digits) of mpmath's built-in
# constants; test_constants() below compares the live constants against
# these strings at several working precisions.
tpi = "3.1415926535897932384626433832795028841971693993751058209749445923078\
1640628620899862803482534211706798"
te = "2.71828182845904523536028747135266249775724709369995957496696762772407\
663035354759457138217852516642743"
tdegree = "0.017453292519943295769236907684886127134428718885417254560971914\
4017100911460344944368224156963450948221"
teuler = "0.5772156649015328606065120900824024310421593359399235988057672348\
84867726777664670936947063291746749516"
tln2 = "0.693147180559945309417232121458176568075500134360255254120680009493\
393621969694715605863326996418687542"
tln10 = "2.30258509299404568401799145468436420760110148862877297603332790096\
757260967735248023599720508959829834"
tcatalan = "0.91596559417721901505460351493238411077414937428167213426649811\
9621763019776254769479356512926115106249"
tkhinchin = "2.6854520010653064453097148354817956938203822939944629530511523\
4555721885953715200280114117493184769800"
tglaisher = "1.2824271291006226368753425688697917277676889273250011920637400\
2174040630885882646112973649195820237439420646"
tapery = "1.2020569031595942853997381615114499907649862923404988817922715553\
4183820578631309018645587360933525815"
tphi = "1.618033988749894848204586834365638117720309179805762862135448622705\
26046281890244970720720418939113748475"
tmertens = "0.26149721284764278375542683860869585905156664826119920619206421\
3924924510897368209714142631434246651052"
ttwinprime = "0.660161815846869573927812110014555778432623360284733413319448\
423335405642304495277143760031413839867912"
44
+
45
def test_constants():
    """Compare built-in constants with reference strings at several precisions."""
    for prec in [3, 7, 10, 15, 20, 37, 80, 100, 29]:
        mp.dps = prec
        assert pi == mpf(tpi)
        assert e == mpf(te)
        assert degree == mpf(tdegree)
        assert euler == mpf(teuler)
        assert ln2 == mpf(tln2)
        assert ln10 == mpf(tln10)
        assert catalan == mpf(tcatalan)
        assert khinchin == mpf(tkhinchin)
        assert glaisher == mpf(tglaisher)
        assert phi == mpf(tphi)
        # mertens/twinprime are only checked below 50 dps here
        if prec < 50:
            assert mertens == mpf(tmertens)
            assert twinprime == mpf(ttwinprime)
    mp.dps = 15
    # Ordering comparisons between a constant object and plain ints
    assert pi >= -1
    assert pi > 2
    assert pi > 3
    assert pi < 4
66
+
67
def test_exact_sqrts():
    """sqrt of a perfect square must be exact, including at high precision."""
    for i in range(20000):
        assert sqrt(mpf(i*i)) == i
    random.seed(1)
    for prec in [100, 300, 1000, 10000]:
        mp.dps = prec
        for i in range(20):
            A = random.randint(10**(prec//2-2), 10**(prec//2-1))
            assert sqrt(mpf(A*A)) == A
    mp.dps = 15
    # (man, exp) tuples: sqrt halves the exponent exactly
    for i in range(100):
        for a in [1, 8, 25, 112307]:
            assert sqrt(mpf((a*a, 2*i))) == mpf((a, i))
            assert sqrt(mpf((a*a, -2*i))) == mpf((a, -i))
81
+
82
def test_sqrt_rounding():
    """Directed rounding of sqrt: down/up must bracket, exact squares round-trip."""
    for i in [2, 3, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15]:
        i = from_int(i)
        for dps in [7, 15, 83, 106, 2000]:
            mp.dps = dps
            # Squaring the rounded-down (resp. up) root must stay below (above) i
            a = mpf_pow_int(mpf_sqrt(i, mp.prec, round_down), 2, mp.prec, round_down)
            b = mpf_pow_int(mpf_sqrt(i, mp.prec, round_up), 2, mp.prec, round_up)
            assert mpf_lt(a, i)
            assert mpf_gt(b, i)
    random.seed(1234)
    prec = 100
    for rnd in [round_down, round_nearest, round_ceiling]:
        for i in range(100):
            a = mpf_rand(prec)
            b = mpf_mul(a, a)
            assert mpf_sqrt(b, prec, rnd) == a
    # Test some extreme cases
    mp.dps = 100
    a = mpf(9) + 1e-90
    b = mpf(9) - 1e-90
    mp.dps = 15
    assert sqrt(a, rounding='d') == 3
    assert sqrt(a, rounding='n') == 3
    assert sqrt(a, rounding='u') > 3
    assert sqrt(b, rounding='d') < 3
    assert sqrt(b, rounding='n') == 3
    assert sqrt(b, rounding='u') == 3
    # A worst case, from the MPFR test suite
    assert sqrt(mpf('7.0503726185518891')) == mpf('2.655253776675949')
111
+
112
def test_float_sqrt():
    """sqrt agrees with float ** 0.5 and with cmath for complex/negative args."""
    mp.dps = 15
    # These should round identically
    for x in [0, 1e-7, 0.1, 0.5, 1, 2, 3, 4, 5, 0.333, 76.19]:
        assert sqrt(mpf(x)) == float(x)**0.5
    assert sqrt(-1) == 1j
    assert sqrt(-2).ae(cmath.sqrt(-2))
    assert sqrt(-3).ae(cmath.sqrt(-3))
    assert sqrt(-100).ae(cmath.sqrt(-100))
    assert sqrt(1j).ae(cmath.sqrt(1j))
    assert sqrt(-1j).ae(cmath.sqrt(-1j))
    assert sqrt(math.pi + math.e*1j).ae(cmath.sqrt(math.pi + math.e*1j))
    assert sqrt(math.pi - math.e*1j).ae(cmath.sqrt(math.pi - math.e*1j))
125
+
126
def test_hypot():
    """hypot: zero case, single-leg cases (sign is ignored), 3-4-5 triangle."""
    assert hypot(0, 0) == 0
    cases = [
        (0, 0.33, 0.33),
        (0.33, 0, 0.33),
        (-0.33, 0, 0.33),
        (3, 4, 5),
    ]
    for x, y, expected in cases:
        assert hypot(x, y) == mpf(expected)
132
+
133
def test_exact_cbrt():
    """cbrt of a perfect cube must be exact, including at high precision."""
    for i in range(0, 20000, 200):
        assert cbrt(mpf(i*i*i)) == i
    random.seed(1)
    for prec in [100, 300, 1000, 10000]:
        mp.dps = prec
        A = random.randint(10**(prec//2-2), 10**(prec//2-1))
        assert cbrt(mpf(A*A*A)) == A
    mp.dps = 15
142
+
143
def test_exp():
    """exp at 0, huge magnitudes, mantissa normalization, and a complex arg."""
    assert exp(0) == 1
    assert exp(10000).ae(mpf('8.8068182256629215873e4342'))
    assert exp(-10000).ae(mpf('1.1354838653147360985e-4343'))
    # Result mantissa must be normalized (bc == bitcount of mantissa)
    a = exp(mpf((1, 8198646019315405, -53, 53)))
    assert(a.bc == bitcount(a.man))
    mp.prec = 67
    a = exp(mpf((1, 1781864658064754565, -60, 61)))
    assert(a.bc == bitcount(a.man))
    mp.prec = 53
    assert exp(ln2 * 10).ae(1024)
    assert exp(2+2j).ae(cmath.exp(2+2j))
155
+
156
def test_issue_73():
    """Values computed at 512 dps must re-round correctly at 15 dps (unary +)."""
    mp.dps = 512
    a = exp(-1)
    b = exp(1)
    mp.dps = 15
    assert (+a).ae(0.36787944117144233)
    assert (+b).ae(2.7182818284590451)
163
+
164
def test_log():
    """log: real/complex values, explicit bases, accuracy near 1, special values."""
    mp.dps = 15
    assert log(1) == 0
    for x in [0.5, 1.5, 2.0, 3.0, 100, 10**50, 1e-50]:
        assert log(x).ae(math.log(x))
        assert log(x, x) == 1
    assert log(1024, 2) == 10
    assert log(10**1234, 10) == 1234
    assert log(2+2j).ae(cmath.log(2+2j))
    # Accuracy near 1
    assert (log(0.6+0.8j).real*10**17).ae(2.2204460492503131)
    assert (log(0.6-0.8j).real*10**17).ae(2.2204460492503131)
    assert (log(0.8-0.6j).real*10**17).ae(2.2204460492503131)
    assert (log(1+1e-8j).real*10**16).ae(0.5)
    assert (log(1-1e-8j).real*10**16).ae(0.5)
    assert (log(-1+1e-8j).real*10**16).ae(0.5)
    assert (log(-1-1e-8j).real*10**16).ae(0.5)
    assert (log(1j+1e-8).real*10**16).ae(0.5)
    assert (log(1j-1e-8).real*10**16).ae(0.5)
    assert (log(-1j+1e-8).real*10**16).ae(0.5)
    assert (log(-1j-1e-8).real*10**16).ae(0.5)
    assert (log(1+1e-40j).real*10**80).ae(0.5)
    assert (log(1j+1e-40).real*10**80).ae(0.5)
    # Huge
    assert log(ldexp(1.234,10**20)).ae(log(2)*1e20)
    assert log(ldexp(1.234,10**200)).ae(log(2)*1e200)
    # Some special values
    assert log(mpc(0,0)) == mpc(-inf,0)
    assert isnan(log(mpc(nan,0)).real)
    assert isnan(log(mpc(nan,0)).imag)
    assert isnan(log(mpc(0,nan)).real)
    assert isnan(log(mpc(0,nan)).imag)
    assert isnan(log(mpc(nan,1)).real)
    assert isnan(log(mpc(nan,1)).imag)
    assert isnan(log(mpc(1,nan)).real)
    assert isnan(log(mpc(1,nan)).imag)
200
+
201
def test_trig_hyperb_basic():
    """Trig and hyperbolic functions agree with math/cmath over a real grid."""
    for x in (list(range(100)) + list(range(-100,0))):
        t = x / 4.1
        assert cos(mpf(t)).ae(math.cos(t))
        assert sin(mpf(t)).ae(math.sin(t))
        assert tan(mpf(t)).ae(math.tan(t))
        assert cosh(mpf(t)).ae(math.cosh(t))
        assert sinh(mpf(t)).ae(math.sinh(t))
        assert tanh(mpf(t)).ae(math.tanh(t))
    assert sin(1+1j).ae(cmath.sin(1+1j))
    assert sin(-4-3.6j).ae(cmath.sin(-4-3.6j))
    assert cos(1+1j).ae(cmath.cos(1+1j))
    assert cos(-4-3.6j).ae(cmath.cos(-4-3.6j))
214
+
215
def test_degrees():
    """cos and sin at the quadrant multiples of 90 degrees."""
    cos_expected = {0: 1, 90: 0, 180: -1, 270: 0, 360: 1}
    sin_expected = {0: 0, 90: 1, 180: 0, 270: -1, 360: 0}
    # The 0-degree values are exact; the rest are checked approximately.
    assert cos(0*degree) == cos_expected[0]
    for angle in (90, 180, 270, 360):
        assert cos(angle*degree).ae(cos_expected[angle])
    assert sin(0*degree) == sin_expected[0]
    for angle in (90, 180, 270, 360):
        assert sin(angle*degree).ae(sin_expected[angle])
226
+
227
def random_complexes(N):
    """Return a deterministic (seeded) list of N pairs of complex numbers.

    Each component is drawn uniformly from [-10, 10); random.seed(1) makes
    the sequence reproducible across calls.
    """
    random.seed(1)
    pairs = []
    for _ in range(N):
        # Tuple elements evaluate left to right, preserving the draw order.
        re1, im1 = random.uniform(-10, 10), random.uniform(-10, 10)
        re2, im2 = random.uniform(-10, 10), random.uniform(-10, 10)
        pairs.append((complex(re1, im1), complex(re2, im2)))
    return pairs
239
+
240
def test_complex_powers():
    """Complex exponentiation: sqrt of 1j at several precisions, Euler identity."""
    for dps in [15, 30, 100]:
        # Check accuracy for complex square root
        mp.dps = dps
        a = mpc(1j)**0.5
        assert a.real == a.imag == mpf(2)**0.5 / 2
    mp.dps = 15
    random.seed(1)
    for (z1, z2) in random_complexes(100):
        assert (mpc(z1)**mpc(z2)).ae(z1**z2, 1e-12)
    assert (e**(-pi*1j)).ae(-1)
    mp.dps = 50
    assert (e**(-pi*1j)).ae(-1)
    mp.dps = 15
254
+
255
def test_complex_sqrt_accuracy():
    """sqrt(z*z) recovers z (up to sign of the principal branch) in all quadrants."""
    def test_mpc_sqrt(lst):
        # For each (a, b), exercise all four sign combinations of a + bj.
        for a, b in lst:
            z = mpc(a + j*b)
            assert mpc_ae(sqrt(z*z), z)
            z = mpc(-a + j*b)
            assert mpc_ae(sqrt(z*z), -z)
            z = mpc(a - j*b)
            assert mpc_ae(sqrt(z*z), z)
            z = mpc(-a - j*b)
            assert mpc_ae(sqrt(z*z), -z)
    random.seed(2)
    N = 10
    mp.dps = 30
    dps = mp.dps
    test_mpc_sqrt([(random.uniform(0, 10),random.uniform(0, 10)) for i in range(N)])
    test_mpc_sqrt([(i + 0.1, (i + 0.2)*10**i) for i in range(N)])
    mp.dps = 15
273
+
274
def test_atan():
    """atan: tiny/huge reals, high-precision pi via 4*atan(1), infinite complex args."""
    mp.dps = 15
    assert atan(-2.3).ae(math.atan(-2.3))
    assert atan(1e-50) == 1e-50
    assert atan(1e50).ae(pi/2)
    assert atan(-1e-50) == -1e-50
    assert atan(-1e50).ae(-pi/2)
    assert atan(10**1000).ae(pi/2)
    for dps in [25, 70, 100, 300, 1000]:
        mp.dps = dps
        assert (4*atan(1)).ae(pi)
    mp.dps = 15
    pi2 = pi/2
    assert atan(mpc(inf,-1)).ae(pi2)
    assert atan(mpc(inf,0)).ae(pi2)
    assert atan(mpc(inf,1)).ae(pi2)
    assert atan(mpc(1,inf)).ae(pi2)
    assert atan(mpc(0,inf)).ae(pi2)
    assert atan(mpc(-1,inf)).ae(-pi2)
    assert atan(mpc(-inf,1)).ae(-pi2)
    assert atan(mpc(-inf,0)).ae(-pi2)
    assert atan(mpc(-inf,-1)).ae(-pi2)
    assert atan(mpc(-1,-inf)).ae(-pi2)
    assert atan(mpc(0,-inf)).ae(-pi2)
    assert atan(mpc(1,-inf)).ae(pi2)
299
+
300
def test_atan2():
    """atan2: quadrants, axes, zero, and all inf/nan argument combinations."""
    mp.dps = 15
    assert atan2(1,1).ae(pi/4)
    assert atan2(1,-1).ae(3*pi/4)
    assert atan2(-1,-1).ae(-3*pi/4)
    assert atan2(-1,1).ae(-pi/4)
    assert atan2(-1,0).ae(-pi/2)
    assert atan2(1,0).ae(pi/2)
    assert atan2(0,0) == 0
    assert atan2(inf,0).ae(pi/2)
    assert atan2(-inf,0).ae(-pi/2)
    # inf/inf combinations are NaN here (unlike IEEE atan2)
    assert isnan(atan2(inf,inf))
    assert isnan(atan2(-inf,inf))
    assert isnan(atan2(inf,-inf))
    assert isnan(atan2(3,nan))
    assert isnan(atan2(nan,3))
    assert isnan(atan2(0,nan))
    assert isnan(atan2(nan,0))
    assert atan2(0,inf) == 0
    assert atan2(0,-inf).ae(pi)
    assert atan2(10,inf) == 0
    assert atan2(-10,inf) == 0
    assert atan2(-10,-inf).ae(-pi)
    assert atan2(10,-inf).ae(pi)
    assert atan2(inf,10).ae(pi/2)
    assert atan2(inf,-10).ae(pi/2)
    assert atan2(-inf,10).ae(-pi/2)
    assert atan2(-inf,-10).ae(-pi/2)
328
+
329
def test_areal_inverses():
    """Inverse trig/hyperbolic on reals: values, and real-vs-complex result types."""
    assert asin(mpf(0)) == 0
    assert asinh(mpf(0)) == 0
    assert acosh(mpf(1)) == 0
    # Outside the real domain the result type switches from mpf to mpc
    assert isinstance(asin(mpf(0.5)), mpf)
    assert isinstance(asin(mpf(2.0)), mpc)
    assert isinstance(acos(mpf(0.5)), mpf)
    assert isinstance(acos(mpf(2.0)), mpc)
    assert isinstance(atanh(mpf(0.1)), mpf)
    assert isinstance(atanh(mpf(1.1)), mpc)

    random.seed(1)
    for i in range(50):
        x = random.uniform(0, 1)
        assert asin(mpf(x)).ae(math.asin(x))
        assert acos(mpf(x)).ae(math.acos(x))

        x = random.uniform(-10, 10)
        assert asinh(mpf(x)).ae(cmath.asinh(x).real)
        assert isinstance(asinh(mpf(x)), mpf)
        x = random.uniform(1, 10)
        assert acosh(mpf(x)).ae(cmath.acosh(x).real)
        assert isinstance(acosh(mpf(x)), mpf)
        x = random.uniform(-10, 0.999)
        assert isinstance(acosh(mpf(x)), mpc)

        x = random.uniform(-1, 1)
        assert atanh(mpf(x)).ae(cmath.atanh(x).real)
        assert isinstance(atanh(mpf(x)), mpf)

    dps = mp.dps
    mp.dps = 300
    assert isinstance(asin(0.5), mpf)
    mp.dps = 1000
    assert asin(1).ae(pi/2)
    assert asin(-1).ae(-pi/2)
    mp.dps = dps
366
+
367
def test_invhyperb_inaccuracy():
    """asinh/tanh/atanh keep full accuracy for tiny args (no cancellation loss)."""
    mp.dps = 15
    assert (asinh(1e-5)*10**5).ae(0.99999999998333333)
    assert (asinh(1e-10)*10**10).ae(1)
    assert (asinh(1e-50)*10**50).ae(1)
    assert (asinh(-1e-5)*10**5).ae(-0.99999999998333333)
    assert (asinh(-1e-10)*10**10).ae(-1)
    assert (asinh(-1e-50)*10**50).ae(-1)
    assert asinh(10**20).ae(46.744849040440862)
    assert asinh(-10**20).ae(-46.744849040440862)
    assert (tanh(1e-10)*10**10).ae(1)
    assert (tanh(-1e-10)*10**10).ae(-1)
    assert (atanh(1e-10)*10**10).ae(1)
    assert (atanh(-1e-10)*10**10).ae(-1)
381
+
382
def test_complex_functions():
    """Elementary functions agree with cmath over a grid of complex points."""
    for x in (list(range(10)) + list(range(-10,0))):
        for y in (list(range(10)) + list(range(-10,0))):
            # +0.01j nudges points off the real axis / branch cuts
            z = complex(x, y)/4.3 + 0.01j
            assert exp(mpc(z)).ae(cmath.exp(z))
            assert log(mpc(z)).ae(cmath.log(z))
            assert cos(mpc(z)).ae(cmath.cos(z))
            assert sin(mpc(z)).ae(cmath.sin(z))
            assert tan(mpc(z)).ae(cmath.tan(z))
            assert sinh(mpc(z)).ae(cmath.sinh(z))
            assert cosh(mpc(z)).ae(cmath.cosh(z))
            assert tanh(mpc(z)).ae(cmath.tanh(z))
394
+
395
def test_complex_inverse_functions():
    """Inverse complex functions: cmath agreement and round-trip identities."""
    mp.dps = 15
    iv.dps = 15
    for (z1, z2) in random_complexes(30):
        # apparently cmath uses a different branch, so we
        # can't use it for comparison
        assert sinh(asinh(z1)).ae(z1)
        #
        assert acosh(z1).ae(cmath.acosh(z1))
        assert atanh(z1).ae(cmath.atanh(z1))
        assert atan(z1).ae(cmath.atan(z1))
        # the reason we set a big eps here is that the cmath
        # functions are inaccurate
        assert asin(z1).ae(cmath.asin(z1), rel_eps=1e-12)
        assert acos(z1).ae(cmath.acos(z1), rel_eps=1e-12)
    one = mpf(1)
    # Round trips over a wide dynamic range of magnitudes
    for i in range(-9, 10, 3):
        for k in range(-9, 10, 3):
            a = 0.9*j*10**k + 0.8*one*10**i
            b = cos(acos(a))
            assert b.ae(a)
            b = sin(asin(a))
            assert b.ae(a)
    one = mpf(1)
    err = 2*10**-15
    for i in range(-9, 9, 3):
        for k in range(-9, 9, 3):
            a = -0.9*10**k + j*0.8*one*10**i
            b = cosh(acosh(a))
            assert b.ae(a, err)
            b = sinh(asinh(a))
            assert b.ae(a, err)
427
+
428
def test_reciprocal_functions():
    """sec/csc/cot, their hyperbolic variants, and all inverses at spot values."""
    assert sec(3).ae(-1.01010866590799375)
    assert csc(3).ae(7.08616739573718592)
    assert cot(3).ae(-7.01525255143453347)
    assert sech(3).ae(0.0993279274194332078)
    assert csch(3).ae(0.0998215696688227329)
    assert coth(3).ae(1.00496982331368917)
    assert asec(3).ae(1.23095941734077468)
    assert acsc(3).ae(0.339836909454121937)
    assert acot(3).ae(0.321750554396642193)
    assert asech(0.5).ae(1.31695789692481671)
    assert acsch(3).ae(0.327450150237258443)
    assert acoth(3).ae(0.346573590279972655)
    # Values at 0: acot is real, acoth is purely imaginary
    assert acot(0).ae(1.5707963267948966192)
    assert acoth(0).ae(1.5707963267948966192j)
443
+
444
def test_ldexp():
    """ldexp scales by powers of two; infinity is preserved."""
    mp.dps = 15
    assert ldexp(mpf(2.5), 0) == 2.5
    assert ldexp(mpf(2.5), -1) == 1.25
    assert ldexp(mpf(2.5), 2) == 10
    assert ldexp(mpf('inf'), 3) == mpf('inf')
450
+
451
def test_frexp():
    """frexp returns (mantissa in [0.5, 1), exponent) pairs; 0 maps to (0.0, 0)."""
    mp.dps = 15
    assert frexp(0) == (0.0, 0)
    assert frexp(9) == (0.5625, 4)
    assert frexp(1) == (0.5, 1)
    assert frexp(0.2) == (0.8, -2)
    assert frexp(1000) == (0.9765625, 10)
458
+
459
def test_aliases():
    """Alias functions match their canonical forms (ln, log10, degrees, ...)."""
    assert ln(7) == log(7)
    assert log10(3.75) == log(3.75,10)
    assert degrees(5.6) == 5.6 / degree
    assert radians(5.6) == 5.6 * degree
    assert power(-1,0.5) == j
    assert fmod(25,7) == 4.0 and isinstance(fmod(25,7), mpf)
466
+
467
def test_arg_sign():
    """arg on axes/infinities/nan, and sign for real, infinite, and complex args."""
    assert arg(3) == 0
    assert arg(-3).ae(pi)
    assert arg(j).ae(pi/2)
    assert arg(-j).ae(-pi/2)
    assert arg(0) == 0
    assert isnan(atan2(3,nan))
    assert isnan(atan2(nan,3))
    assert isnan(atan2(0,nan))
    assert isnan(atan2(nan,0))
    assert isnan(atan2(nan,nan))
    assert arg(inf) == 0
    assert arg(-inf).ae(pi)
    assert isnan(arg(nan))
    #assert arg(inf*j).ae(pi/2)
    assert sign(0) == 0
    assert sign(3) == 1
    assert sign(-3) == -1
    assert sign(inf) == 1
    assert sign(-inf) == -1
    assert isnan(sign(nan))
    # For complex z, sign(z) = z/|z| (a point on the unit circle)
    assert sign(j) == j
    assert sign(-3*j) == -j
    assert sign(1+j).ae((1+j)/sqrt(2))
491
+
492
def test_misc_bugs():
    """Regression: log at very high precision must not raise."""
    # test that this doesn't raise an exception
    mp.dps = 1000
    log(1302)
    mp.dps = 15
497
+
498
def test_arange():
    """arange: default start, negative ranges, float steps, and empty results."""
    assert arange(10) == [mpf('0.0'), mpf('1.0'), mpf('2.0'), mpf('3.0'),
                          mpf('4.0'), mpf('5.0'), mpf('6.0'), mpf('7.0'),
                          mpf('8.0'), mpf('9.0')]
    assert arange(-5, 5) == [mpf('-5.0'), mpf('-4.0'), mpf('-3.0'),
                             mpf('-2.0'), mpf('-1.0'), mpf('0.0'),
                             mpf('1.0'), mpf('2.0'), mpf('3.0'), mpf('4.0')]
    # Expected values reflect binary rounding of the decimal step 0.1
    assert arange(0, 1, 0.1) == [mpf('0.0'), mpf('0.10000000000000001'),
                                 mpf('0.20000000000000001'),
                                 mpf('0.30000000000000004'),
                                 mpf('0.40000000000000002'),
                                 mpf('0.5'), mpf('0.60000000000000009'),
                                 mpf('0.70000000000000007'),
                                 mpf('0.80000000000000004'),
                                 mpf('0.90000000000000002')]
    assert arange(17, -9, -3) == [mpf('17.0'), mpf('14.0'), mpf('11.0'),
                                  mpf('8.0'), mpf('5.0'), mpf('2.0'),
                                  mpf('-1.0'), mpf('-4.0'), mpf('-7.0')]
    assert arange(0.2, 0.1, -0.1) == [mpf('0.20000000000000001')]
    assert arange(0) == []
    assert arange(1000, -1) == []
    assert arange(-1.23, 3.21, -0.0000001) == []
520
+
521
def test_linspace():
    """linspace: inclusive endpoint by default, endpoint=0, and a single point."""
    assert linspace(2, 9, 7) == [mpf('2.0'), mpf('3.166666666666667'),
        mpf('4.3333333333333339'), mpf('5.5'), mpf('6.666666666666667'),
        mpf('7.8333333333333339'), mpf('9.0')]
    assert linspace(2, 9, 7, endpoint=0) == [mpf('2.0'), mpf('3.0'), mpf('4.0'),
        mpf('5.0'), mpf('6.0'), mpf('7.0'), mpf('8.0')]
    assert linspace(2, 7, 1) == [mpf(2)]
528
+
529
def test_float_cbrt():
    """cbrt: real round trips, principal branch for -1, complex agreement with pow."""
    mp.dps = 30
    for a in arange(0,10,0.1):
        assert cbrt(a*a*a).ae(a, eps)
    # Principal cube root of -1 is complex
    assert cbrt(-1).ae(0.5 + j*sqrt(3)/2)
    one_third = mpf(1)/3
    for a in arange(0,10,2.7) + [0.1 + 10**5]:
        a = mpc(a + 1.1j)
        r1 = cbrt(a)
        # Compute the reference pow at higher precision
        mp.dps += 10
        r2 = pow(a, one_third)
        mp.dps -= 10
        assert r1.ae(r2, eps)
    mp.dps = 100
    for n in range(100, 301, 100):
        w = 10**n + j*10**-3
        z = w*w*w
        r = cbrt(z)
        assert mpc_ae(r, w, eps)
    mp.dps = 15
549
+
550
def test_root():
    """nthroot/root: exact integer roots, pow agreement, rounding, special values."""
    mp.dps = 30
    random.seed(1)
    a = random.randint(0, 10000)
    p = a*a*a
    r = nthroot(mpf(p), 3)
    assert r == a
    for n in range(4, 10):
        p = p*a
        assert nthroot(mpf(p), n) == a
    mp.dps = 40
    for n in range(10, 5000, 100):
        for a in [random.random()*10000, random.random()*10**100]:
            r = nthroot(a, n)
            r1 = pow(a, mpf(1)/n)
            assert r.ae(r1)
            r = nthroot(a, -n)
            r1 = pow(a, -mpf(1)/n)
            assert r.ae(r1)
    # XXX: this is broken right now
    # tests for nthroot rounding
    for rnd in ['nearest', 'up', 'down']:
        mp.rounding = rnd
        for n in [-5, -3, 3, 5]:
            prec = 50
            for i in range(10):
                mp.prec = prec
                a = rand()
                # Build an exact n-th power at double precision
                mp.prec = 2*prec
                b = a**n
                mp.prec = prec
                r = nthroot(b, n)
                assert r == a
    mp.dps = 30
    for n in range(3, 21):
        a = (random.random() + j*random.random())
        assert nthroot(a, n).ae(pow(a, mpf(1)/n))
        assert mpc_ae(nthroot(a, n), pow(a, mpf(1)/n))
        a = (random.random()*10**100 + j*random.random())
        r = nthroot(a, n)
        mp.dps += 4
        r1 = pow(a, mpf(1)/n)
        mp.dps -= 4
        assert r.ae(r1)
        assert mpc_ae(r, r1, eps)
        r = nthroot(a, -n)
        mp.dps += 4
        r1 = pow(a, -mpf(1)/n)
        mp.dps -= 4
        assert r.ae(r1)
        assert mpc_ae(r, r1, eps)
    mp.dps = 15
    # Special values: n = 1/0/-1, infinities, nan
    assert nthroot(4, 1) == 4
    assert nthroot(4, 0) == 1
    assert nthroot(4, -1) == 0.25
    assert nthroot(inf, 1) == inf
    assert nthroot(inf, 2) == inf
    assert nthroot(inf, 3) == inf
    assert nthroot(inf, -1) == 0
    assert nthroot(inf, -2) == 0
    assert nthroot(inf, -3) == 0
    assert nthroot(j, 1) == j
    assert nthroot(j, 0) == 1
    assert nthroot(j, -1) == -j
    assert isnan(nthroot(nan, 1))
    assert isnan(nthroot(nan, 0))
    assert isnan(nthroot(nan, -1))
    assert isnan(nthroot(inf, 0))
    # root(x, n, k) selects the k-th of the n complex roots
    assert root(2,3) == nthroot(2,3)
    assert root(16,4,0) == 2
    assert root(16,4,1) == 2j
    assert root(16,4,2) == -2
    assert root(16,4,3) == -2j
    assert root(16,4,4) == 2
    assert root(-125,3,1) == -5
625
+
626
def test_issue_136():
    """nthroot of tiny/huge negative values: correct branch, reasonable speed."""
    for dps in [20, 80]:
        mp.dps = dps
        r = nthroot(mpf('-1e-20'), 4)
        assert r.ae(mpf(10)**(-5) * (1 + j) * mpf(2)**(-0.5))
    mp.dps = 80
    assert nthroot('-1e-3', 4).ae(mpf(10)**(-3./4) * (1 + j)/sqrt(2))
    assert nthroot('-1e-6', 4).ae((1 + j)/(10 * sqrt(20)))
    # Check that this doesn't take eternity to compute
    mp.dps = 20
    assert nthroot('-1e100000000', 4).ae((1+j)*mpf('1e25000000')/sqrt(2))
    mp.dps = 15
638
+
639
def test_mpcfun_real_imag():
    """Complex functions on purely real/imaginary mpc args match the real forms."""
    mp.dps = 15
    x = mpf(0.3)
    y = mpf(0.4)
    assert exp(mpc(x,0)) == exp(x)
    assert exp(mpc(0,y)) == mpc(cos(y),sin(y))
    assert cos(mpc(x,0)) == cos(x)
    assert sin(mpc(x,0)) == sin(x)
    assert cos(mpc(0,y)) == cosh(y)
    assert sin(mpc(0,y)) == mpc(0,sinh(y))
    assert cospi(mpc(x,0)) == cospi(x)
    assert sinpi(mpc(x,0)) == sinpi(x)
    assert cospi(mpc(0,y)).ae(cosh(pi*y))
    assert sinpi(mpc(0,y)).ae(mpc(0,sinh(pi*y)))
    # The paired cos/sin helpers must agree with the individual functions
    c, s = cospi_sinpi(mpc(x,0))
    assert c == cospi(x)
    assert s == sinpi(x)
    c, s = cospi_sinpi(mpc(0,y))
    assert c.ae(cosh(pi*y))
    assert s.ae(mpc(0,sinh(pi*y)))
    c, s = cos_sin(mpc(x,0))
    assert c == cos(x)
    assert s == sin(x)
    c, s = cos_sin(mpc(0,y))
    assert c == cosh(y)
    assert s == mpc(0,sinh(y))
665
+
666
def test_perturbation_rounding():
    """Directed rounding near fixed points: f(1 +/- tiny) rounds to the right side."""
    mp.dps = 100
    # a/b are tiny positive/negative perturbations; c/d perturb 1
    a = pi/10**50
    b = -pi/10**50
    c = 1 + a
    d = 1 + b
    mp.dps = 15
    assert exp(a) == 1
    assert exp(a, rounding='c') > 1
    assert exp(b, rounding='c') == 1
    assert exp(a, rounding='f') == 1
    assert exp(b, rounding='f') < 1
    assert cos(a) == 1
    assert cos(a, rounding='c') == 1
    assert cos(b, rounding='c') == 1
    assert cos(a, rounding='f') < 1
    assert cos(b, rounding='f') < 1
    # Functions with f(x) < x for x > 0 (rounded toward x from below)
    for f in [sin, atan, asinh, tanh]:
        assert f(a) == +a
        assert f(a, rounding='c') > a
        assert f(a, rounding='f') < a
        assert f(b) == +b
        assert f(b, rounding='c') > b
        assert f(b, rounding='f') < b
    # Functions with f(x) > x for x > 0
    for f in [asin, tan, sinh, atanh]:
        assert f(a) == +a
        assert f(b) == +b
        assert f(a, rounding='c') > a
        assert f(b, rounding='c') > b
        assert f(a, rounding='f') < a
        assert f(b, rounding='f') < b
    assert ln(c) == +a
    assert ln(d) == +b
    assert ln(c, rounding='c') > a
    assert ln(c, rounding='f') < a
    assert ln(d, rounding='c') > b
    assert ln(d, rounding='f') < b
    assert cosh(a) == 1
    assert cosh(b) == 1
    assert cosh(a, rounding='c') > 1
    assert cosh(b, rounding='c') > 1
    assert cosh(a, rounding='f') == 1
    assert cosh(b, rounding='f') == 1
709
+
710
def test_integer_parts():
    """floor/ceil on a real and a complex value (parts handled independently)."""
    x = 3.2
    assert floor(x) == 3
    assert ceil(x) == 4
    z = 3.2 + 5j
    assert floor(z) == 3 + 5j
    assert ceil(z) == 4 + 5j
715
+
716
def test_complex_parts():
    """fabs/re/im/conj accessors on real, string and complex inputs."""
    # String arguments are converted before taking the absolute value
    assert fabs('3') == 3
    assert fabs(3+4j) == 5
    # Real/imaginary part extraction
    assert re(3) == 3
    assert im(3) == 0
    assert re(1+4j) == 1
    assert im(1+4j) == 4
    # Conjugation: identity on reals, sign-flip on the imaginary part
    assert conj(3) == 3
    assert conj(3+4j) == 3-4j
    assert mpf(3).conjugate() == 3
726
+
727
def test_cospi_sinpi():
    """Verify cospi(x) = cos(pi*x) and sinpi(x) = sin(pi*x).

    Covers exact values at integers and half-integers, large arguments,
    complex arguments (purity of the real/imaginary parts), the tiny
    values attained near the zeros, and directed rounding near the
    extrema +1/-1.
    """
    # Exact values at (half-)integer points — no pi roundoff involved
    assert sinpi(0) == 0
    assert sinpi(0.5) == 1
    assert sinpi(1) == 0
    assert sinpi(1.5) == -1
    assert sinpi(2) == 0
    assert sinpi(2.5) == 1
    assert sinpi(-0.5) == -1
    assert cospi(0) == 1
    assert cospi(0.5) == 0
    assert cospi(1) == -1
    assert cospi(1.5) == 0
    assert cospi(2) == 1
    assert cospi(2.5) == 0
    assert cospi(-0.5) == 0
    # Huge argument: argument reduction must stay accurate
    assert cospi(100000000000.25).ae(sqrt(2)/2)
    # For integer real part, cospi of a complex value is purely real...
    a = cospi(2+3j)
    assert a.real.ae(cos((2+3j)*pi).real)
    assert a.imag == 0
    # ...and sinpi is purely imaginary
    b = sinpi(2+3j)
    assert b.imag.ae(sin((2+3j)*pi).imag)
    assert b.real == 0
    # Build points 1e-15 on either side of (half-)integers at higher
    # precision so the offsets are exactly representable
    mp.dps = 35
    x1 = mpf(10000) - mpf('1e-15')
    x2 = mpf(10000) + mpf('1e-15')
    x3 = mpf(10000.5) - mpf('1e-15')
    x4 = mpf(10000.5) + mpf('1e-15')
    x5 = mpf(10001) - mpf('1e-15')
    x6 = mpf(10001) + mpf('1e-15')
    x7 = mpf(10001.5) - mpf('1e-15')
    x8 = mpf(10001.5) + mpf('1e-15')
    mp.dps = 15
    M = 10**15
    # Near a zero the function value is ~ +/- pi*1e-15 (local slope +/- pi)
    assert (sinpi(x1)*M).ae(-pi)
    assert (sinpi(x2)*M).ae(pi)
    assert (cospi(x3)*M).ae(pi)
    assert (cospi(x4)*M).ae(-pi)
    assert (sinpi(x5)*M).ae(pi)
    assert (sinpi(x6)*M).ae(-pi)
    assert (cospi(x7)*M).ae(-pi)
    assert (cospi(x8)*M).ae(pi)
    # Near an extremum, rounding toward zero ('d') must land strictly
    # inside (-1, 1) rather than collapsing to +/-1
    assert 0.999 < cospi(x1, rounding='d') < 1
    assert 0.999 < cospi(x2, rounding='d') < 1
    assert 0.999 < sinpi(x3, rounding='d') < 1
    assert 0.999 < sinpi(x4, rounding='d') < 1
    assert -1 < cospi(x5, rounding='d') < -0.999
    assert -1 < cospi(x6, rounding='d') < -0.999
    assert -1 < sinpi(x7, rounding='d') < -0.999
    assert -1 < sinpi(x8, rounding='d') < -0.999
    # Tiny arguments straight from float literals
    assert (sinpi(1e-15)*M).ae(pi)
    assert (sinpi(-1e-15)*M).ae(-pi)
    assert cospi(1e-15) == 1
    assert cospi(1e-15, rounding='d') < 1
780
+
781
def test_expj():
    """expj(z) = exp(j*z) and expjpi(z) = exp(j*pi*z), incl. huge arguments."""
    assert expj(0) == 1
    for z in (1, j, 1+j):
        assert expj(z).ae(exp(j*z))
    assert expjpi(0) == 1
    for z in (1, j, 1+j):
        assert expjpi(z).ae(exp(j*pi*z))
    # Large pure-imaginary argument gives exp(pi*10**15): a gigantic
    # magnitude that must match the reference string value
    assert expjpi(-10**15 * j).ae('2.22579818340535731e+1364376353841841')
791
+
792
def test_sinc():
    """sinc (sin x / x) and sincpi (sin pi*x / (pi*x)) at special points."""
    # Removable singularity at 0 gives exactly 1 for both variants
    assert sinc(0) == sincpi(0) == 1
    # Both variants vanish at the infinities
    for x in (inf, -inf):
        assert sinc(x) == 0
        assert sincpi(x) == 0
    assert sinc(2).ae(0.45464871341284084770)
    assert sinc(2+3j).ae(0.4463290318402435457-2.7539470277436474940j)
    # sincpi has exact zeros at the nonzero integers
    assert sincpi(2) == 0
    assert sincpi(1.5).ae(-0.212206590789193781)
800
+
801
def test_fibonacci():
    """Fibonacci numbers: negative indices, real/complex interpolation,
    very large arguments, and exactness at high working precision."""
    mp.dps = 15
    # Sequence extended to negative indices (F(-n) alternates in sign)
    assert [fibonacci(n) for n in range(-5, 10)] == \
        [5, -3, 2, -1, 1, 0, 1, 1, 2, 3, 5, 8, 13, 21, 34]
    # Analytic continuation to non-integer and complex arguments
    assert fib(2.5).ae(1.4893065462657091)
    assert fib(3+4j).ae(-5248.51130728372 - 14195.962288353j)
    # Large indices: the value stays accurate despite enormous magnitude
    assert fib(1000).ae(4.3466557686937455e+208)
    assert str(fib(10**100)) == '6.24499112864607e+2089876402499787337692720892375554168224592399182109535392875613974104853496745963277658556235103534'
    # With enough digits fib(10000) is exact; check the trailing digits
    mp.dps = 2100
    a = fib(10000)
    assert a % 10**10 == 9947366875
    mp.dps = 15
    assert fibonacci(inf) == inf
    # Complex input with zero imaginary part gives the integer value
    assert fib(3+0j) == 2
815
+
816
def test_call_with_dps():
    """A per-call dps override must yield the requested accuracy,
    independent of the global working precision."""
    mp.dps = 15
    high_e = exp(1, dps=30)
    reference = e(dps=35)
    assert abs(high_e - reference) < 1e-29
819
+
820
def test_tanh():
    """tanh at special points: zero, infinities, nan, complex infinity."""
    mp.dps = 15
    assert tanh(0) == 0
    # tanh saturates to +/-1 at the real infinities
    for x, limit in ((inf, 1), (-inf, -1)):
        assert tanh(x) == limit
    assert isnan(tanh(nan))
    # Complex value with infinite real part also saturates
    assert tanh(mpc('inf', '0')) == 1
827
+
828
def test_atanh():
    """atanh: real values, the branch points at +/-1, nan, and the
    limits +/- j*pi/2 approached from the various complex infinities."""
    mp.dps = 15
    assert atanh(0) == 0
    assert atanh(0.5).ae(0.54930614433405484570)
    assert atanh(-0.5).ae(-0.54930614433405484570)
    # Branch points: real infinities, and the result stays of mpf type
    assert atanh(1) == inf
    assert atanh(-1) == -inf
    assert isnan(atanh(nan))
    assert isinstance(atanh(1), mpf)
    assert isinstance(atanh(-1), mpf)
    # Limits at infinity: the sign of j*pi/2 depends on the direction
    # of approach in the complex plane
    jpi2 = j*pi/2
    assert atanh(inf).ae(-jpi2)
    assert atanh(-inf).ae(jpi2)
    assert atanh(mpc(inf,-1)).ae(-jpi2)
    assert atanh(mpc(inf,0)).ae(-jpi2)
    assert atanh(mpc(inf,1)).ae(jpi2)
    assert atanh(mpc(1,inf)).ae(jpi2)
    assert atanh(mpc(0,inf)).ae(jpi2)
    assert atanh(mpc(-1,inf)).ae(jpi2)
    assert atanh(mpc(-inf,1)).ae(jpi2)
    assert atanh(mpc(-inf,0)).ae(jpi2)
    assert atanh(mpc(-inf,-1)).ae(-jpi2)
    assert atanh(mpc(-1,-inf)).ae(-jpi2)
    assert atanh(mpc(0,-inf)).ae(-jpi2)
    assert atanh(mpc(1,-inf)).ae(-jpi2)
854
+
855
def test_expm1():
    """expm1(x) = exp(x) - 1, computed without cancellation for small x."""
    mp.dps = 15
    assert expm1(0) == 0
    assert expm1(3).ae(exp(3)-1)
    assert expm1(inf) == inf
    # Tiny arguments: expm1(x) ~ x, so the result must not cancel to 0
    tiny = expm1(1e-50)
    assert tiny.ae(1e-50)
    # Second-order term visible once scaled back up
    scaled = expm1(1e-10) * 1e10
    assert scaled.ae(1.00000000005)
862
+
863
def test_log1p():
    """log1p(x) = log(1 + x), computed without cancellation for small x."""
    mp.dps = 15
    assert log1p(0) == 0
    assert log1p(3).ae(log(1+3))
    assert log1p(inf) == inf
    # Tiny arguments: log1p(x) ~ x, so the result must not cancel to 0
    tiny = log1p(1e-50)
    assert tiny.ae(1e-50)
    # Second-order term visible once scaled back up
    scaled = log1p(1e-10) * 1e10
    assert scaled.ae(0.99999999995)
870
+
871
def test_powm1():
    """powm1(x, y) = x**y - 1: exactly 0 whenever x**y is exactly 1,
    and accurate when x**y is extremely close to 1."""
    mp.dps = 15
    assert powm1(2,3) == 7
    # All the ways x**y can be exactly 1 must give exactly 0
    assert powm1(-1,2) == 0
    assert powm1(-1,0) == 0
    assert powm1(-2,0) == 0
    assert powm1(3+4j,0) == 0
    assert powm1(0,1) == -1
    assert powm1(0,0) == 0
    assert powm1(1,0) == 0
    assert powm1(1,2) == 0
    assert powm1(1,3+4j) == 0
    assert powm1(1,5) == 0
    assert powm1(j,4) == 0
    assert powm1(-j,4) == 0
    # 2**eps - 1 ~ eps*ln(2): no cancellation for tiny exponents
    assert (powm1(2,1e-100)*1e100).ae(ln2)
    # ...even for exponents far below the working precision
    assert powm1(2,'1e-100000000000') != 0
    # (1+eps)**5 - 1 ~ 5*eps, with an exactly-representable 1+1e-100 base
    assert (powm1(fadd(1,1e-100,exact=True), 5)*1e100).ae(5)
889
+
890
def test_unitroots():
    """unitroots(n): the n-th roots of unity, optionally restricted to
    the primitive ones."""
    assert unitroots(1) == [1]
    assert unitroots(2) == [1, -1]
    a, b, c = unitroots(3)
    assert a == 1
    assert b.ae(-0.5 + 0.86602540378443864676j)
    assert c.ae(-0.5 - 0.86602540378443864676j)
    # primitive=True keeps only the roots of exact order n
    assert unitroots(1, primitive=True) == [1]
    assert unitroots(2, primitive=True) == [-1]
    assert unitroots(3, primitive=True) == unitroots(3)[1:]
    assert unitroots(4, primitive=True) == [j, -j]
    # Count of primitive roots: 16 for n=17, 8 for n=16
    assert len(unitroots(17, primitive=True)) == 16
    assert len(unitroots(16, primitive=True)) == 8
903
+
904
def test_cyclotomic():
    """Cyclotomic polynomials Phi_n evaluated at roots of unity,
    near-roots, and ordinary real points."""
    mp.dps = 15
    # Reference value tables at the special points 1, -1, j, -j
    assert [cyclotomic(n,1) for n in range(31)] == [1,0,2,3,2,5,1,7,2,3,1,11,1,13,1,1,2,17,1,19,1,1,1,23,1,5,1,3,1,29,1]
    assert [cyclotomic(n,-1) for n in range(31)] == [1,-2,0,1,2,1,3,1,2,1,5,1,1,1,7,1,2,1,3,1,1,1,11,1,1,1,13,1,1,1,1]
    assert [cyclotomic(n,j) for n in range(21)] == [1,-1+j,1+j,j,0,1,-j,j,2,-j,1,j,3,1,-j,1,2,1,j,j,5]
    assert [cyclotomic(n,-j) for n in range(21)] == [1,-1-j,1-j,-j,0,1,j,-j,2,j,1,-j,3,1,j,1,2,1,-j,-j,5]
    # Large indices at j still give exact small integers
    assert cyclotomic(1624,j) == 1
    assert cyclotomic(33600,j) == 1
    # u = sqrt(j), computed with 500 bits of precision
    u = sqrt(j, prec=500)
    assert cyclotomic(8, u).ae(0)  # u is a root of Phi_8
    assert cyclotomic(30, u).ae(5.8284271247461900976)
    assert cyclotomic(2040, u).ae(1)
    # Small-degree cases at an ordinary real point
    assert cyclotomic(0,2.5) == 1
    assert cyclotomic(1,2.5) == 2.5-1
    assert cyclotomic(2,2.5) == 2.5+1
    assert cyclotomic(3,2.5) == 2.5**2 + 2.5 + 1
    assert cyclotomic(7,2.5) == 406.234375
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_runtime/include/__init__.py ADDED
File without changes
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_runtime/include/cudaGL.h ADDED
@@ -0,0 +1,605 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #ifndef CUDAGL_H
51
+ #define CUDAGL_H
52
+
53
+ #include <cuda.h>
54
+ #include <GL/gl.h>
55
+
56
/* Portable deprecation marker: expands to nothing for internal, Doxygen,
 * or opt-in (CUDA_ENABLE_DEPRECATED) builds; otherwise uses the
 * compiler's native deprecation attribute where one is known. */
#if defined(__CUDA_API_VERSION_INTERNAL) || defined(__DOXYGEN_ONLY__) || defined(CUDA_ENABLE_DEPRECATED)
#define __CUDA_DEPRECATED
#elif defined(_MSC_VER)
#define __CUDA_DEPRECATED __declspec(deprecated)
#elif defined(__GNUC__)
#define __CUDA_DEPRECATED __attribute__((deprecated))
#else
#define __CUDA_DEPRECATED
#endif

/* Pinning an older API surface via CUDA_FORCE_API_VERSION is rejected
 * outright by this header. */
#ifdef CUDA_FORCE_API_VERSION
#error "CUDA_FORCE_API_VERSION is no longer supported."
#endif

/* Select the per-thread-default-stream entry points (_ptds/_ptsz name
 * suffixes) when CUDA_API_PER_THREAD_DEFAULT_STREAM is requested;
 * otherwise the plain (legacy default stream) names are used. */
#if defined(__CUDA_API_VERSION_INTERNAL) || defined(CUDA_API_PER_THREAD_DEFAULT_STREAM)
#define __CUDA_API_PER_THREAD_DEFAULT_STREAM
#define __CUDA_API_PTDS(api) api ## _ptds
#define __CUDA_API_PTSZ(api) api ## _ptsz
#else
#define __CUDA_API_PTDS(api) api
#define __CUDA_API_PTSZ(api) api
#endif

/* Map the public API names onto their current (_v2) implementations. */
#define cuGLCtxCreate cuGLCtxCreate_v2
#define cuGLMapBufferObject __CUDA_API_PTDS(cuGLMapBufferObject_v2)
#define cuGLMapBufferObjectAsync __CUDA_API_PTSZ(cuGLMapBufferObjectAsync_v2)
#define cuGLGetDevices cuGLGetDevices_v2
83
+
84
+ #ifdef __cplusplus
85
+ extern "C" {
86
+ #endif
87
+
88
+ /**
89
+ * \file cudaGL.h
90
+ * \brief Header file for the OpenGL interoperability functions of the
91
+ * low-level CUDA driver application programming interface.
92
+ */
93
+
94
+ /**
95
+ * \defgroup CUDA_GL OpenGL Interoperability
96
+ * \ingroup CUDA_DRIVER
97
+ *
98
+ * ___MANBRIEF___ OpenGL interoperability functions of the low-level CUDA
99
+ * driver API (___CURRENT_FILE___) ___ENDMANBRIEF___
100
+ *
101
+ * This section describes the OpenGL interoperability functions of the
102
+ * low-level CUDA driver application programming interface. Note that mapping
103
+ * of OpenGL resources is performed with the graphics API agnostic, resource
104
+ * mapping interface described in \ref CUDA_GRAPHICS "Graphics Interoperability".
105
+ *
106
+ * @{
107
+ */
108
+
109
+ #if defined(_WIN32)
110
+ #if !defined(WGL_NV_gpu_affinity)
111
+ typedef void* HGPUNV;
112
+ #endif
113
+ #endif /* _WIN32 */
114
+
115
+ /**
116
+ * \brief Registers an OpenGL buffer object
117
+ *
118
+ * Registers the buffer object specified by \p buffer for access by
119
+ * CUDA. A handle to the registered object is returned as \p
120
+ * pCudaResource. The register flags \p Flags specify the intended usage,
121
+ * as follows:
122
+ *
123
+ * - ::CU_GRAPHICS_REGISTER_FLAGS_NONE: Specifies no hints about how this
124
+ * resource will be used. It is therefore assumed that this resource will be
125
+ * read from and written to by CUDA. This is the default value.
126
+ * - ::CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY: Specifies that CUDA
127
+ * will not write to this resource.
128
+ * - ::CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD: Specifies that
129
+ * CUDA will not read from this resource and will write over the
130
+ * entire contents of the resource, so none of the data previously
131
+ * stored in the resource will be preserved.
132
+ *
133
+ * \param pCudaResource - Pointer to the returned object handle
134
+ * \param buffer - name of buffer object to be registered
135
+ * \param Flags - Register flags
136
+ *
137
+ * \return
138
+ * ::CUDA_SUCCESS,
139
+ * ::CUDA_ERROR_INVALID_HANDLE,
140
+ * ::CUDA_ERROR_ALREADY_MAPPED,
141
+ * ::CUDA_ERROR_INVALID_CONTEXT,
142
+ * \notefnerr
143
+ *
144
+ * \sa
145
+ * ::cuGraphicsUnregisterResource,
146
+ * ::cuGraphicsMapResources,
147
+ * ::cuGraphicsResourceGetMappedPointer,
148
+ * ::cudaGraphicsGLRegisterBuffer
149
+ */
150
+ CUresult CUDAAPI cuGraphicsGLRegisterBuffer(CUgraphicsResource *pCudaResource, GLuint buffer, unsigned int Flags);
151
+
152
+ /**
153
+ * \brief Register an OpenGL texture or renderbuffer object
154
+ *
155
+ * Registers the texture or renderbuffer object specified by \p image for access by CUDA.
156
+ * A handle to the registered object is returned as \p pCudaResource.
157
+ *
158
+ * \p target must match the type of the object, and must be one of ::GL_TEXTURE_2D,
159
+ * ::GL_TEXTURE_RECTANGLE, ::GL_TEXTURE_CUBE_MAP, ::GL_TEXTURE_3D, ::GL_TEXTURE_2D_ARRAY,
160
+ * or ::GL_RENDERBUFFER.
161
+ *
162
+ * The register flags \p Flags specify the intended usage, as follows:
163
+ *
164
+ * - ::CU_GRAPHICS_REGISTER_FLAGS_NONE: Specifies no hints about how this
165
+ * resource will be used. It is therefore assumed that this resource will be
166
+ * read from and written to by CUDA. This is the default value.
167
+ * - ::CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY: Specifies that CUDA
168
+ * will not write to this resource.
169
+ * - ::CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD: Specifies that
170
+ * CUDA will not read from this resource and will write over the
171
+ * entire contents of the resource, so none of the data previously
172
+ * stored in the resource will be preserved.
173
+ * - ::CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST: Specifies that CUDA will
174
+ * bind this resource to a surface reference.
175
+ * - ::CU_GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER: Specifies that CUDA will perform
176
+ * texture gather operations on this resource.
177
+ *
178
+ * The following image formats are supported. For brevity's sake, the list is abbreviated.
179
+ * For ex., {GL_R, GL_RG} X {8, 16} would expand to the following 4 formats
180
+ * {GL_R8, GL_R16, GL_RG8, GL_RG16} :
181
+ * - GL_RED, GL_RG, GL_RGBA, GL_LUMINANCE, GL_ALPHA, GL_LUMINANCE_ALPHA, GL_INTENSITY
182
+ * - {GL_R, GL_RG, GL_RGBA} X {8, 16, 16F, 32F, 8UI, 16UI, 32UI, 8I, 16I, 32I}
183
+ * - {GL_LUMINANCE, GL_ALPHA, GL_LUMINANCE_ALPHA, GL_INTENSITY} X
184
+ * {8, 16, 16F_ARB, 32F_ARB, 8UI_EXT, 16UI_EXT, 32UI_EXT, 8I_EXT, 16I_EXT, 32I_EXT}
185
+ *
186
+ * The following image classes are currently disallowed:
187
+ * - Textures with borders
188
+ * - Multisampled renderbuffers
189
+ *
190
+ * \param pCudaResource - Pointer to the returned object handle
191
+ * \param image - name of texture or renderbuffer object to be registered
192
+ * \param target - Identifies the type of object specified by \p image
193
+ * \param Flags - Register flags
194
+ *
195
+ * \return
196
+ * ::CUDA_SUCCESS,
197
+ * ::CUDA_ERROR_INVALID_HANDLE,
198
+ * ::CUDA_ERROR_ALREADY_MAPPED,
199
+ * ::CUDA_ERROR_INVALID_CONTEXT,
200
+ * \notefnerr
201
+ *
202
+ * \sa
203
+ * ::cuGraphicsUnregisterResource,
204
+ * ::cuGraphicsMapResources,
205
+ * ::cuGraphicsSubResourceGetMappedArray,
206
+ * ::cudaGraphicsGLRegisterImage
207
+ */
208
+ CUresult CUDAAPI cuGraphicsGLRegisterImage(CUgraphicsResource *pCudaResource, GLuint image, GLenum target, unsigned int Flags);
209
+
210
+ #ifdef _WIN32
211
+ /**
212
+ * \brief Gets the CUDA device associated with hGpu
213
+ *
214
+ * Returns in \p *pDevice the CUDA device associated with a \p hGpu, if
215
+ * applicable.
216
+ *
217
+ * \param pDevice - Device associated with hGpu
218
+ * \param hGpu - Handle to a GPU, as queried via ::WGL_NV_gpu_affinity()
219
+ *
220
+ * \return
221
+ * ::CUDA_SUCCESS,
222
+ * ::CUDA_ERROR_DEINITIALIZED,
223
+ * ::CUDA_ERROR_NOT_INITIALIZED,
224
+ * ::CUDA_ERROR_INVALID_CONTEXT,
225
+ * ::CUDA_ERROR_INVALID_VALUE
226
+ * \notefnerr
227
+ *
228
+ * \sa ::cuGLMapBufferObject,
229
+ * ::cuGLRegisterBufferObject, ::cuGLUnmapBufferObject,
230
+ * ::cuGLUnregisterBufferObject, ::cuGLUnmapBufferObjectAsync,
231
+ * ::cuGLSetBufferObjectMapFlags,
232
+ * ::cudaWGLGetDevice
233
+ */
234
+ CUresult CUDAAPI cuWGLGetDevice(CUdevice *pDevice, HGPUNV hGpu);
235
+ #endif /* _WIN32 */
236
+
237
+ /**
238
+ * CUDA devices corresponding to an OpenGL device
239
+ */
240
/* Device-selection modes accepted by ::cuGLGetDevices. */
typedef enum CUGLDeviceList_enum {
    CU_GL_DEVICE_LIST_ALL           = 0x01, /**< The CUDA devices for all GPUs used by the current OpenGL context */
    CU_GL_DEVICE_LIST_CURRENT_FRAME = 0x02, /**< The CUDA devices for the GPUs used by the current OpenGL context in its currently rendering frame */
    CU_GL_DEVICE_LIST_NEXT_FRAME    = 0x03, /**< The CUDA devices for the GPUs to be used by the current OpenGL context in the next frame */
} CUGLDeviceList;
245
+
246
+ /**
247
+ * \brief Gets the CUDA devices associated with the current OpenGL context
248
+ *
249
+ * Returns in \p *pCudaDeviceCount the number of CUDA-compatible devices
250
+ * corresponding to the current OpenGL context. Also returns in \p *pCudaDevices
251
+ * at most cudaDeviceCount of the CUDA-compatible devices corresponding to
252
+ * the current OpenGL context. If any of the GPUs being used by the current OpenGL
253
+ * context are not CUDA capable then the call will return CUDA_ERROR_NO_DEVICE.
254
+ *
255
+ * The \p deviceList argument may be any of the following:
256
+ * - ::CU_GL_DEVICE_LIST_ALL: Query all devices used by the current OpenGL context.
257
+ * - ::CU_GL_DEVICE_LIST_CURRENT_FRAME: Query the devices used by the current OpenGL context to
258
+ * render the current frame (in SLI).
259
+ * - ::CU_GL_DEVICE_LIST_NEXT_FRAME: Query the devices used by the current OpenGL context to
260
+ * render the next frame (in SLI). Note that this is a prediction, it can't be guaranteed that
261
+ * this is correct in all cases.
262
+ *
263
+ * \param pCudaDeviceCount - Returned number of CUDA devices.
264
+ * \param pCudaDevices - Returned CUDA devices.
265
+ * \param cudaDeviceCount - The size of the output device array pCudaDevices.
266
+ * \param deviceList - The set of devices to return.
267
+ *
268
+ * \return
269
+ * ::CUDA_SUCCESS,
270
+ * ::CUDA_ERROR_NO_DEVICE,
271
+ * ::CUDA_ERROR_INVALID_VALUE,
272
+ * ::CUDA_ERROR_INVALID_CONTEXT,
273
+ * ::CUDA_ERROR_INVALID_GRAPHICS_CONTEXT
274
+ *
275
+ * \notefnerr
276
+ *
277
+ * \sa
278
+ * ::cuWGLGetDevice,
279
+ * ::cudaGLGetDevices
280
+ */
281
+ CUresult CUDAAPI cuGLGetDevices(unsigned int *pCudaDeviceCount, CUdevice *pCudaDevices, unsigned int cudaDeviceCount, CUGLDeviceList deviceList);
282
+
283
+ /**
284
+ * \defgroup CUDA_GL_DEPRECATED OpenGL Interoperability [DEPRECATED]
285
+ *
286
+ * ___MANBRIEF___ deprecated OpenGL interoperability functions of the low-level
287
+ * CUDA driver API (___CURRENT_FILE___) ___ENDMANBRIEF___
288
+ *
289
+ * This section describes deprecated OpenGL interoperability functionality.
290
+ *
291
+ * @{
292
+ */
293
+
294
/** Flags to map or unmap a resource */
typedef enum CUGLmap_flags_enum {
    CU_GL_MAP_RESOURCE_FLAGS_NONE          = 0x00, /**< No usage hint: resource may be both read and written */
    CU_GL_MAP_RESOURCE_FLAGS_READ_ONLY     = 0x01, /**< Hint: resource will not be written by CUDA */
    CU_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02, /**< Hint: resource will be overwritten entirely; prior contents not preserved */
} CUGLmap_flags;
300
+
301
+ /**
302
+ * \brief Create a CUDA context for interoperability with OpenGL
303
+ *
304
+ * \deprecated This function is deprecated as of Cuda 5.0.
305
+ *
306
+ * This function is deprecated and should no longer be used. It is
307
+ * no longer necessary to associate a CUDA context with an OpenGL
308
+ * context in order to achieve maximum interoperability performance.
309
+ *
310
+ * \param pCtx - Returned CUDA context
311
+ * \param Flags - Options for CUDA context creation
312
+ * \param device - Device on which to create the context
313
+ *
314
+ * \return
315
+ * ::CUDA_SUCCESS,
316
+ * ::CUDA_ERROR_DEINITIALIZED,
317
+ * ::CUDA_ERROR_NOT_INITIALIZED,
318
+ * ::CUDA_ERROR_INVALID_CONTEXT,
319
+ * ::CUDA_ERROR_INVALID_VALUE,
320
+ * ::CUDA_ERROR_OUT_OF_MEMORY
321
+ * \notefnerr
322
+ *
323
+ * \sa ::cuCtxCreate, ::cuGLInit, ::cuGLMapBufferObject,
324
+ * ::cuGLRegisterBufferObject, ::cuGLUnmapBufferObject,
325
+ * ::cuGLUnregisterBufferObject, ::cuGLMapBufferObjectAsync,
326
+ * ::cuGLUnmapBufferObjectAsync, ::cuGLSetBufferObjectMapFlags,
327
+ * ::cuWGLGetDevice
328
+ */
329
+ __CUDA_DEPRECATED CUresult CUDAAPI cuGLCtxCreate(CUcontext *pCtx, unsigned int Flags, CUdevice device );
330
+
331
+ /**
332
+ * \brief Initializes OpenGL interoperability
333
+ *
334
+ * \deprecated This function is deprecated as of Cuda 3.0.
335
+ *
336
+ * Initializes OpenGL interoperability. This function is deprecated
337
+ * and calling it is no longer required. It may fail if the needed
338
+ * OpenGL driver facilities are not available.
339
+ *
340
+ * \return
341
+ * ::CUDA_SUCCESS,
342
+ * ::CUDA_ERROR_DEINITIALIZED,
343
+ * ::CUDA_ERROR_NOT_INITIALIZED,
344
+ * ::CUDA_ERROR_INVALID_CONTEXT,
345
+ * ::CUDA_ERROR_UNKNOWN
346
+ * \notefnerr
347
+ *
348
+ * \sa ::cuGLMapBufferObject,
349
+ * ::cuGLRegisterBufferObject, ::cuGLUnmapBufferObject,
350
+ * ::cuGLUnregisterBufferObject, ::cuGLMapBufferObjectAsync,
351
+ * ::cuGLUnmapBufferObjectAsync, ::cuGLSetBufferObjectMapFlags,
352
+ * ::cuWGLGetDevice
353
+ */
354
+ __CUDA_DEPRECATED CUresult CUDAAPI cuGLInit(void);
355
+
356
+ /**
357
+ * \brief Registers an OpenGL buffer object
358
+ *
359
+ * \deprecated This function is deprecated as of Cuda 3.0.
360
+ *
361
+ * Registers the buffer object specified by \p buffer for access by
362
+ * CUDA. This function must be called before CUDA can map the buffer
363
+ * object. There must be a valid OpenGL context bound to the current
364
+ * thread when this function is called, and the buffer name is
365
+ * resolved by that context.
366
+ *
367
+ * \param buffer - The name of the buffer object to register.
368
+ *
369
+ * \return
370
+ * ::CUDA_SUCCESS,
371
+ * ::CUDA_ERROR_DEINITIALIZED,
372
+ * ::CUDA_ERROR_NOT_INITIALIZED,
373
+ * ::CUDA_ERROR_INVALID_CONTEXT,
374
+ * ::CUDA_ERROR_ALREADY_MAPPED
375
+ * \notefnerr
376
+ *
377
+ * \sa ::cuGraphicsGLRegisterBuffer
378
+ */
379
+ __CUDA_DEPRECATED CUresult CUDAAPI cuGLRegisterBufferObject(GLuint buffer);
380
+
381
+ /**
382
+ * \brief Maps an OpenGL buffer object
383
+ *
384
+ * \deprecated This function is deprecated as of Cuda 3.0.
385
+ *
386
+ * Maps the buffer object specified by \p buffer into the address space of the
387
+ * current CUDA context and returns in \p *dptr and \p *size the base pointer
388
+ * and size of the resulting mapping.
389
+ *
390
+ * There must be a valid OpenGL context bound to the current thread
391
+ * when this function is called. This must be the same context, or a
392
+ * member of the same shareGroup, as the context that was bound when
393
+ * the buffer was registered.
394
+ *
395
+ * All streams in the current CUDA context are synchronized with the
396
+ * current GL context.
397
+ *
398
+ * \param dptr - Returned mapped base pointer
399
+ * \param size - Returned size of mapping
400
+ * \param buffer - The name of the buffer object to map
401
+ *
402
+ * \return
403
+ * ::CUDA_SUCCESS,
404
+ * ::CUDA_ERROR_DEINITIALIZED,
405
+ * ::CUDA_ERROR_NOT_INITIALIZED,
406
+ * ::CUDA_ERROR_INVALID_CONTEXT,
407
+ * ::CUDA_ERROR_INVALID_VALUE,
408
+ * ::CUDA_ERROR_MAP_FAILED
409
+ * \notefnerr
410
+ *
411
+ * \sa ::cuGraphicsMapResources
412
+ */
413
+ __CUDA_DEPRECATED CUresult CUDAAPI cuGLMapBufferObject(CUdeviceptr *dptr, size_t *size, GLuint buffer);
414
+
415
+ /**
416
+ * \brief Unmaps an OpenGL buffer object
417
+ *
418
+ * \deprecated This function is deprecated as of Cuda 3.0.
419
+ *
420
+ * Unmaps the buffer object specified by \p buffer for access by CUDA.
421
+ *
422
+ * There must be a valid OpenGL context bound to the current thread
423
+ * when this function is called. This must be the same context, or a
424
+ * member of the same shareGroup, as the context that was bound when
425
+ * the buffer was registered.
426
+ *
427
+ * All streams in the current CUDA context are synchronized with the
428
+ * current GL context.
429
+ *
430
+ * \param buffer - Buffer object to unmap
431
+ *
432
+ * \return
433
+ * ::CUDA_SUCCESS,
434
+ * ::CUDA_ERROR_DEINITIALIZED,
435
+ * ::CUDA_ERROR_NOT_INITIALIZED,
436
+ * ::CUDA_ERROR_INVALID_CONTEXT,
437
+ * ::CUDA_ERROR_INVALID_VALUE
438
+ * \notefnerr
439
+ *
440
+ * \sa ::cuGraphicsUnmapResources
441
+ */
442
+ __CUDA_DEPRECATED CUresult CUDAAPI cuGLUnmapBufferObject(GLuint buffer);
443
+
444
+ /**
445
+ * \brief Unregister an OpenGL buffer object
446
+ *
447
+ * \deprecated This function is deprecated as of Cuda 3.0.
448
+ *
449
+ * Unregisters the buffer object specified by \p buffer. This
450
+ * releases any resources associated with the registered buffer.
451
+ * After this call, the buffer may no longer be mapped for access by
452
+ * CUDA.
453
+ *
454
+ * There must be a valid OpenGL context bound to the current thread
455
+ * when this function is called. This must be the same context, or a
456
+ * member of the same shareGroup, as the context that was bound when
457
+ * the buffer was registered.
458
+ *
459
+ * \param buffer - Name of the buffer object to unregister
460
+ *
461
+ * \return
462
+ * ::CUDA_SUCCESS,
463
+ * ::CUDA_ERROR_DEINITIALIZED,
464
+ * ::CUDA_ERROR_NOT_INITIALIZED,
465
+ * ::CUDA_ERROR_INVALID_CONTEXT,
466
+ * ::CUDA_ERROR_INVALID_VALUE
467
+ * \notefnerr
468
+ *
469
+ * \sa ::cuGraphicsUnregisterResource
470
+ */
471
+ __CUDA_DEPRECATED CUresult CUDAAPI cuGLUnregisterBufferObject(GLuint buffer);
472
+
473
+ /**
474
+ * \brief Set the map flags for an OpenGL buffer object
475
+ *
476
+ * \deprecated This function is deprecated as of Cuda 3.0.
477
+ *
478
+ * Sets the map flags for the buffer object specified by \p buffer.
479
+ *
480
+ * Changes to \p Flags will take effect the next time \p buffer is mapped.
481
+ * The \p Flags argument may be any of the following:
482
+ * - ::CU_GL_MAP_RESOURCE_FLAGS_NONE: Specifies no hints about how this
483
+ * resource will be used. It is therefore assumed that this resource will be
484
+ * read from and written to by CUDA kernels. This is the default value.
485
+ * - ::CU_GL_MAP_RESOURCE_FLAGS_READ_ONLY: Specifies that CUDA kernels which
486
+ * access this resource will not write to this resource.
487
+ * - ::CU_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD: Specifies that CUDA kernels
488
+ * which access this resource will not read from this resource and will
489
+ * write over the entire contents of the resource, so none of the data
490
+ * previously stored in the resource will be preserved.
491
+ *
492
+ * If \p buffer has not been registered for use with CUDA, then
493
+ * ::CUDA_ERROR_INVALID_HANDLE is returned. If \p buffer is presently
494
+ * mapped for access by CUDA, then ::CUDA_ERROR_ALREADY_MAPPED is returned.
495
+ *
496
+ * There must be a valid OpenGL context bound to the current thread
497
+ * when this function is called. This must be the same context, or a
498
+ * member of the same shareGroup, as the context that was bound when
499
+ * the buffer was registered.
500
+ *
501
+ * \param buffer - Buffer object to unmap
502
+ * \param Flags - Map flags
503
+ *
504
+ * \return
505
+ * ::CUDA_SUCCESS,
506
+ * ::CUDA_ERROR_NOT_INITIALIZED,
507
+ * ::CUDA_ERROR_INVALID_HANDLE,
508
+ * ::CUDA_ERROR_ALREADY_MAPPED,
509
+ * ::CUDA_ERROR_INVALID_CONTEXT,
510
+ * \notefnerr
511
+ *
512
+ * \sa ::cuGraphicsResourceSetMapFlags
513
+ */
514
+ __CUDA_DEPRECATED CUresult CUDAAPI cuGLSetBufferObjectMapFlags(GLuint buffer, unsigned int Flags);
515
+
516
+ /**
517
+ * \brief Maps an OpenGL buffer object
518
+ *
519
+ * \deprecated This function is deprecated as of Cuda 3.0.
520
+ *
521
+ * Maps the buffer object specified by \p buffer into the address space of the
522
+ * current CUDA context and returns in \p *dptr and \p *size the base pointer
523
+ * and size of the resulting mapping.
524
+ *
525
+ * There must be a valid OpenGL context bound to the current thread
526
+ * when this function is called. This must be the same context, or a
527
+ * member of the same shareGroup, as the context that was bound when
528
+ * the buffer was registered.
529
+ *
530
+ * Stream \p hStream in the current CUDA context is synchronized with
531
+ * the current GL context.
532
+ *
533
+ * \param dptr - Returned mapped base pointer
534
+ * \param size - Returned size of mapping
535
+ * \param buffer - The name of the buffer object to map
536
+ * \param hStream - Stream to synchronize
537
+ *
538
+ * \return
539
+ * ::CUDA_SUCCESS,
540
+ * ::CUDA_ERROR_DEINITIALIZED,
541
+ * ::CUDA_ERROR_NOT_INITIALIZED,
542
+ * ::CUDA_ERROR_INVALID_CONTEXT,
543
+ * ::CUDA_ERROR_INVALID_VALUE,
544
+ * ::CUDA_ERROR_MAP_FAILED
545
+ * \notefnerr
546
+ *
547
+ * \sa ::cuGraphicsMapResources
548
+ */
549
+ __CUDA_DEPRECATED CUresult CUDAAPI cuGLMapBufferObjectAsync(CUdeviceptr *dptr, size_t *size, GLuint buffer, CUstream hStream);
550
+
551
+ /**
552
+ * \brief Unmaps an OpenGL buffer object
553
+ *
554
+ * \deprecated This function is deprecated as of Cuda 3.0.
555
+ *
556
+ * Unmaps the buffer object specified by \p buffer for access by CUDA.
557
+ *
558
+ * There must be a valid OpenGL context bound to the current thread
559
+ * when this function is called. This must be the same context, or a
560
+ * member of the same shareGroup, as the context that was bound when
561
+ * the buffer was registered.
562
+ *
563
+ * Stream \p hStream in the current CUDA context is synchronized with
564
+ * the current GL context.
565
+ *
566
+ * \param buffer - Name of the buffer object to unmap
567
+ * \param hStream - Stream to synchronize
568
+ *
569
+ * \return
570
+ * ::CUDA_SUCCESS,
571
+ * ::CUDA_ERROR_DEINITIALIZED,
572
+ * ::CUDA_ERROR_NOT_INITIALIZED,
573
+ * ::CUDA_ERROR_INVALID_CONTEXT,
574
+ * ::CUDA_ERROR_INVALID_VALUE
575
+ * \notefnerr
576
+ *
577
+ * \sa ::cuGraphicsUnmapResources
578
+ */
579
+ __CUDA_DEPRECATED CUresult CUDAAPI cuGLUnmapBufferObjectAsync(GLuint buffer, CUstream hStream);
580
+
581
+ /** @} */ /* END CUDA_GL_DEPRECATED */
582
+ /** @} */ /* END CUDA_GL */
583
+
584
+
585
+ #if defined(__CUDA_API_VERSION_INTERNAL)
586
+ #undef cuGLCtxCreate
587
+ #undef cuGLMapBufferObject
588
+ #undef cuGLMapBufferObjectAsync
589
+ #undef cuGLGetDevices
590
+
591
+ CUresult CUDAAPI cuGLGetDevices(unsigned int *pCudaDeviceCount, CUdevice *pCudaDevices, unsigned int cudaDeviceCount, CUGLDeviceList deviceList);
592
+ CUresult CUDAAPI cuGLMapBufferObject_v2(CUdeviceptr *dptr, size_t *size, GLuint buffer);
593
+ CUresult CUDAAPI cuGLMapBufferObjectAsync_v2(CUdeviceptr *dptr, size_t *size, GLuint buffer, CUstream hStream);
594
+ CUresult CUDAAPI cuGLCtxCreate(CUcontext *pCtx, unsigned int Flags, CUdevice device );
595
+ CUresult CUDAAPI cuGLMapBufferObject(CUdeviceptr_v1 *dptr, unsigned int *size, GLuint buffer);
596
+ CUresult CUDAAPI cuGLMapBufferObjectAsync(CUdeviceptr_v1 *dptr, unsigned int *size, GLuint buffer, CUstream hStream);
597
+ #endif /* __CUDA_API_VERSION_INTERNAL */
598
+
599
+ #ifdef __cplusplus
600
+ };
601
+ #endif
602
+
603
+ #undef __CUDA_DEPRECATED
604
+
605
+ #endif
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_runtime/include/cuda_device_runtime_api.h ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 1993-2021 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #if !defined(__CUDA_DEVICE_RUNTIME_API_H__)
51
+ #define __CUDA_DEVICE_RUNTIME_API_H__
52
+
53
+ /*******************************************************************************
54
+ * *
55
+ * *
56
+ * *
57
+ *******************************************************************************/
58
+
59
+ #if !defined(__CUDACC_RTC__)
60
+
61
+ #if !defined(__CUDACC_INTERNAL_NO_STUBS__) && !defined(__CUDACC_RDC__) && !defined(__CUDACC_EWP__) && defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 350) && !defined(__CUDADEVRT_INTERNAL__)
62
+
63
+ #if defined(__cplusplus)
64
+ extern "C" {
65
+ #endif
66
+
67
+ struct cudaFuncAttributes;
68
+
69
+
70
+ inline __device__ cudaError_t CUDARTAPI cudaMalloc(void **p, size_t s)
71
+ {
72
+ return cudaErrorUnknown;
73
+ }
74
+
75
+ inline __device__ cudaError_t CUDARTAPI cudaFuncGetAttributes(struct cudaFuncAttributes *p, const void *c)
76
+ {
77
+ return cudaErrorUnknown;
78
+ }
79
+
80
+ inline __device__ cudaError_t CUDARTAPI cudaDeviceGetAttribute(int *value, enum cudaDeviceAttr attr, int device)
81
+ {
82
+ return cudaErrorUnknown;
83
+ }
84
+
85
+ inline __device__ cudaError_t CUDARTAPI cudaGetDevice(int *device)
86
+ {
87
+ return cudaErrorUnknown;
88
+ }
89
+
90
+ inline __device__ cudaError_t CUDARTAPI cudaOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, const void *func, int blockSize, size_t dynamicSmemSize)
91
+ {
92
+ return cudaErrorUnknown;
93
+ }
94
+
95
+ inline __device__ cudaError_t CUDARTAPI cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, const void *func, int blockSize, size_t dynamicSmemSize, unsigned int flags)
96
+ {
97
+ return cudaErrorUnknown;
98
+ }
99
+
100
+
101
+ #if defined(__cplusplus)
102
+ }
103
+ #endif
104
+
105
+ #endif /* !defined(__CUDACC_INTERNAL_NO_STUBS__) && !defined(__CUDACC_RDC__) && !defined(__CUDACC_EWP__) && defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 350) && !defined(__CUDADEVRT_INTERNAL__) */
106
+
107
+ #endif /* !defined(__CUDACC_RTC__) */
108
+
109
+ #if defined(__DOXYGEN_ONLY__) || defined(CUDA_ENABLE_DEPRECATED)
110
+ # define __DEPRECATED__(msg)
111
+ #elif defined(_WIN32)
112
+ # define __DEPRECATED__(msg) __declspec(deprecated(msg))
113
+ #elif (defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 5 && !defined(__clang__))))
114
+ # define __DEPRECATED__(msg) __attribute__((deprecated))
115
+ #else
116
+ # define __DEPRECATED__(msg) __attribute__((deprecated(msg)))
117
+ #endif
118
+
119
+ #if defined(__CUDA_ARCH__) && !defined(__CDPRT_SUPPRESS_SYNC_DEPRECATION_WARNING)
120
+ # define __CDPRT_DEPRECATED(func_name) __DEPRECATED__("Use of "#func_name" from device code is deprecated. Moreover, such use will cause this module to fail to load on sm_90+ devices. If calls to "#func_name" from device code cannot be removed for older devices at this time, you may guard them with __CUDA_ARCH__ macros to remove them only for sm_90+ devices, making sure to generate code for compute_90 for the macros to take effect. Note that this mitigation will no longer work when support for "#func_name" from device code is eventually dropped for all devices. Disable this warning with -D__CDPRT_SUPPRESS_SYNC_DEPRECATION_WARNING.")
121
+ #else
122
+ # define __CDPRT_DEPRECATED(func_name)
123
+ #endif
124
+
125
+ #if defined(__cplusplus) && defined(__CUDACC__) /* Visible to nvcc front-end only */
126
+ #if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 350) // Visible to SM>=3.5 and "__host__ __device__" only
127
+
128
+ #include "driver_types.h"
129
+ #include "crt/host_defines.h"
130
+
131
+ extern "C"
132
+ {
133
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaDeviceGetAttribute(int *value, enum cudaDeviceAttr attr, int device);
134
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaDeviceGetLimit(size_t *pValue, enum cudaLimit limit);
135
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaDeviceGetCacheConfig(enum cudaFuncCache *pCacheConfig);
136
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaDeviceGetSharedMemConfig(enum cudaSharedMemConfig *pConfig);
137
+ #if (__CUDA_ARCH__ < 900)
138
+ // cudaDeviceSynchronize is removed on sm_90+
139
+ extern __device__ __cudart_builtin__ __CDPRT_DEPRECATED(cudaDeviceSynchronize) cudaError_t CUDARTAPI cudaDeviceSynchronize(void);
140
+ #endif
141
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI __cudaDeviceSynchronizeDeprecationAvoidance(void);
142
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaGetLastError(void);
143
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaPeekAtLastError(void);
144
+ extern __device__ __cudart_builtin__ const char* CUDARTAPI cudaGetErrorString(cudaError_t error);
145
+ extern __device__ __cudart_builtin__ const char* CUDARTAPI cudaGetErrorName(cudaError_t error);
146
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaGetDeviceCount(int *count);
147
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaGetDevice(int *device);
148
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaStreamCreateWithFlags(cudaStream_t *pStream, unsigned int flags);
149
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaStreamDestroy(cudaStream_t stream);
150
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaStreamWaitEvent(cudaStream_t stream, cudaEvent_t event, unsigned int flags);
151
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaStreamWaitEvent_ptsz(cudaStream_t stream, cudaEvent_t event, unsigned int flags);
152
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaEventCreateWithFlags(cudaEvent_t *event, unsigned int flags);
153
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaEventRecord(cudaEvent_t event, cudaStream_t stream);
154
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaEventRecord_ptsz(cudaEvent_t event, cudaStream_t stream);
155
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaEventRecordWithFlags(cudaEvent_t event, cudaStream_t stream, unsigned int flags);
156
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaEventRecordWithFlags_ptsz(cudaEvent_t event, cudaStream_t stream, unsigned int flags);
157
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaEventDestroy(cudaEvent_t event);
158
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaFuncGetAttributes(struct cudaFuncAttributes *attr, const void *func);
159
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaFree(void *devPtr);
160
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMalloc(void **devPtr, size_t size);
161
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpyAsync(void *dst, const void *src, size_t count, enum cudaMemcpyKind kind, cudaStream_t stream);
162
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpyAsync_ptsz(void *dst, const void *src, size_t count, enum cudaMemcpyKind kind, cudaStream_t stream);
163
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy2DAsync(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind, cudaStream_t stream);
164
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy2DAsync_ptsz(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind, cudaStream_t stream);
165
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy3DAsync(const struct cudaMemcpy3DParms *p, cudaStream_t stream);
166
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy3DAsync_ptsz(const struct cudaMemcpy3DParms *p, cudaStream_t stream);
167
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemsetAsync(void *devPtr, int value, size_t count, cudaStream_t stream);
168
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemsetAsync_ptsz(void *devPtr, int value, size_t count, cudaStream_t stream);
169
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemset2DAsync(void *devPtr, size_t pitch, int value, size_t width, size_t height, cudaStream_t stream);
170
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemset2DAsync_ptsz(void *devPtr, size_t pitch, int value, size_t width, size_t height, cudaStream_t stream);
171
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemset3DAsync(struct cudaPitchedPtr pitchedDevPtr, int value, struct cudaExtent extent, cudaStream_t stream);
172
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemset3DAsync_ptsz(struct cudaPitchedPtr pitchedDevPtr, int value, struct cudaExtent extent, cudaStream_t stream);
173
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaRuntimeGetVersion(int *runtimeVersion);
174
+
175
+ /**
176
+ * \ingroup CUDART_EXECUTION
177
+ * \brief Obtains a parameter buffer
178
+ *
179
+ * Obtains a parameter buffer which can be filled with parameters for a kernel launch.
180
+ * Parameters passed to ::cudaLaunchDevice must be allocated via this function.
181
+ *
182
+ * This is a low level API and can only be accessed from Parallel Thread Execution (PTX).
183
+ * CUDA user code should use <<< >>> to launch kernels.
184
+ *
185
+ * \param alignment - Specifies alignment requirement of the parameter buffer
186
+ * \param size - Specifies size requirement in bytes
187
+ *
188
+ * \return
189
+ * Returns pointer to the allocated parameterBuffer
190
+ * \notefnerr
191
+ *
192
+ * \sa cudaLaunchDevice
193
+ */
194
+ extern __device__ __cudart_builtin__ void * CUDARTAPI cudaGetParameterBuffer(size_t alignment, size_t size);
195
+
196
+ /**
197
+ * \ingroup CUDART_EXECUTION
198
+ * \brief Launches a specified kernel
199
+ *
200
+ * Launches a specified kernel with the specified parameter buffer. A parameter buffer can be obtained
201
+ * by calling ::cudaGetParameterBuffer().
202
+ *
203
+ * This is a low level API and can only be accessed from Parallel Thread Execution (PTX).
204
+ * CUDA user code should use <<< >>> to launch the kernels.
205
+ *
206
+ * \param func - Pointer to the kernel to be launched
207
+ * \param parameterBuffer - Holds the parameters to the launched kernel. parameterBuffer can be NULL. (Optional)
208
+ * \param gridDimension - Specifies grid dimensions
209
+ * \param blockDimension - Specifies block dimensions
210
+ * \param sharedMemSize - Specifies size of shared memory
211
+ * \param stream - Specifies the stream to be used
212
+ *
213
+ * \return
214
+ * ::cudaSuccess, ::cudaErrorInvalidDevice, ::cudaErrorLaunchMaxDepthExceeded, ::cudaErrorInvalidConfiguration,
215
+ * ::cudaErrorStartupFailure, ::cudaErrorLaunchPendingCountExceeded, ::cudaErrorLaunchOutOfResources
216
+ * \notefnerr
217
+ * \n Please refer to Execution Configuration and Parameter Buffer Layout from the CUDA Programming
218
+ * Guide for the detailed descriptions of launch configuration and parameter layout respectively.
219
+ *
220
+ * \sa cudaGetParameterBuffer
221
+ */
222
+ extern __device__ __cudart_builtin__ void * CUDARTAPI cudaGetParameterBufferV2(void *func, dim3 gridDimension, dim3 blockDimension, unsigned int sharedMemSize);
223
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaLaunchDevice_ptsz(void *func, void *parameterBuffer, dim3 gridDimension, dim3 blockDimension, unsigned int sharedMemSize, cudaStream_t stream);
224
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaLaunchDeviceV2_ptsz(void *parameterBuffer, cudaStream_t stream);
225
+
226
+ #if defined(CUDA_API_PER_THREAD_DEFAULT_STREAM) && defined(__CUDA_ARCH__)
227
+ // When compiling for the device and per thread default stream is enabled, add
228
+ // a static inline redirect to the per thread stream entry points.
229
+
230
+ static __inline__ __device__ __cudart_builtin__ cudaError_t CUDARTAPI
231
+ cudaLaunchDevice(void *func, void *parameterBuffer, dim3 gridDimension, dim3 blockDimension, unsigned int sharedMemSize, cudaStream_t stream)
232
+ {
233
+ return cudaLaunchDevice_ptsz(func, parameterBuffer, gridDimension, blockDimension, sharedMemSize, stream);
234
+ }
235
+
236
+ static __inline__ __device__ __cudart_builtin__ cudaError_t CUDARTAPI
237
+ cudaLaunchDeviceV2(void *parameterBuffer, cudaStream_t stream)
238
+ {
239
+ return cudaLaunchDeviceV2_ptsz(parameterBuffer, stream);
240
+ }
241
+ #else
242
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaLaunchDevice(void *func, void *parameterBuffer, dim3 gridDimension, dim3 blockDimension, unsigned int sharedMemSize, cudaStream_t stream);
243
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaLaunchDeviceV2(void *parameterBuffer, cudaStream_t stream);
244
+ #endif
245
+
246
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, const void *func, int blockSize, size_t dynamicSmemSize);
247
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, const void *func, int blockSize, size_t dynamicSmemSize, unsigned int flags);
248
+
249
+ extern __device__ __cudart_builtin__ unsigned long long CUDARTAPI cudaCGGetIntrinsicHandle(enum cudaCGScope scope);
250
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaCGSynchronize(unsigned long long handle, unsigned int flags);
251
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaCGSynchronizeGrid(unsigned long long handle, unsigned int flags);
252
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaCGGetSize(unsigned int *numThreads, unsigned int *numGrids, unsigned long long handle);
253
+ extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaCGGetRank(unsigned int *threadRank, unsigned int *gridRank, unsigned long long handle);
254
+ }
255
+
256
+ template <typename T> static __inline__ __device__ __cudart_builtin__ cudaError_t cudaMalloc(T **devPtr, size_t size);
257
+ template <typename T> static __inline__ __device__ __cudart_builtin__ cudaError_t cudaFuncGetAttributes(struct cudaFuncAttributes *attr, T *entry);
258
+ template <typename T> static __inline__ __device__ __cudart_builtin__ cudaError_t cudaOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, T func, int blockSize, size_t dynamicSmemSize);
259
+ template <typename T> static __inline__ __device__ __cudart_builtin__ cudaError_t cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, T func, int blockSize, size_t dynamicSmemSize, unsigned int flags);
260
+
261
+
262
+ #endif // !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 350)
263
+ #endif /* defined(__cplusplus) && defined(__CUDACC__) */
264
+
265
+ #undef __DEPRECATED__
266
+ #undef __CDPRT_DEPRECATED
267
+
268
+ #endif /* !__CUDA_DEVICE_RUNTIME_API_H__ */
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_runtime/include/cuda_surface_types.h ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #if !defined(__CUDA_SURFACE_TYPES_H__)
51
+ #define __CUDA_SURFACE_TYPES_H__
52
+
53
+ #if defined(__cplusplus) && defined(__CUDACC__)
54
+
55
+ /*******************************************************************************
56
+ * *
57
+ * *
58
+ * *
59
+ *******************************************************************************/
60
+
61
+ #if !defined(__CUDACC_RTC__)
62
+ #define EXCLUDE_FROM_RTC
63
+ #include "channel_descriptor.h"
64
+ #undef EXCLUDE_FROM_RTC
65
+ #endif /* !__CUDACC_RTC__ */
66
+ #include "cuda_runtime_api.h"
67
+
68
+ /*******************************************************************************
69
+ * *
70
+ * *
71
+ * *
72
+ *******************************************************************************/
73
+
74
+ template<class T, int dim = 1>
75
+ struct __device_builtin_surface_type__ surface : public surfaceReference
76
+ {
77
+ #if !defined(__CUDACC_RTC__)
78
+ __host__ surface(void)
79
+ {
80
+ channelDesc = cudaCreateChannelDesc<T>();
81
+ }
82
+
83
+ __host__ surface(struct cudaChannelFormatDesc desc)
84
+ {
85
+ channelDesc = desc;
86
+ }
87
+ #endif /* !__CUDACC_RTC__ */
88
+ };
89
+
90
+ template<int dim>
91
+ struct __device_builtin_surface_type__ surface<void, dim> : public surfaceReference
92
+ {
93
+ #if !defined(__CUDACC_RTC__)
94
+ __host__ surface(void)
95
+ {
96
+ channelDesc = cudaCreateChannelDesc<void>();
97
+ }
98
+ #endif /* !__CUDACC_RTC__ */
99
+ };
100
+
101
+ #endif /* __cplusplus && __CUDACC__ */
102
+
103
+ #endif /* !__CUDA_SURFACE_TYPES_H__ */
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_runtime/include/cudart_platform.h ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2016 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #ifndef __CUDART_PLATFORM_H__
51
+ #define __CUDART_PLATFORM_H__
52
+
53
+ #if ((defined(__linux__) || defined(__QNX__)) && (defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)))
54
+ #define isEglSupported 1
55
+ #endif
56
+
57
+ #endif
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_runtime/include/device_atomic_functions.h ADDED
@@ -0,0 +1,211 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #if !defined(__DEVICE_ATOMIC_FUNCTIONS_H__)
51
+ #define __DEVICE_ATOMIC_FUNCTIONS_H__
52
+
53
+ #if defined(__CUDACC_RTC__)
54
+ #define __DEVICE_ATOMIC_FUNCTIONS_DECL__ __device__
55
+ #else /* __CUDACC_RTC__ */
56
+ #define __DEVICE_ATOMIC_FUNCTIONS_DECL__ static __inline__ __device__
57
+ #endif /* __CUDACC_RTC__ */
58
+
59
+ #if defined(__cplusplus) && defined(__CUDACC__)
60
+
61
+ /*******************************************************************************
62
+ * *
63
+ * *
64
+ * *
65
+ *******************************************************************************/
66
+
67
+ #include "cuda_runtime_api.h"
68
+
69
+ #ifndef __CUDA_ARCH__
70
+ #define __DEF_IF_HOST { }
71
+ #else /* !__CUDA_ARCH__ */
72
+ #define __DEF_IF_HOST ;
73
+ #endif /* __CUDA_ARCH__ */
74
+
75
+ #ifdef __CUDA_ARCH__
76
+ extern "C"
77
+ {
78
+ extern __device__ __device_builtin__ int __iAtomicAdd(int *address, int val);
79
+ extern __device__ __device_builtin__ unsigned int __uAtomicAdd(unsigned int *address, unsigned int val);
80
+ extern __device__ __device_builtin__ int __iAtomicExch(int *address, int val);
81
+ extern __device__ __device_builtin__ unsigned int __uAtomicExch(unsigned int *address, unsigned int val);
82
+ extern __device__ __device_builtin__ float __fAtomicExch(float *address, float val);
83
+ extern __device__ __device_builtin__ int __iAtomicMin(int *address, int val);
84
+ extern __device__ __device_builtin__ unsigned int __uAtomicMin(unsigned int *address, unsigned int val);
85
+ extern __device__ __device_builtin__ int __iAtomicMax(int *address, int val);
86
+ extern __device__ __device_builtin__ unsigned int __uAtomicMax(unsigned int *address, unsigned int val);
87
+ extern __device__ __device_builtin__ unsigned int __uAtomicInc(unsigned int *address, unsigned int val);
88
+ extern __device__ __device_builtin__ unsigned int __uAtomicDec(unsigned int *address, unsigned int val);
89
+ extern __device__ __device_builtin__ int __iAtomicAnd(int *address, int val);
90
+ extern __device__ __device_builtin__ unsigned int __uAtomicAnd(unsigned int *address, unsigned int val);
91
+ extern __device__ __device_builtin__ int __iAtomicOr(int *address, int val);
92
+ extern __device__ __device_builtin__ unsigned int __uAtomicOr(unsigned int *address, unsigned int val);
93
+ extern __device__ __device_builtin__ int __iAtomicXor(int *address, int val);
94
+ extern __device__ __device_builtin__ unsigned int __uAtomicXor(unsigned int *address, unsigned int val);
95
+ extern __device__ __device_builtin__ int __iAtomicCAS(int *address, int compare, int val);
96
+ extern __device__ __device_builtin__ unsigned int __uAtomicCAS(unsigned int *address, unsigned int compare, unsigned int val);
97
+ }
98
+ #endif /* __CUDA_ARCH__ */
99
+
100
+ /*******************************************************************************
101
+ * *
102
+ * *
103
+ * *
104
+ *******************************************************************************/
105
+
106
+ __DEVICE_ATOMIC_FUNCTIONS_DECL__ int atomicAdd(int *address, int val) __DEF_IF_HOST
107
+
108
+ __DEVICE_ATOMIC_FUNCTIONS_DECL__ unsigned int atomicAdd(unsigned int *address, unsigned int val) __DEF_IF_HOST
109
+
110
+ __DEVICE_ATOMIC_FUNCTIONS_DECL__ int atomicSub(int *address, int val) __DEF_IF_HOST
111
+
112
+ __DEVICE_ATOMIC_FUNCTIONS_DECL__ unsigned int atomicSub(unsigned int *address, unsigned int val) __DEF_IF_HOST
113
+
114
+ __DEVICE_ATOMIC_FUNCTIONS_DECL__ int atomicExch(int *address, int val) __DEF_IF_HOST
115
+
116
+ __DEVICE_ATOMIC_FUNCTIONS_DECL__ unsigned int atomicExch(unsigned int *address, unsigned int val) __DEF_IF_HOST
117
+
118
+ __DEVICE_ATOMIC_FUNCTIONS_DECL__ float atomicExch(float *address, float val) __DEF_IF_HOST
119
+
120
+ __DEVICE_ATOMIC_FUNCTIONS_DECL__ int atomicMin(int *address, int val) __DEF_IF_HOST
121
+
122
+ __DEVICE_ATOMIC_FUNCTIONS_DECL__ unsigned int atomicMin(unsigned int *address, unsigned int val) __DEF_IF_HOST
123
+
124
+ __DEVICE_ATOMIC_FUNCTIONS_DECL__ int atomicMax(int *address, int val) __DEF_IF_HOST
125
+
126
+ __DEVICE_ATOMIC_FUNCTIONS_DECL__ unsigned int atomicMax(unsigned int *address, unsigned int val) __DEF_IF_HOST
127
+
128
+ __DEVICE_ATOMIC_FUNCTIONS_DECL__ unsigned int atomicInc(unsigned int *address, unsigned int val) __DEF_IF_HOST
129
+
130
+ __DEVICE_ATOMIC_FUNCTIONS_DECL__ unsigned int atomicDec(unsigned int *address, unsigned int val) __DEF_IF_HOST
131
+
132
+ __DEVICE_ATOMIC_FUNCTIONS_DECL__ int atomicAnd(int *address, int val) __DEF_IF_HOST
133
+
134
+ __DEVICE_ATOMIC_FUNCTIONS_DECL__ unsigned int atomicAnd(unsigned int *address, unsigned int val) __DEF_IF_HOST
135
+
136
+ __DEVICE_ATOMIC_FUNCTIONS_DECL__ int atomicOr(int *address, int val) __DEF_IF_HOST
137
+
138
+ __DEVICE_ATOMIC_FUNCTIONS_DECL__ unsigned int atomicOr(unsigned int *address, unsigned int val) __DEF_IF_HOST
139
+
140
+ __DEVICE_ATOMIC_FUNCTIONS_DECL__ int atomicXor(int *address, int val) __DEF_IF_HOST
141
+
142
+ __DEVICE_ATOMIC_FUNCTIONS_DECL__ unsigned int atomicXor(unsigned int *address, unsigned int val) __DEF_IF_HOST
143
+
144
+ __DEVICE_ATOMIC_FUNCTIONS_DECL__ int atomicCAS(int *address, int compare, int val) __DEF_IF_HOST
145
+
146
+ __DEVICE_ATOMIC_FUNCTIONS_DECL__ unsigned int atomicCAS(unsigned int *address, unsigned int compare, unsigned int val) __DEF_IF_HOST
147
+
148
+ /*******************************************************************************
149
+ * *
150
+ * *
151
+ * *
152
+ *******************************************************************************/
153
+
154
+ #include "cuda_runtime_api.h"
155
+
156
+ #if defined(_WIN32)
157
+ # define __DEPRECATED__(msg) __declspec(deprecated(msg))
158
+ #elif (defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 5 && !defined(__clang__))))
159
+ # define __DEPRECATED__(msg) __attribute__((deprecated))
160
+ #else
161
+ # define __DEPRECATED__(msg) __attribute__((deprecated(msg)))
162
+ #endif
163
+
164
+ #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 700
165
+ #define __WSB_DEPRECATION_MESSAGE(x) #x"() is not valid on compute_70 and above, and should be replaced with "#x"_sync()."\
166
+ "To continue using "#x"(), specify virtual architecture compute_60 when targeting sm_70 and above, for example, using the pair of compiler options: -arch=compute_60 -code=sm_70."
167
+ #else
168
+ #define __WSB_DEPRECATION_MESSAGE(x) #x"() is deprecated in favor of "#x"_sync() and may be removed in a future release (Use -Wno-deprecated-declarations to suppress this warning)."
169
+ #endif
170
+
171
+ extern "C"
172
+ {
173
+ #ifdef __CUDA_ARCH__
174
+ extern __device__ __device_builtin__ unsigned long long int __ullAtomicAdd(unsigned long long int *address, unsigned long long int val);
175
+ extern __device__ __device_builtin__ unsigned long long int __ullAtomicExch(unsigned long long int *address, unsigned long long int val);
176
+ extern __device__ __device_builtin__ unsigned long long int __ullAtomicCAS(unsigned long long int *address, unsigned long long int compare, unsigned long long int val);
177
+ #endif /* __CUDA_ARCH__ */
178
+ extern __device__ __device_builtin__ __DEPRECATED__(__WSB_DEPRECATION_MESSAGE(__any)) int __any(int cond);
179
+ extern __device__ __device_builtin__ __DEPRECATED__(__WSB_DEPRECATION_MESSAGE(__all)) int __all(int cond);
180
+ }
181
+
182
+
183
+ /*******************************************************************************
184
+ * *
185
+ * *
186
+ * *
187
+ *******************************************************************************/
188
+
189
+ __DEVICE_ATOMIC_FUNCTIONS_DECL__ unsigned long long int atomicAdd(unsigned long long int *address, unsigned long long int val) __DEF_IF_HOST
190
+
191
+ __DEVICE_ATOMIC_FUNCTIONS_DECL__ unsigned long long int atomicExch(unsigned long long int *address, unsigned long long int val) __DEF_IF_HOST
192
+
193
+ __DEVICE_ATOMIC_FUNCTIONS_DECL__ unsigned long long int atomicCAS(unsigned long long int *address, unsigned long long int compare, unsigned long long int val) __DEF_IF_HOST
194
+
195
+ __DEVICE_ATOMIC_FUNCTIONS_DECL__ __DEPRECATED__(__WSB_DEPRECATION_MESSAGE(__any)) bool any(bool cond) __DEF_IF_HOST
196
+
197
+ __DEVICE_ATOMIC_FUNCTIONS_DECL__ __DEPRECATED__(__WSB_DEPRECATION_MESSAGE(__all)) bool all(bool cond) __DEF_IF_HOST
198
+
199
+ #undef __DEPRECATED__
200
+ #undef __WSB_DEPRECATION_MESSAGE
201
+
202
+ #endif /* __cplusplus && __CUDACC__ */
203
+
204
+ #undef __DEF_IF_HOST
205
+ #undef __DEVICE_ATOMIC_FUNCTIONS_DECL__
206
+
207
+ #if !defined(__CUDACC_RTC__) && defined(__CUDA_ARCH__)
208
+ #include "device_atomic_functions.hpp"
209
+ #endif /* !__CUDACC_RTC__ && defined(__CUDA_ARCH__) */
210
+
211
+ #endif /* !__DEVICE_ATOMIC_FUNCTIONS_H__ */
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_runtime/include/sm_32_atomic_functions.h ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 35.235 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.35.235 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #if !defined(__SM_32_ATOMIC_FUNCTIONS_H__)
51
+ #define __SM_32_ATOMIC_FUNCTIONS_H__
52
+
53
+ #if defined(__CUDACC_RTC__)
54
+ #define __SM_32_ATOMIC_FUNCTIONS_DECL__ __device__
55
+ #else /* !__CUDACC_RTC__ */
56
+ #define __SM_32_ATOMIC_FUNCTIONS_DECL__ static __inline__ __device__
57
+ #endif /* __CUDACC_RTC__ */
58
+
59
+ #if defined(__cplusplus) && defined(__CUDACC__)
60
+
61
+ #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 320
62
+
63
+ /*******************************************************************************
64
+ * *
65
+ * *
66
+ * *
67
+ *******************************************************************************/
68
+
69
+ #include "cuda_runtime_api.h"
70
+
71
+ #ifndef __CUDA_ARCH__
72
+ #define __DEF_IF_HOST { }
73
+ #else /* !__CUDA_ARCH__ */
74
+ #define __DEF_IF_HOST ;
75
+ #endif /* __CUDA_ARCH__ */
76
+
77
+
78
+ #ifdef __CUDA_ARCH__
79
+ extern "C"
80
+ {
81
+ extern __device__ __device_builtin__ long long __illAtomicMin(long long *address, long long val);
82
+ extern __device__ __device_builtin__ long long __illAtomicMax(long long *address, long long val);
83
+ extern __device__ __device_builtin__ long long __llAtomicAnd(long long *address, long long val);
84
+ extern __device__ __device_builtin__ long long __llAtomicOr(long long *address, long long val);
85
+ extern __device__ __device_builtin__ long long __llAtomicXor(long long *address, long long val);
86
+ extern __device__ __device_builtin__ unsigned long long __ullAtomicMin(unsigned long long *address, unsigned long long val);
87
+ extern __device__ __device_builtin__ unsigned long long __ullAtomicMax(unsigned long long *address, unsigned long long val);
88
+ extern __device__ __device_builtin__ unsigned long long __ullAtomicAnd(unsigned long long *address, unsigned long long val);
89
+ extern __device__ __device_builtin__ unsigned long long __ullAtomicOr (unsigned long long *address, unsigned long long val);
90
+ extern __device__ __device_builtin__ unsigned long long __ullAtomicXor(unsigned long long *address, unsigned long long val);
91
+ }
92
+ #endif /* __CUDA_ARCH__ */
93
+
94
+ /*******************************************************************************
95
+ * *
96
+ * *
97
+ * *
98
+ *******************************************************************************/
99
+
100
+ __SM_32_ATOMIC_FUNCTIONS_DECL__ long long atomicMin(long long *address, long long val) __DEF_IF_HOST
101
+
102
+ __SM_32_ATOMIC_FUNCTIONS_DECL__ long long atomicMax(long long *address, long long val) __DEF_IF_HOST
103
+
104
+ __SM_32_ATOMIC_FUNCTIONS_DECL__ long long atomicAnd(long long *address, long long val) __DEF_IF_HOST
105
+
106
+ __SM_32_ATOMIC_FUNCTIONS_DECL__ long long atomicOr(long long *address, long long val) __DEF_IF_HOST
107
+
108
+ __SM_32_ATOMIC_FUNCTIONS_DECL__ long long atomicXor(long long *address, long long val) __DEF_IF_HOST
109
+
110
+ __SM_32_ATOMIC_FUNCTIONS_DECL__ unsigned long long atomicMin(unsigned long long *address, unsigned long long val) __DEF_IF_HOST
111
+
112
+ __SM_32_ATOMIC_FUNCTIONS_DECL__ unsigned long long atomicMax(unsigned long long *address, unsigned long long val) __DEF_IF_HOST
113
+
114
+ __SM_32_ATOMIC_FUNCTIONS_DECL__ unsigned long long atomicAnd(unsigned long long *address, unsigned long long val) __DEF_IF_HOST
115
+
116
+ __SM_32_ATOMIC_FUNCTIONS_DECL__ unsigned long long atomicOr(unsigned long long *address, unsigned long long val) __DEF_IF_HOST
117
+
118
+ __SM_32_ATOMIC_FUNCTIONS_DECL__ unsigned long long atomicXor(unsigned long long *address, unsigned long long val) __DEF_IF_HOST
119
+
120
+ #endif /* !__CUDA_ARCH__ || __CUDA_ARCH__ >= 320 */
121
+
122
+ #endif /* __cplusplus && __CUDACC__ */
123
+
124
+ #undef __DEF_IF_HOST
125
+ #undef __SM_32_ATOMIC_FUNCTIONS_DECL__
126
+
127
+ #if !defined(__CUDACC_RTC__) && defined(__CUDA_ARCH__)
128
+ #include "sm_32_atomic_functions.hpp"
129
+ #endif /* !__CUDACC_RTC__ && defined(__CUDA_ARCH__) */
130
+
131
+ #endif /* !__SM_32_ATOMIC_FUNCTIONS_H__ */
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_runtime/include/sm_32_intrinsics.h ADDED
@@ -0,0 +1,510 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 1993-2020 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #if !defined(__SM_32_INTRINSICS_H__)
51
+ #define __SM_32_INTRINSICS_H__
52
+
53
+ #if defined(__CUDACC_RTC__)
54
+ #define __SM_32_INTRINSICS_DECL__ __device__
55
+ #else /* !__CUDACC_RTC__ */
56
+ #define __SM_32_INTRINSICS_DECL__ static __device__ __inline__
57
+ #endif /* __CUDACC_RTC__ */
58
+
59
+ #if defined(__cplusplus) && defined(__CUDACC__)
60
+
61
+ #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 320
62
+
63
+ /*******************************************************************************
64
+ * *
65
+ * *
66
+ * *
67
+ *******************************************************************************/
68
+
69
+ #include "cuda_runtime_api.h"
70
+
71
+ #ifndef __CUDA_ARCH__
72
+ #define __DEF_IF_HOST { }
73
+ #else /* !__CUDA_ARCH__ */
74
+ #define __DEF_IF_HOST ;
75
+ #endif /* __CUDA_ARCH__ */
76
+
77
+
78
+ /*******************************************************************************
79
+ * *
80
+ * Below are declarations of SM-3.5 intrinsics which are included as *
81
+ * source (instead of being built in to the compiler) *
82
+ * *
83
+ *******************************************************************************/
84
+ /******************************************************************************
85
+ * __ldg *
86
+ ******************************************************************************/
87
+ __SM_32_INTRINSICS_DECL__ long __ldg(const long *ptr) __DEF_IF_HOST
88
+ __SM_32_INTRINSICS_DECL__ unsigned long __ldg(const unsigned long *ptr) __DEF_IF_HOST
89
+
90
+ __SM_32_INTRINSICS_DECL__ char __ldg(const char *ptr) __DEF_IF_HOST
91
+ __SM_32_INTRINSICS_DECL__ signed char __ldg(const signed char *ptr) __DEF_IF_HOST
92
+ __SM_32_INTRINSICS_DECL__ short __ldg(const short *ptr) __DEF_IF_HOST
93
+ __SM_32_INTRINSICS_DECL__ int __ldg(const int *ptr) __DEF_IF_HOST
94
+ __SM_32_INTRINSICS_DECL__ long long __ldg(const long long *ptr) __DEF_IF_HOST
95
+ __SM_32_INTRINSICS_DECL__ char2 __ldg(const char2 *ptr) __DEF_IF_HOST
96
+ __SM_32_INTRINSICS_DECL__ char4 __ldg(const char4 *ptr) __DEF_IF_HOST
97
+ __SM_32_INTRINSICS_DECL__ short2 __ldg(const short2 *ptr) __DEF_IF_HOST
98
+ __SM_32_INTRINSICS_DECL__ short4 __ldg(const short4 *ptr) __DEF_IF_HOST
99
+ __SM_32_INTRINSICS_DECL__ int2 __ldg(const int2 *ptr) __DEF_IF_HOST
100
+ __SM_32_INTRINSICS_DECL__ int4 __ldg(const int4 *ptr) __DEF_IF_HOST
101
+ __SM_32_INTRINSICS_DECL__ longlong2 __ldg(const longlong2 *ptr) __DEF_IF_HOST
102
+
103
+ __SM_32_INTRINSICS_DECL__ unsigned char __ldg(const unsigned char *ptr) __DEF_IF_HOST
104
+ __SM_32_INTRINSICS_DECL__ unsigned short __ldg(const unsigned short *ptr) __DEF_IF_HOST
105
+ __SM_32_INTRINSICS_DECL__ unsigned int __ldg(const unsigned int *ptr) __DEF_IF_HOST
106
+ __SM_32_INTRINSICS_DECL__ unsigned long long __ldg(const unsigned long long *ptr) __DEF_IF_HOST
107
+ __SM_32_INTRINSICS_DECL__ uchar2 __ldg(const uchar2 *ptr) __DEF_IF_HOST
108
+ __SM_32_INTRINSICS_DECL__ uchar4 __ldg(const uchar4 *ptr) __DEF_IF_HOST
109
+ __SM_32_INTRINSICS_DECL__ ushort2 __ldg(const ushort2 *ptr) __DEF_IF_HOST
110
+ __SM_32_INTRINSICS_DECL__ ushort4 __ldg(const ushort4 *ptr) __DEF_IF_HOST
111
+ __SM_32_INTRINSICS_DECL__ uint2 __ldg(const uint2 *ptr) __DEF_IF_HOST
112
+ __SM_32_INTRINSICS_DECL__ uint4 __ldg(const uint4 *ptr) __DEF_IF_HOST
113
+ __SM_32_INTRINSICS_DECL__ ulonglong2 __ldg(const ulonglong2 *ptr) __DEF_IF_HOST
114
+
115
+ __SM_32_INTRINSICS_DECL__ float __ldg(const float *ptr) __DEF_IF_HOST
116
+ __SM_32_INTRINSICS_DECL__ double __ldg(const double *ptr) __DEF_IF_HOST
117
+ __SM_32_INTRINSICS_DECL__ float2 __ldg(const float2 *ptr) __DEF_IF_HOST
118
+ __SM_32_INTRINSICS_DECL__ float4 __ldg(const float4 *ptr) __DEF_IF_HOST
119
+ __SM_32_INTRINSICS_DECL__ double2 __ldg(const double2 *ptr) __DEF_IF_HOST
120
+ /******************************************************************************
121
+ * __ldcg *
122
+ ******************************************************************************/
123
+ __SM_32_INTRINSICS_DECL__ long __ldcg(const long *ptr) __DEF_IF_HOST
124
+ __SM_32_INTRINSICS_DECL__ unsigned long __ldcg(const unsigned long *ptr) __DEF_IF_HOST
125
+
126
+ __SM_32_INTRINSICS_DECL__ char __ldcg(const char *ptr) __DEF_IF_HOST
127
+ __SM_32_INTRINSICS_DECL__ signed char __ldcg(const signed char *ptr) __DEF_IF_HOST
128
+ __SM_32_INTRINSICS_DECL__ short __ldcg(const short *ptr) __DEF_IF_HOST
129
+ __SM_32_INTRINSICS_DECL__ int __ldcg(const int *ptr) __DEF_IF_HOST
130
+ __SM_32_INTRINSICS_DECL__ long long __ldcg(const long long *ptr) __DEF_IF_HOST
131
+ __SM_32_INTRINSICS_DECL__ char2 __ldcg(const char2 *ptr) __DEF_IF_HOST
132
+ __SM_32_INTRINSICS_DECL__ char4 __ldcg(const char4 *ptr) __DEF_IF_HOST
133
+ __SM_32_INTRINSICS_DECL__ short2 __ldcg(const short2 *ptr) __DEF_IF_HOST
134
+ __SM_32_INTRINSICS_DECL__ short4 __ldcg(const short4 *ptr) __DEF_IF_HOST
135
+ __SM_32_INTRINSICS_DECL__ int2 __ldcg(const int2 *ptr) __DEF_IF_HOST
136
+ __SM_32_INTRINSICS_DECL__ int4 __ldcg(const int4 *ptr) __DEF_IF_HOST
137
+ __SM_32_INTRINSICS_DECL__ longlong2 __ldcg(const longlong2 *ptr) __DEF_IF_HOST
138
+
139
+ __SM_32_INTRINSICS_DECL__ unsigned char __ldcg(const unsigned char *ptr) __DEF_IF_HOST
140
+ __SM_32_INTRINSICS_DECL__ unsigned short __ldcg(const unsigned short *ptr) __DEF_IF_HOST
141
+ __SM_32_INTRINSICS_DECL__ unsigned int __ldcg(const unsigned int *ptr) __DEF_IF_HOST
142
+ __SM_32_INTRINSICS_DECL__ unsigned long long __ldcg(const unsigned long long *ptr) __DEF_IF_HOST
143
+ __SM_32_INTRINSICS_DECL__ uchar2 __ldcg(const uchar2 *ptr) __DEF_IF_HOST
144
+ __SM_32_INTRINSICS_DECL__ uchar4 __ldcg(const uchar4 *ptr) __DEF_IF_HOST
145
+ __SM_32_INTRINSICS_DECL__ ushort2 __ldcg(const ushort2 *ptr) __DEF_IF_HOST
146
+ __SM_32_INTRINSICS_DECL__ ushort4 __ldcg(const ushort4 *ptr) __DEF_IF_HOST
147
+ __SM_32_INTRINSICS_DECL__ uint2 __ldcg(const uint2 *ptr) __DEF_IF_HOST
148
+ __SM_32_INTRINSICS_DECL__ uint4 __ldcg(const uint4 *ptr) __DEF_IF_HOST
149
+ __SM_32_INTRINSICS_DECL__ ulonglong2 __ldcg(const ulonglong2 *ptr) __DEF_IF_HOST
150
+
151
+ __SM_32_INTRINSICS_DECL__ float __ldcg(const float *ptr) __DEF_IF_HOST
152
+ __SM_32_INTRINSICS_DECL__ double __ldcg(const double *ptr) __DEF_IF_HOST
153
+ __SM_32_INTRINSICS_DECL__ float2 __ldcg(const float2 *ptr) __DEF_IF_HOST
154
+ __SM_32_INTRINSICS_DECL__ float4 __ldcg(const float4 *ptr) __DEF_IF_HOST
155
+ __SM_32_INTRINSICS_DECL__ double2 __ldcg(const double2 *ptr) __DEF_IF_HOST
156
+ /******************************************************************************
157
+ * __ldca *
158
+ ******************************************************************************/
159
+ __SM_32_INTRINSICS_DECL__ long __ldca(const long *ptr) __DEF_IF_HOST
160
+ __SM_32_INTRINSICS_DECL__ unsigned long __ldca(const unsigned long *ptr) __DEF_IF_HOST
161
+
162
+ __SM_32_INTRINSICS_DECL__ char __ldca(const char *ptr) __DEF_IF_HOST
163
+ __SM_32_INTRINSICS_DECL__ signed char __ldca(const signed char *ptr) __DEF_IF_HOST
164
+ __SM_32_INTRINSICS_DECL__ short __ldca(const short *ptr) __DEF_IF_HOST
165
+ __SM_32_INTRINSICS_DECL__ int __ldca(const int *ptr) __DEF_IF_HOST
166
+ __SM_32_INTRINSICS_DECL__ long long __ldca(const long long *ptr) __DEF_IF_HOST
167
+ __SM_32_INTRINSICS_DECL__ char2 __ldca(const char2 *ptr) __DEF_IF_HOST
168
+ __SM_32_INTRINSICS_DECL__ char4 __ldca(const char4 *ptr) __DEF_IF_HOST
169
+ __SM_32_INTRINSICS_DECL__ short2 __ldca(const short2 *ptr) __DEF_IF_HOST
170
+ __SM_32_INTRINSICS_DECL__ short4 __ldca(const short4 *ptr) __DEF_IF_HOST
171
+ __SM_32_INTRINSICS_DECL__ int2 __ldca(const int2 *ptr) __DEF_IF_HOST
172
+ __SM_32_INTRINSICS_DECL__ int4 __ldca(const int4 *ptr) __DEF_IF_HOST
173
+ __SM_32_INTRINSICS_DECL__ longlong2 __ldca(const longlong2 *ptr) __DEF_IF_HOST
174
+
175
+ __SM_32_INTRINSICS_DECL__ unsigned char __ldca(const unsigned char *ptr) __DEF_IF_HOST
176
+ __SM_32_INTRINSICS_DECL__ unsigned short __ldca(const unsigned short *ptr) __DEF_IF_HOST
177
+ __SM_32_INTRINSICS_DECL__ unsigned int __ldca(const unsigned int *ptr) __DEF_IF_HOST
178
+ __SM_32_INTRINSICS_DECL__ unsigned long long __ldca(const unsigned long long *ptr) __DEF_IF_HOST
179
+ __SM_32_INTRINSICS_DECL__ uchar2 __ldca(const uchar2 *ptr) __DEF_IF_HOST
180
+ __SM_32_INTRINSICS_DECL__ uchar4 __ldca(const uchar4 *ptr) __DEF_IF_HOST
181
+ __SM_32_INTRINSICS_DECL__ ushort2 __ldca(const ushort2 *ptr) __DEF_IF_HOST
182
+ __SM_32_INTRINSICS_DECL__ ushort4 __ldca(const ushort4 *ptr) __DEF_IF_HOST
183
+ __SM_32_INTRINSICS_DECL__ uint2 __ldca(const uint2 *ptr) __DEF_IF_HOST
184
+ __SM_32_INTRINSICS_DECL__ uint4 __ldca(const uint4 *ptr) __DEF_IF_HOST
185
+ __SM_32_INTRINSICS_DECL__ ulonglong2 __ldca(const ulonglong2 *ptr) __DEF_IF_HOST
186
+
187
+ __SM_32_INTRINSICS_DECL__ float __ldca(const float *ptr) __DEF_IF_HOST
188
+ __SM_32_INTRINSICS_DECL__ double __ldca(const double *ptr) __DEF_IF_HOST
189
+ __SM_32_INTRINSICS_DECL__ float2 __ldca(const float2 *ptr) __DEF_IF_HOST
190
+ __SM_32_INTRINSICS_DECL__ float4 __ldca(const float4 *ptr) __DEF_IF_HOST
191
+ __SM_32_INTRINSICS_DECL__ double2 __ldca(const double2 *ptr) __DEF_IF_HOST
192
+ /******************************************************************************
193
+ * __ldcs *
194
+ ******************************************************************************/
195
+ __SM_32_INTRINSICS_DECL__ long __ldcs(const long *ptr) __DEF_IF_HOST
196
+ __SM_32_INTRINSICS_DECL__ unsigned long __ldcs(const unsigned long *ptr) __DEF_IF_HOST
197
+
198
+ __SM_32_INTRINSICS_DECL__ char __ldcs(const char *ptr) __DEF_IF_HOST
199
+ __SM_32_INTRINSICS_DECL__ signed char __ldcs(const signed char *ptr) __DEF_IF_HOST
200
+ __SM_32_INTRINSICS_DECL__ short __ldcs(const short *ptr) __DEF_IF_HOST
201
+ __SM_32_INTRINSICS_DECL__ int __ldcs(const int *ptr) __DEF_IF_HOST
202
+ __SM_32_INTRINSICS_DECL__ long long __ldcs(const long long *ptr) __DEF_IF_HOST
203
+ __SM_32_INTRINSICS_DECL__ char2 __ldcs(const char2 *ptr) __DEF_IF_HOST
204
+ __SM_32_INTRINSICS_DECL__ char4 __ldcs(const char4 *ptr) __DEF_IF_HOST
205
+ __SM_32_INTRINSICS_DECL__ short2 __ldcs(const short2 *ptr) __DEF_IF_HOST
206
+ __SM_32_INTRINSICS_DECL__ short4 __ldcs(const short4 *ptr) __DEF_IF_HOST
207
+ __SM_32_INTRINSICS_DECL__ int2 __ldcs(const int2 *ptr) __DEF_IF_HOST
208
+ __SM_32_INTRINSICS_DECL__ int4 __ldcs(const int4 *ptr) __DEF_IF_HOST
209
+ __SM_32_INTRINSICS_DECL__ longlong2 __ldcs(const longlong2 *ptr) __DEF_IF_HOST
210
+
211
+ __SM_32_INTRINSICS_DECL__ unsigned char __ldcs(const unsigned char *ptr) __DEF_IF_HOST
212
+ __SM_32_INTRINSICS_DECL__ unsigned short __ldcs(const unsigned short *ptr) __DEF_IF_HOST
213
+ __SM_32_INTRINSICS_DECL__ unsigned int __ldcs(const unsigned int *ptr) __DEF_IF_HOST
214
+ __SM_32_INTRINSICS_DECL__ unsigned long long __ldcs(const unsigned long long *ptr) __DEF_IF_HOST
215
+ __SM_32_INTRINSICS_DECL__ uchar2 __ldcs(const uchar2 *ptr) __DEF_IF_HOST
216
+ __SM_32_INTRINSICS_DECL__ uchar4 __ldcs(const uchar4 *ptr) __DEF_IF_HOST
217
+ __SM_32_INTRINSICS_DECL__ ushort2 __ldcs(const ushort2 *ptr) __DEF_IF_HOST
218
+ __SM_32_INTRINSICS_DECL__ ushort4 __ldcs(const ushort4 *ptr) __DEF_IF_HOST
219
+ __SM_32_INTRINSICS_DECL__ uint2 __ldcs(const uint2 *ptr) __DEF_IF_HOST
220
+ __SM_32_INTRINSICS_DECL__ uint4 __ldcs(const uint4 *ptr) __DEF_IF_HOST
221
+ __SM_32_INTRINSICS_DECL__ ulonglong2 __ldcs(const ulonglong2 *ptr) __DEF_IF_HOST
222
+
223
+ __SM_32_INTRINSICS_DECL__ float __ldcs(const float *ptr) __DEF_IF_HOST
224
+ __SM_32_INTRINSICS_DECL__ double __ldcs(const double *ptr) __DEF_IF_HOST
225
+ __SM_32_INTRINSICS_DECL__ float2 __ldcs(const float2 *ptr) __DEF_IF_HOST
226
+ __SM_32_INTRINSICS_DECL__ float4 __ldcs(const float4 *ptr) __DEF_IF_HOST
227
+ __SM_32_INTRINSICS_DECL__ double2 __ldcs(const double2 *ptr) __DEF_IF_HOST
228
+ /******************************************************************************
229
+ * __ldlu *
230
+ ******************************************************************************/
231
+ __SM_32_INTRINSICS_DECL__ long __ldlu(const long *ptr) __DEF_IF_HOST
232
+ __SM_32_INTRINSICS_DECL__ unsigned long __ldlu(const unsigned long *ptr) __DEF_IF_HOST
233
+
234
+ __SM_32_INTRINSICS_DECL__ char __ldlu(const char *ptr) __DEF_IF_HOST
235
+ __SM_32_INTRINSICS_DECL__ signed char __ldlu(const signed char *ptr) __DEF_IF_HOST
236
+ __SM_32_INTRINSICS_DECL__ short __ldlu(const short *ptr) __DEF_IF_HOST
237
+ __SM_32_INTRINSICS_DECL__ int __ldlu(const int *ptr) __DEF_IF_HOST
238
+ __SM_32_INTRINSICS_DECL__ long long __ldlu(const long long *ptr) __DEF_IF_HOST
239
+ __SM_32_INTRINSICS_DECL__ char2 __ldlu(const char2 *ptr) __DEF_IF_HOST
240
+ __SM_32_INTRINSICS_DECL__ char4 __ldlu(const char4 *ptr) __DEF_IF_HOST
241
+ __SM_32_INTRINSICS_DECL__ short2 __ldlu(const short2 *ptr) __DEF_IF_HOST
242
+ __SM_32_INTRINSICS_DECL__ short4 __ldlu(const short4 *ptr) __DEF_IF_HOST
243
+ __SM_32_INTRINSICS_DECL__ int2 __ldlu(const int2 *ptr) __DEF_IF_HOST
244
+ __SM_32_INTRINSICS_DECL__ int4 __ldlu(const int4 *ptr) __DEF_IF_HOST
245
+ __SM_32_INTRINSICS_DECL__ longlong2 __ldlu(const longlong2 *ptr) __DEF_IF_HOST
246
+
247
+ __SM_32_INTRINSICS_DECL__ unsigned char __ldlu(const unsigned char *ptr) __DEF_IF_HOST
248
+ __SM_32_INTRINSICS_DECL__ unsigned short __ldlu(const unsigned short *ptr) __DEF_IF_HOST
249
+ __SM_32_INTRINSICS_DECL__ unsigned int __ldlu(const unsigned int *ptr) __DEF_IF_HOST
250
+ __SM_32_INTRINSICS_DECL__ unsigned long long __ldlu(const unsigned long long *ptr) __DEF_IF_HOST
251
+ __SM_32_INTRINSICS_DECL__ uchar2 __ldlu(const uchar2 *ptr) __DEF_IF_HOST
252
+ __SM_32_INTRINSICS_DECL__ uchar4 __ldlu(const uchar4 *ptr) __DEF_IF_HOST
253
+ __SM_32_INTRINSICS_DECL__ ushort2 __ldlu(const ushort2 *ptr) __DEF_IF_HOST
254
+ __SM_32_INTRINSICS_DECL__ ushort4 __ldlu(const ushort4 *ptr) __DEF_IF_HOST
255
+ __SM_32_INTRINSICS_DECL__ uint2 __ldlu(const uint2 *ptr) __DEF_IF_HOST
256
+ __SM_32_INTRINSICS_DECL__ uint4 __ldlu(const uint4 *ptr) __DEF_IF_HOST
257
+ __SM_32_INTRINSICS_DECL__ ulonglong2 __ldlu(const ulonglong2 *ptr) __DEF_IF_HOST
258
+
259
+ __SM_32_INTRINSICS_DECL__ float __ldlu(const float *ptr) __DEF_IF_HOST
260
+ __SM_32_INTRINSICS_DECL__ double __ldlu(const double *ptr) __DEF_IF_HOST
261
+ __SM_32_INTRINSICS_DECL__ float2 __ldlu(const float2 *ptr) __DEF_IF_HOST
262
+ __SM_32_INTRINSICS_DECL__ float4 __ldlu(const float4 *ptr) __DEF_IF_HOST
263
+ __SM_32_INTRINSICS_DECL__ double2 __ldlu(const double2 *ptr) __DEF_IF_HOST
264
+ /******************************************************************************
265
+ * __ldcv *
266
+ ******************************************************************************/
267
+ __SM_32_INTRINSICS_DECL__ long __ldcv(const long *ptr) __DEF_IF_HOST
268
+ __SM_32_INTRINSICS_DECL__ unsigned long __ldcv(const unsigned long *ptr) __DEF_IF_HOST
269
+
270
+ __SM_32_INTRINSICS_DECL__ char __ldcv(const char *ptr) __DEF_IF_HOST
271
+ __SM_32_INTRINSICS_DECL__ signed char __ldcv(const signed char *ptr) __DEF_IF_HOST
272
+ __SM_32_INTRINSICS_DECL__ short __ldcv(const short *ptr) __DEF_IF_HOST
273
+ __SM_32_INTRINSICS_DECL__ int __ldcv(const int *ptr) __DEF_IF_HOST
274
+ __SM_32_INTRINSICS_DECL__ long long __ldcv(const long long *ptr) __DEF_IF_HOST
275
+ __SM_32_INTRINSICS_DECL__ char2 __ldcv(const char2 *ptr) __DEF_IF_HOST
276
+ __SM_32_INTRINSICS_DECL__ char4 __ldcv(const char4 *ptr) __DEF_IF_HOST
277
+ __SM_32_INTRINSICS_DECL__ short2 __ldcv(const short2 *ptr) __DEF_IF_HOST
278
+ __SM_32_INTRINSICS_DECL__ short4 __ldcv(const short4 *ptr) __DEF_IF_HOST
279
+ __SM_32_INTRINSICS_DECL__ int2 __ldcv(const int2 *ptr) __DEF_IF_HOST
280
+ __SM_32_INTRINSICS_DECL__ int4 __ldcv(const int4 *ptr) __DEF_IF_HOST
281
+ __SM_32_INTRINSICS_DECL__ longlong2 __ldcv(const longlong2 *ptr) __DEF_IF_HOST
282
+
283
+ __SM_32_INTRINSICS_DECL__ unsigned char __ldcv(const unsigned char *ptr) __DEF_IF_HOST
284
+ __SM_32_INTRINSICS_DECL__ unsigned short __ldcv(const unsigned short *ptr) __DEF_IF_HOST
285
+ __SM_32_INTRINSICS_DECL__ unsigned int __ldcv(const unsigned int *ptr) __DEF_IF_HOST
286
+ __SM_32_INTRINSICS_DECL__ unsigned long long __ldcv(const unsigned long long *ptr) __DEF_IF_HOST
287
+ __SM_32_INTRINSICS_DECL__ uchar2 __ldcv(const uchar2 *ptr) __DEF_IF_HOST
288
+ __SM_32_INTRINSICS_DECL__ uchar4 __ldcv(const uchar4 *ptr) __DEF_IF_HOST
289
+ __SM_32_INTRINSICS_DECL__ ushort2 __ldcv(const ushort2 *ptr) __DEF_IF_HOST
290
+ __SM_32_INTRINSICS_DECL__ ushort4 __ldcv(const ushort4 *ptr) __DEF_IF_HOST
291
+ __SM_32_INTRINSICS_DECL__ uint2 __ldcv(const uint2 *ptr) __DEF_IF_HOST
292
+ __SM_32_INTRINSICS_DECL__ uint4 __ldcv(const uint4 *ptr) __DEF_IF_HOST
293
+ __SM_32_INTRINSICS_DECL__ ulonglong2 __ldcv(const ulonglong2 *ptr) __DEF_IF_HOST
294
+
295
+ __SM_32_INTRINSICS_DECL__ float __ldcv(const float *ptr) __DEF_IF_HOST
296
+ __SM_32_INTRINSICS_DECL__ double __ldcv(const double *ptr) __DEF_IF_HOST
297
+ __SM_32_INTRINSICS_DECL__ float2 __ldcv(const float2 *ptr) __DEF_IF_HOST
298
+ __SM_32_INTRINSICS_DECL__ float4 __ldcv(const float4 *ptr) __DEF_IF_HOST
299
+ __SM_32_INTRINSICS_DECL__ double2 __ldcv(const double2 *ptr) __DEF_IF_HOST
300
+ /******************************************************************************
301
+ * __stwb *
302
+ ******************************************************************************/
303
+ __SM_32_INTRINSICS_DECL__ void __stwb(long *ptr, long value) __DEF_IF_HOST
304
+ __SM_32_INTRINSICS_DECL__ void __stwb(unsigned long *ptr, unsigned long value) __DEF_IF_HOST
305
+
306
+ __SM_32_INTRINSICS_DECL__ void __stwb(char *ptr, char value) __DEF_IF_HOST
307
+ __SM_32_INTRINSICS_DECL__ void __stwb(signed char *ptr, signed char value) __DEF_IF_HOST
308
+ __SM_32_INTRINSICS_DECL__ void __stwb(short *ptr, short value) __DEF_IF_HOST
309
+ __SM_32_INTRINSICS_DECL__ void __stwb(int *ptr, int value) __DEF_IF_HOST
310
+ __SM_32_INTRINSICS_DECL__ void __stwb(long long *ptr, long long value) __DEF_IF_HOST
311
+ __SM_32_INTRINSICS_DECL__ void __stwb(char2 *ptr, char2 value) __DEF_IF_HOST
312
+ __SM_32_INTRINSICS_DECL__ void __stwb(char4 *ptr, char4 value) __DEF_IF_HOST
313
+ __SM_32_INTRINSICS_DECL__ void __stwb(short2 *ptr, short2 value) __DEF_IF_HOST
314
+ __SM_32_INTRINSICS_DECL__ void __stwb(short4 *ptr, short4 value) __DEF_IF_HOST
315
+ __SM_32_INTRINSICS_DECL__ void __stwb(int2 *ptr, int2 value) __DEF_IF_HOST
316
+ __SM_32_INTRINSICS_DECL__ void __stwb(int4 *ptr, int4 value) __DEF_IF_HOST
317
+ __SM_32_INTRINSICS_DECL__ void __stwb(longlong2 *ptr, longlong2 value) __DEF_IF_HOST
318
+
319
+ __SM_32_INTRINSICS_DECL__ void __stwb(unsigned char *ptr, unsigned char value) __DEF_IF_HOST
320
+ __SM_32_INTRINSICS_DECL__ void __stwb(unsigned short *ptr, unsigned short value) __DEF_IF_HOST
321
+ __SM_32_INTRINSICS_DECL__ void __stwb(unsigned int *ptr, unsigned int value) __DEF_IF_HOST
322
+ __SM_32_INTRINSICS_DECL__ void __stwb(unsigned long long *ptr, unsigned long long value) __DEF_IF_HOST
323
+ __SM_32_INTRINSICS_DECL__ void __stwb(uchar2 *ptr, uchar2 value) __DEF_IF_HOST
324
+ __SM_32_INTRINSICS_DECL__ void __stwb(uchar4 *ptr, uchar4 value) __DEF_IF_HOST
325
+ __SM_32_INTRINSICS_DECL__ void __stwb(ushort2 *ptr, ushort2 value) __DEF_IF_HOST
326
+ __SM_32_INTRINSICS_DECL__ void __stwb(ushort4 *ptr, ushort4 value) __DEF_IF_HOST
327
+ __SM_32_INTRINSICS_DECL__ void __stwb(uint2 *ptr, uint2 value) __DEF_IF_HOST
328
+ __SM_32_INTRINSICS_DECL__ void __stwb(uint4 *ptr, uint4 value) __DEF_IF_HOST
329
+ __SM_32_INTRINSICS_DECL__ void __stwb(ulonglong2 *ptr, ulonglong2 value) __DEF_IF_HOST
330
+
331
+ __SM_32_INTRINSICS_DECL__ void __stwb(float *ptr, float value) __DEF_IF_HOST
332
+ __SM_32_INTRINSICS_DECL__ void __stwb(double *ptr, double value) __DEF_IF_HOST
333
+ __SM_32_INTRINSICS_DECL__ void __stwb(float2 *ptr, float2 value) __DEF_IF_HOST
334
+ __SM_32_INTRINSICS_DECL__ void __stwb(float4 *ptr, float4 value) __DEF_IF_HOST
335
+ __SM_32_INTRINSICS_DECL__ void __stwb(double2 *ptr, double2 value) __DEF_IF_HOST
336
+ /******************************************************************************
337
+ * __stcg *
338
+ ******************************************************************************/
339
+ __SM_32_INTRINSICS_DECL__ void __stcg(long *ptr, long value) __DEF_IF_HOST
340
+ __SM_32_INTRINSICS_DECL__ void __stcg(unsigned long *ptr, unsigned long value) __DEF_IF_HOST
341
+
342
+ __SM_32_INTRINSICS_DECL__ void __stcg(char *ptr, char value) __DEF_IF_HOST
343
+ __SM_32_INTRINSICS_DECL__ void __stcg(signed char *ptr, signed char value) __DEF_IF_HOST
344
+ __SM_32_INTRINSICS_DECL__ void __stcg(short *ptr, short value) __DEF_IF_HOST
345
+ __SM_32_INTRINSICS_DECL__ void __stcg(int *ptr, int value) __DEF_IF_HOST
346
+ __SM_32_INTRINSICS_DECL__ void __stcg(long long *ptr, long long value) __DEF_IF_HOST
347
+ __SM_32_INTRINSICS_DECL__ void __stcg(char2 *ptr, char2 value) __DEF_IF_HOST
348
+ __SM_32_INTRINSICS_DECL__ void __stcg(char4 *ptr, char4 value) __DEF_IF_HOST
349
+ __SM_32_INTRINSICS_DECL__ void __stcg(short2 *ptr, short2 value) __DEF_IF_HOST
350
+ __SM_32_INTRINSICS_DECL__ void __stcg(short4 *ptr, short4 value) __DEF_IF_HOST
351
+ __SM_32_INTRINSICS_DECL__ void __stcg(int2 *ptr, int2 value) __DEF_IF_HOST
352
+ __SM_32_INTRINSICS_DECL__ void __stcg(int4 *ptr, int4 value) __DEF_IF_HOST
353
+ __SM_32_INTRINSICS_DECL__ void __stcg(longlong2 *ptr, longlong2 value) __DEF_IF_HOST
354
+
355
+ __SM_32_INTRINSICS_DECL__ void __stcg(unsigned char *ptr, unsigned char value) __DEF_IF_HOST
356
+ __SM_32_INTRINSICS_DECL__ void __stcg(unsigned short *ptr, unsigned short value) __DEF_IF_HOST
357
+ __SM_32_INTRINSICS_DECL__ void __stcg(unsigned int *ptr, unsigned int value) __DEF_IF_HOST
358
+ __SM_32_INTRINSICS_DECL__ void __stcg(unsigned long long *ptr, unsigned long long value) __DEF_IF_HOST
359
+ __SM_32_INTRINSICS_DECL__ void __stcg(uchar2 *ptr, uchar2 value) __DEF_IF_HOST
360
+ __SM_32_INTRINSICS_DECL__ void __stcg(uchar4 *ptr, uchar4 value) __DEF_IF_HOST
361
+ __SM_32_INTRINSICS_DECL__ void __stcg(ushort2 *ptr, ushort2 value) __DEF_IF_HOST
362
+ __SM_32_INTRINSICS_DECL__ void __stcg(ushort4 *ptr, ushort4 value) __DEF_IF_HOST
363
+ __SM_32_INTRINSICS_DECL__ void __stcg(uint2 *ptr, uint2 value) __DEF_IF_HOST
364
+ __SM_32_INTRINSICS_DECL__ void __stcg(uint4 *ptr, uint4 value) __DEF_IF_HOST
365
+ __SM_32_INTRINSICS_DECL__ void __stcg(ulonglong2 *ptr, ulonglong2 value) __DEF_IF_HOST
366
+
367
+ __SM_32_INTRINSICS_DECL__ void __stcg(float *ptr, float value) __DEF_IF_HOST
368
+ __SM_32_INTRINSICS_DECL__ void __stcg(double *ptr, double value) __DEF_IF_HOST
369
+ __SM_32_INTRINSICS_DECL__ void __stcg(float2 *ptr, float2 value) __DEF_IF_HOST
370
+ __SM_32_INTRINSICS_DECL__ void __stcg(float4 *ptr, float4 value) __DEF_IF_HOST
371
+ __SM_32_INTRINSICS_DECL__ void __stcg(double2 *ptr, double2 value) __DEF_IF_HOST
372
+ /******************************************************************************
373
+ * __stcs *
374
+ ******************************************************************************/
375
+ __SM_32_INTRINSICS_DECL__ void __stcs(long *ptr, long value) __DEF_IF_HOST
376
+ __SM_32_INTRINSICS_DECL__ void __stcs(unsigned long *ptr, unsigned long value) __DEF_IF_HOST
377
+
378
+ __SM_32_INTRINSICS_DECL__ void __stcs(char *ptr, char value) __DEF_IF_HOST
379
+ __SM_32_INTRINSICS_DECL__ void __stcs(signed char *ptr, signed char value) __DEF_IF_HOST
380
+ __SM_32_INTRINSICS_DECL__ void __stcs(short *ptr, short value) __DEF_IF_HOST
381
+ __SM_32_INTRINSICS_DECL__ void __stcs(int *ptr, int value) __DEF_IF_HOST
382
+ __SM_32_INTRINSICS_DECL__ void __stcs(long long *ptr, long long value) __DEF_IF_HOST
383
+ __SM_32_INTRINSICS_DECL__ void __stcs(char2 *ptr, char2 value) __DEF_IF_HOST
384
+ __SM_32_INTRINSICS_DECL__ void __stcs(char4 *ptr, char4 value) __DEF_IF_HOST
385
+ __SM_32_INTRINSICS_DECL__ void __stcs(short2 *ptr, short2 value) __DEF_IF_HOST
386
+ __SM_32_INTRINSICS_DECL__ void __stcs(short4 *ptr, short4 value) __DEF_IF_HOST
387
+ __SM_32_INTRINSICS_DECL__ void __stcs(int2 *ptr, int2 value) __DEF_IF_HOST
388
+ __SM_32_INTRINSICS_DECL__ void __stcs(int4 *ptr, int4 value) __DEF_IF_HOST
389
+ __SM_32_INTRINSICS_DECL__ void __stcs(longlong2 *ptr, longlong2 value) __DEF_IF_HOST
390
+
391
+ __SM_32_INTRINSICS_DECL__ void __stcs(unsigned char *ptr, unsigned char value) __DEF_IF_HOST
392
+ __SM_32_INTRINSICS_DECL__ void __stcs(unsigned short *ptr, unsigned short value) __DEF_IF_HOST
393
+ __SM_32_INTRINSICS_DECL__ void __stcs(unsigned int *ptr, unsigned int value) __DEF_IF_HOST
394
+ __SM_32_INTRINSICS_DECL__ void __stcs(unsigned long long *ptr, unsigned long long value) __DEF_IF_HOST
395
+ __SM_32_INTRINSICS_DECL__ void __stcs(uchar2 *ptr, uchar2 value) __DEF_IF_HOST
396
+ __SM_32_INTRINSICS_DECL__ void __stcs(uchar4 *ptr, uchar4 value) __DEF_IF_HOST
397
+ __SM_32_INTRINSICS_DECL__ void __stcs(ushort2 *ptr, ushort2 value) __DEF_IF_HOST
398
+ __SM_32_INTRINSICS_DECL__ void __stcs(ushort4 *ptr, ushort4 value) __DEF_IF_HOST
399
+ __SM_32_INTRINSICS_DECL__ void __stcs(uint2 *ptr, uint2 value) __DEF_IF_HOST
400
+ __SM_32_INTRINSICS_DECL__ void __stcs(uint4 *ptr, uint4 value) __DEF_IF_HOST
401
+ __SM_32_INTRINSICS_DECL__ void __stcs(ulonglong2 *ptr, ulonglong2 value) __DEF_IF_HOST
402
+
403
+ __SM_32_INTRINSICS_DECL__ void __stcs(float *ptr, float value) __DEF_IF_HOST
404
+ __SM_32_INTRINSICS_DECL__ void __stcs(double *ptr, double value) __DEF_IF_HOST
405
+ __SM_32_INTRINSICS_DECL__ void __stcs(float2 *ptr, float2 value) __DEF_IF_HOST
406
+ __SM_32_INTRINSICS_DECL__ void __stcs(float4 *ptr, float4 value) __DEF_IF_HOST
407
+ __SM_32_INTRINSICS_DECL__ void __stcs(double2 *ptr, double2 value) __DEF_IF_HOST
408
+ /******************************************************************************
409
+ * __stwt *
410
+ ******************************************************************************/
411
+ __SM_32_INTRINSICS_DECL__ void __stwt(long *ptr, long value) __DEF_IF_HOST
412
+ __SM_32_INTRINSICS_DECL__ void __stwt(unsigned long *ptr, unsigned long value) __DEF_IF_HOST
413
+
414
+ __SM_32_INTRINSICS_DECL__ void __stwt(char *ptr, char value) __DEF_IF_HOST
415
+ __SM_32_INTRINSICS_DECL__ void __stwt(signed char *ptr, signed char value) __DEF_IF_HOST
416
+ __SM_32_INTRINSICS_DECL__ void __stwt(short *ptr, short value) __DEF_IF_HOST
417
+ __SM_32_INTRINSICS_DECL__ void __stwt(int *ptr, int value) __DEF_IF_HOST
418
+ __SM_32_INTRINSICS_DECL__ void __stwt(long long *ptr, long long value) __DEF_IF_HOST
419
+ __SM_32_INTRINSICS_DECL__ void __stwt(char2 *ptr, char2 value) __DEF_IF_HOST
420
+ __SM_32_INTRINSICS_DECL__ void __stwt(char4 *ptr, char4 value) __DEF_IF_HOST
421
+ __SM_32_INTRINSICS_DECL__ void __stwt(short2 *ptr, short2 value) __DEF_IF_HOST
422
+ __SM_32_INTRINSICS_DECL__ void __stwt(short4 *ptr, short4 value) __DEF_IF_HOST
423
+ __SM_32_INTRINSICS_DECL__ void __stwt(int2 *ptr, int2 value) __DEF_IF_HOST
424
+ __SM_32_INTRINSICS_DECL__ void __stwt(int4 *ptr, int4 value) __DEF_IF_HOST
425
+ __SM_32_INTRINSICS_DECL__ void __stwt(longlong2 *ptr, longlong2 value) __DEF_IF_HOST
426
+
427
+ __SM_32_INTRINSICS_DECL__ void __stwt(unsigned char *ptr, unsigned char value) __DEF_IF_HOST
428
+ __SM_32_INTRINSICS_DECL__ void __stwt(unsigned short *ptr, unsigned short value) __DEF_IF_HOST
429
+ __SM_32_INTRINSICS_DECL__ void __stwt(unsigned int *ptr, unsigned int value) __DEF_IF_HOST
430
+ __SM_32_INTRINSICS_DECL__ void __stwt(unsigned long long *ptr, unsigned long long value) __DEF_IF_HOST
431
+ __SM_32_INTRINSICS_DECL__ void __stwt(uchar2 *ptr, uchar2 value) __DEF_IF_HOST
432
+ __SM_32_INTRINSICS_DECL__ void __stwt(uchar4 *ptr, uchar4 value) __DEF_IF_HOST
433
+ __SM_32_INTRINSICS_DECL__ void __stwt(ushort2 *ptr, ushort2 value) __DEF_IF_HOST
434
+ __SM_32_INTRINSICS_DECL__ void __stwt(ushort4 *ptr, ushort4 value) __DEF_IF_HOST
435
+ __SM_32_INTRINSICS_DECL__ void __stwt(uint2 *ptr, uint2 value) __DEF_IF_HOST
436
+ __SM_32_INTRINSICS_DECL__ void __stwt(uint4 *ptr, uint4 value) __DEF_IF_HOST
437
+ __SM_32_INTRINSICS_DECL__ void __stwt(ulonglong2 *ptr, ulonglong2 value) __DEF_IF_HOST
438
+
439
+ __SM_32_INTRINSICS_DECL__ void __stwt(float *ptr, float value) __DEF_IF_HOST
440
+ __SM_32_INTRINSICS_DECL__ void __stwt(double *ptr, double value) __DEF_IF_HOST
441
+ __SM_32_INTRINSICS_DECL__ void __stwt(float2 *ptr, float2 value) __DEF_IF_HOST
442
+ __SM_32_INTRINSICS_DECL__ void __stwt(float4 *ptr, float4 value) __DEF_IF_HOST
443
+ __SM_32_INTRINSICS_DECL__ void __stwt(double2 *ptr, double2 value) __DEF_IF_HOST
444
+
445
+
446
+ // SHF is the "funnel shift" operation - an accelerated left/right shift with carry
447
+ // operating on 64-bit quantities, which are concatenations of two 32-bit registers.
448
+
449
+ /**
450
+ * \ingroup CUDA_MATH_INTRINSIC_INT
451
+ * \brief Concatenate \p hi : \p lo, shift left by \p shift & 31 bits, return the most significant 32 bits.
452
+ *
453
+ * Shift the 64-bit value formed by concatenating argument \p lo and \p hi left by the amount specified by the argument \p shift.
454
+ * Argument \p lo holds bits 31:0 and argument \p hi holds bits 63:32 of the 64-bit source value.
455
+ * The source is shifted left by the wrapped value of \p shift (\p shift & 31).
456
+ * The most significant 32-bits of the result are returned.
457
+ *
458
+ * \return Returns the most significant 32 bits of the shifted 64-bit value.
459
+ */
460
+ __SM_32_INTRINSICS_DECL__ unsigned int __funnelshift_l(unsigned int lo, unsigned int hi, unsigned int shift) __DEF_IF_HOST
461
+ /**
462
+ * \ingroup CUDA_MATH_INTRINSIC_INT
463
+ * \brief Concatenate \p hi : \p lo, shift left by min(\p shift, 32) bits, return the most significant 32 bits.
464
+ *
465
+ * Shift the 64-bit value formed by concatenating argument \p lo and \p hi left by the amount specified by the argument \p shift.
466
+ * Argument \p lo holds bits 31:0 and argument \p hi holds bits 63:32 of the 64-bit source value.
467
+ * The source is shifted left by the clamped value of \p shift (min(\p shift, 32)).
468
+ * The most significant 32-bits of the result are returned.
469
+ *
470
+ * \return Returns the most significant 32 bits of the shifted 64-bit value.
471
+ */
472
+ __SM_32_INTRINSICS_DECL__ unsigned int __funnelshift_lc(unsigned int lo, unsigned int hi, unsigned int shift) __DEF_IF_HOST
473
+
474
+ /**
475
+ * \ingroup CUDA_MATH_INTRINSIC_INT
476
+ * \brief Concatenate \p hi : \p lo, shift right by \p shift & 31 bits, return the least significant 32 bits.
477
+ *
478
+ * Shift the 64-bit value formed by concatenating argument \p lo and \p hi right by the amount specified by the argument \p shift.
479
+ * Argument \p lo holds bits 31:0 and argument \p hi holds bits 63:32 of the 64-bit source value.
480
+ * The source is shifted right by the wrapped value of \p shift (\p shift & 31).
481
+ * The least significant 32-bits of the result are returned.
482
+ *
483
+ * \return Returns the least significant 32 bits of the shifted 64-bit value.
484
+ */
485
+ __SM_32_INTRINSICS_DECL__ unsigned int __funnelshift_r(unsigned int lo, unsigned int hi, unsigned int shift) __DEF_IF_HOST
486
+ /**
487
+ * \ingroup CUDA_MATH_INTRINSIC_INT
488
+ * \brief Concatenate \p hi : \p lo, shift right by min(\p shift, 32) bits, return the least significant 32 bits.
489
+ *
490
+ * Shift the 64-bit value formed by concatenating argument \p lo and \p hi right by the amount specified by the argument \p shift.
491
+ * Argument \p lo holds bits 31:0 and argument \p hi holds bits 63:32 of the 64-bit source value.
492
+ * The source is shifted right by the clamped value of \p shift (min(\p shift, 32)).
493
+ * The least significant 32-bits of the result are returned.
494
+ *
495
+ * \return Returns the least significant 32 bits of the shifted 64-bit value.
496
+ */
497
+ __SM_32_INTRINSICS_DECL__ unsigned int __funnelshift_rc(unsigned int lo, unsigned int hi, unsigned int shift) __DEF_IF_HOST
498
+
499
+
500
+ #endif /* !__CUDA_ARCH__ || __CUDA_ARCH__ >= 320 */
501
+
502
+ #endif /* __cplusplus && __CUDACC__ */
503
+
504
+ #undef __SM_32_INTRINSICS_DECL__
505
+
506
+ #if !defined(__CUDACC_RTC__) && defined(__CUDA_ARCH__)
507
+ #include "sm_32_intrinsics.hpp"
508
+ #endif /* !__CUDACC_RTC__ && defined(__CUDA_ARCH__) */
509
+
510
+ #endif /* !__SM_32_INTRINSICS_H__ */
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_runtime/include/sm_32_intrinsics.hpp ADDED
@@ -0,0 +1,588 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 1993-2020 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #if !defined(__SM_32_INTRINSICS_HPP__)
51
+ #define __SM_32_INTRINSICS_HPP__
52
+
53
+ #if defined(__CUDACC_RTC__)
54
+ #define __SM_32_INTRINSICS_DECL__ __device__
55
+ #else /* !__CUDACC_RTC__ */
56
+ #define __SM_32_INTRINSICS_DECL__ static __device__ __inline__
57
+ #endif /* __CUDACC_RTC__ */
58
+
59
+ #if defined(__cplusplus) && defined(__CUDACC__)
60
+
61
+ #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 320
62
+
63
+ /*******************************************************************************
64
+ * *
65
+ * *
66
+ * *
67
+ *******************************************************************************/
68
+
69
+ #include "cuda_runtime_api.h"
70
+
71
+ // In here are intrinsics which are built in to the compiler. These may be
72
+ // referenced by intrinsic implementations from this file.
73
+ extern "C"
74
+ {
75
+ // There are no intrinsics built in to the compiler for SM-3.5,
76
+ // all intrinsics are now implemented as inline PTX below.
77
+ }
78
+
79
+ /*******************************************************************************
80
+ * *
81
+ * Below are implementations of SM-3.5 intrinsics which are included as *
82
+ * source (instead of being built in to the compiler) *
83
+ * *
84
+ *******************************************************************************/
85
+
86
+ // LDG is a "load from global via texture path" command which can exhibit higher
87
+ // bandwidth on GK110 than a regular LD.
88
+ // Define a different pointer storage size for 64 and 32 bit
89
+ #if (defined(_MSC_VER) && defined(_WIN64)) || defined(__LP64__) || defined(__CUDACC_RTC__)
90
+ #define __LDG_PTR "l"
91
+ #else
92
+ #define __LDG_PTR "r"
93
+ #endif
94
+
95
+ /******************************************************************************
96
+ * __ldg *
97
+ ******************************************************************************/
98
+
99
+ // Size of long is architecture and OS specific.
100
+ #if defined(__LP64__) // 64 bits
101
+ __SM_32_INTRINSICS_DECL__ long __ldg(const long *ptr) { unsigned long ret; asm volatile ("ld.global.nc.s64 %0, [%1];" : "=l"(ret) : __LDG_PTR (ptr)); return (long)ret; }
102
+ __SM_32_INTRINSICS_DECL__ unsigned long __ldg(const unsigned long *ptr) { unsigned long ret; asm volatile ("ld.global.nc.u64 %0, [%1];" : "=l"(ret) : __LDG_PTR (ptr)); return ret; }
103
+ #else // 32 bits
104
+ __SM_32_INTRINSICS_DECL__ long __ldg(const long *ptr) { unsigned long ret; asm volatile ("ld.global.nc.s32 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return (long)ret; }
105
+ __SM_32_INTRINSICS_DECL__ unsigned long __ldg(const unsigned long *ptr) { unsigned long ret; asm volatile ("ld.global.nc.u32 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return ret; }
106
+ #endif
107
+
108
+
109
+ __SM_32_INTRINSICS_DECL__ char __ldg(const char *ptr) { unsigned int ret; asm volatile ("ld.global.nc.s8 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return (char)ret; }
110
+ __SM_32_INTRINSICS_DECL__ signed char __ldg(const signed char *ptr) { unsigned int ret; asm volatile ("ld.global.nc.s8 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return (signed char)ret; }
111
+ __SM_32_INTRINSICS_DECL__ short __ldg(const short *ptr) { unsigned short ret; asm volatile ("ld.global.nc.s16 %0, [%1];" : "=h"(ret) : __LDG_PTR (ptr)); return (short)ret; }
112
+ __SM_32_INTRINSICS_DECL__ int __ldg(const int *ptr) { unsigned int ret; asm volatile ("ld.global.nc.s32 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return (int)ret; }
113
+ __SM_32_INTRINSICS_DECL__ long long __ldg(const long long *ptr) { unsigned long long ret; asm volatile ("ld.global.nc.s64 %0, [%1];" : "=l"(ret) : __LDG_PTR (ptr)); return (long long)ret; }
114
+ __SM_32_INTRINSICS_DECL__ char2 __ldg(const char2 *ptr) { char2 ret; int2 tmp; asm volatile ("ld.global.nc.v2.s8 {%0,%1}, [%2];" : "=r"(tmp.x), "=r"(tmp.y) : __LDG_PTR (ptr)); ret.x = (char)tmp.x; ret.y = (char)tmp.y; return ret; }
115
+ __SM_32_INTRINSICS_DECL__ char4 __ldg(const char4 *ptr) { char4 ret; int4 tmp; asm volatile ("ld.global.nc.v4.s8 {%0,%1,%2,%3}, [%4];" : "=r"(tmp.x), "=r"(tmp.y), "=r"(tmp.z), "=r"(tmp.w) : __LDG_PTR (ptr)); ret.x = (char)tmp.x; ret.y = (char)tmp.y; ret.z = (char)tmp.z; ret.w = (char)tmp.w; return ret; }
116
+ __SM_32_INTRINSICS_DECL__ short2 __ldg(const short2 *ptr) { short2 ret; asm volatile ("ld.global.nc.v2.s16 {%0,%1}, [%2];" : "=h"(ret.x), "=h"(ret.y) : __LDG_PTR (ptr)); return ret; }
117
+ __SM_32_INTRINSICS_DECL__ short4 __ldg(const short4 *ptr) { short4 ret; asm volatile ("ld.global.nc.v4.s16 {%0,%1,%2,%3}, [%4];" : "=h"(ret.x), "=h"(ret.y), "=h"(ret.z), "=h"(ret.w) : __LDG_PTR (ptr)); return ret; }
118
+ __SM_32_INTRINSICS_DECL__ int2 __ldg(const int2 *ptr) { int2 ret; asm volatile ("ld.global.nc.v2.s32 {%0,%1}, [%2];" : "=r"(ret.x), "=r"(ret.y) : __LDG_PTR (ptr)); return ret; }
119
+ __SM_32_INTRINSICS_DECL__ int4 __ldg(const int4 *ptr) { int4 ret; asm volatile ("ld.global.nc.v4.s32 {%0,%1,%2,%3}, [%4];" : "=r"(ret.x), "=r"(ret.y), "=r"(ret.z), "=r"(ret.w) : __LDG_PTR (ptr)); return ret; }
120
+ __SM_32_INTRINSICS_DECL__ longlong2 __ldg(const longlong2 *ptr) { longlong2 ret; asm volatile ("ld.global.nc.v2.s64 {%0,%1}, [%2];" : "=l"(ret.x), "=l"(ret.y) : __LDG_PTR (ptr)); return ret; }
121
+
122
+ __SM_32_INTRINSICS_DECL__ unsigned char __ldg(const unsigned char *ptr) { unsigned int ret; asm volatile ("ld.global.nc.u8 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return (unsigned char)ret; }
123
+ __SM_32_INTRINSICS_DECL__ unsigned short __ldg(const unsigned short *ptr) { unsigned short ret; asm volatile ("ld.global.nc.u16 %0, [%1];" : "=h"(ret) : __LDG_PTR (ptr)); return ret; }
124
+ __SM_32_INTRINSICS_DECL__ unsigned int __ldg(const unsigned int *ptr) { unsigned int ret; asm volatile ("ld.global.nc.u32 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return ret; }
125
+ __SM_32_INTRINSICS_DECL__ unsigned long long __ldg(const unsigned long long *ptr) { unsigned long long ret; asm volatile ("ld.global.nc.u64 %0, [%1];" : "=l"(ret) : __LDG_PTR (ptr)); return ret; }
126
+ __SM_32_INTRINSICS_DECL__ uchar2 __ldg(const uchar2 *ptr) { uchar2 ret; uint2 tmp; asm volatile ("ld.global.nc.v2.u8 {%0,%1}, [%2];" : "=r"(tmp.x), "=r"(tmp.y) : __LDG_PTR (ptr)); ret.x = (unsigned char)tmp.x; ret.y = (unsigned char)tmp.y; return ret; }
127
+ __SM_32_INTRINSICS_DECL__ uchar4 __ldg(const uchar4 *ptr) { uchar4 ret; uint4 tmp; asm volatile ("ld.global.nc.v4.u8 {%0,%1,%2,%3}, [%4];" : "=r"(tmp.x), "=r"(tmp.y), "=r"(tmp.z), "=r"(tmp.w) : __LDG_PTR (ptr)); ret.x = (unsigned char)tmp.x; ret.y = (unsigned char)tmp.y; ret.z = (unsigned char)tmp.z; ret.w = (unsigned char)tmp.w; return ret; }
128
+ __SM_32_INTRINSICS_DECL__ ushort2 __ldg(const ushort2 *ptr) { ushort2 ret; asm volatile ("ld.global.nc.v2.u16 {%0,%1}, [%2];" : "=h"(ret.x), "=h"(ret.y) : __LDG_PTR (ptr)); return ret; }
129
+ __SM_32_INTRINSICS_DECL__ ushort4 __ldg(const ushort4 *ptr) { ushort4 ret; asm volatile ("ld.global.nc.v4.u16 {%0,%1,%2,%3}, [%4];" : "=h"(ret.x), "=h"(ret.y), "=h"(ret.z), "=h"(ret.w) : __LDG_PTR (ptr)); return ret; }
130
+ __SM_32_INTRINSICS_DECL__ uint2 __ldg(const uint2 *ptr) { uint2 ret; asm volatile ("ld.global.nc.v2.u32 {%0,%1}, [%2];" : "=r"(ret.x), "=r"(ret.y) : __LDG_PTR (ptr)); return ret; }
131
+ __SM_32_INTRINSICS_DECL__ uint4 __ldg(const uint4 *ptr) { uint4 ret; asm volatile ("ld.global.nc.v4.u32 {%0,%1,%2,%3}, [%4];" : "=r"(ret.x), "=r"(ret.y), "=r"(ret.z), "=r"(ret.w) : __LDG_PTR (ptr)); return ret; }
132
+ __SM_32_INTRINSICS_DECL__ ulonglong2 __ldg(const ulonglong2 *ptr) { ulonglong2 ret; asm volatile ("ld.global.nc.v2.u64 {%0,%1}, [%2];" : "=l"(ret.x), "=l"(ret.y) : __LDG_PTR (ptr)); return ret; }
133
+
134
+ __SM_32_INTRINSICS_DECL__ float __ldg(const float *ptr) { float ret; asm volatile ("ld.global.nc.f32 %0, [%1];" : "=f"(ret) : __LDG_PTR (ptr)); return ret; }
135
+ __SM_32_INTRINSICS_DECL__ double __ldg(const double *ptr) { double ret; asm volatile ("ld.global.nc.f64 %0, [%1];" : "=d"(ret) : __LDG_PTR (ptr)); return ret; }
136
+ __SM_32_INTRINSICS_DECL__ float2 __ldg(const float2 *ptr) { float2 ret; asm volatile ("ld.global.nc.v2.f32 {%0,%1}, [%2];" : "=f"(ret.x), "=f"(ret.y) : __LDG_PTR (ptr)); return ret; }
137
+ __SM_32_INTRINSICS_DECL__ float4 __ldg(const float4 *ptr) { float4 ret; asm volatile ("ld.global.nc.v4.f32 {%0,%1,%2,%3}, [%4];" : "=f"(ret.x), "=f"(ret.y), "=f"(ret.z), "=f"(ret.w) : __LDG_PTR (ptr)); return ret; }
138
+ __SM_32_INTRINSICS_DECL__ double2 __ldg(const double2 *ptr) { double2 ret; asm volatile ("ld.global.nc.v2.f64 {%0,%1}, [%2];" : "=d"(ret.x), "=d"(ret.y) : __LDG_PTR (ptr)); return ret; }
139
+
140
+
141
+ /******************************************************************************
142
+ * __ldcg *
143
+ ******************************************************************************/
144
+
145
+ // Size of long is architecture and OS specific.
146
+ #if defined(__LP64__) // 64 bits
147
+ __SM_32_INTRINSICS_DECL__ long __ldcg(const long *ptr) { unsigned long ret; asm volatile ("ld.global.cg.s64 %0, [%1];" : "=l"(ret) : __LDG_PTR (ptr)); return (long)ret; }
148
+ __SM_32_INTRINSICS_DECL__ unsigned long __ldcg(const unsigned long *ptr) { unsigned long ret; asm volatile ("ld.global.cg.u64 %0, [%1];" : "=l"(ret) : __LDG_PTR (ptr)); return ret; }
149
+ #else // 32 bits
150
+ __SM_32_INTRINSICS_DECL__ long __ldcg(const long *ptr) { unsigned long ret; asm volatile ("ld.global.cg.s32 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return (long)ret; }
151
+ __SM_32_INTRINSICS_DECL__ unsigned long __ldcg(const unsigned long *ptr) { unsigned long ret; asm volatile ("ld.global.cg.u32 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return ret; }
152
+ #endif
153
+
154
+
155
+ __SM_32_INTRINSICS_DECL__ char __ldcg(const char *ptr) { unsigned int ret; asm volatile ("ld.global.cg.s8 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return (char)ret; }
156
+ __SM_32_INTRINSICS_DECL__ signed char __ldcg(const signed char *ptr) { unsigned int ret; asm volatile ("ld.global.cg.s8 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return (signed char)ret; }
157
+ __SM_32_INTRINSICS_DECL__ short __ldcg(const short *ptr) { unsigned short ret; asm volatile ("ld.global.cg.s16 %0, [%1];" : "=h"(ret) : __LDG_PTR (ptr)); return (short)ret; }
158
+ __SM_32_INTRINSICS_DECL__ int __ldcg(const int *ptr) { unsigned int ret; asm volatile ("ld.global.cg.s32 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return (int)ret; }
159
+ __SM_32_INTRINSICS_DECL__ long long __ldcg(const long long *ptr) { unsigned long long ret; asm volatile ("ld.global.cg.s64 %0, [%1];" : "=l"(ret) : __LDG_PTR (ptr)); return (long long)ret; }
160
+ __SM_32_INTRINSICS_DECL__ char2 __ldcg(const char2 *ptr) { char2 ret; int2 tmp; asm volatile ("ld.global.cg.v2.s8 {%0,%1}, [%2];" : "=r"(tmp.x), "=r"(tmp.y) : __LDG_PTR (ptr)); ret.x = (char)tmp.x; ret.y = (char)tmp.y; return ret; }
161
+ __SM_32_INTRINSICS_DECL__ char4 __ldcg(const char4 *ptr) { char4 ret; int4 tmp; asm volatile ("ld.global.cg.v4.s8 {%0,%1,%2,%3}, [%4];" : "=r"(tmp.x), "=r"(tmp.y), "=r"(tmp.z), "=r"(tmp.w) : __LDG_PTR (ptr)); ret.x = (char)tmp.x; ret.y = (char)tmp.y; ret.z = (char)tmp.z; ret.w = (char)tmp.w; return ret; }
162
+ __SM_32_INTRINSICS_DECL__ short2 __ldcg(const short2 *ptr) { short2 ret; asm volatile ("ld.global.cg.v2.s16 {%0,%1}, [%2];" : "=h"(ret.x), "=h"(ret.y) : __LDG_PTR (ptr)); return ret; }
163
+ __SM_32_INTRINSICS_DECL__ short4 __ldcg(const short4 *ptr) { short4 ret; asm volatile ("ld.global.cg.v4.s16 {%0,%1,%2,%3}, [%4];" : "=h"(ret.x), "=h"(ret.y), "=h"(ret.z), "=h"(ret.w) : __LDG_PTR (ptr)); return ret; }
164
+ __SM_32_INTRINSICS_DECL__ int2 __ldcg(const int2 *ptr) { int2 ret; asm volatile ("ld.global.cg.v2.s32 {%0,%1}, [%2];" : "=r"(ret.x), "=r"(ret.y) : __LDG_PTR (ptr)); return ret; }
165
+ __SM_32_INTRINSICS_DECL__ int4 __ldcg(const int4 *ptr) { int4 ret; asm volatile ("ld.global.cg.v4.s32 {%0,%1,%2,%3}, [%4];" : "=r"(ret.x), "=r"(ret.y), "=r"(ret.z), "=r"(ret.w) : __LDG_PTR (ptr)); return ret; }
166
+ __SM_32_INTRINSICS_DECL__ longlong2 __ldcg(const longlong2 *ptr) { longlong2 ret; asm volatile ("ld.global.cg.v2.s64 {%0,%1}, [%2];" : "=l"(ret.x), "=l"(ret.y) : __LDG_PTR (ptr)); return ret; }
167
+
168
+ __SM_32_INTRINSICS_DECL__ unsigned char __ldcg(const unsigned char *ptr) { unsigned int ret; asm volatile ("ld.global.cg.u8 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return (unsigned char)ret; }
169
+ __SM_32_INTRINSICS_DECL__ unsigned short __ldcg(const unsigned short *ptr) { unsigned short ret; asm volatile ("ld.global.cg.u16 %0, [%1];" : "=h"(ret) : __LDG_PTR (ptr)); return ret; }
170
+ __SM_32_INTRINSICS_DECL__ unsigned int __ldcg(const unsigned int *ptr) { unsigned int ret; asm volatile ("ld.global.cg.u32 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return ret; }
171
+ __SM_32_INTRINSICS_DECL__ unsigned long long __ldcg(const unsigned long long *ptr) { unsigned long long ret; asm volatile ("ld.global.cg.u64 %0, [%1];" : "=l"(ret) : __LDG_PTR (ptr)); return ret; }
172
+ __SM_32_INTRINSICS_DECL__ uchar2 __ldcg(const uchar2 *ptr) { uchar2 ret; uint2 tmp; asm volatile ("ld.global.cg.v2.u8 {%0,%1}, [%2];" : "=r"(tmp.x), "=r"(tmp.y) : __LDG_PTR (ptr)); ret.x = (unsigned char)tmp.x; ret.y = (unsigned char)tmp.y; return ret; }
173
+ __SM_32_INTRINSICS_DECL__ uchar4 __ldcg(const uchar4 *ptr) { uchar4 ret; uint4 tmp; asm volatile ("ld.global.cg.v4.u8 {%0,%1,%2,%3}, [%4];" : "=r"(tmp.x), "=r"(tmp.y), "=r"(tmp.z), "=r"(tmp.w) : __LDG_PTR (ptr)); ret.x = (unsigned char)tmp.x; ret.y = (unsigned char)tmp.y; ret.z = (unsigned char)tmp.z; ret.w = (unsigned char)tmp.w; return ret; }
174
+ __SM_32_INTRINSICS_DECL__ ushort2 __ldcg(const ushort2 *ptr) { ushort2 ret; asm volatile ("ld.global.cg.v2.u16 {%0,%1}, [%2];" : "=h"(ret.x), "=h"(ret.y) : __LDG_PTR (ptr)); return ret; }
175
+ __SM_32_INTRINSICS_DECL__ ushort4 __ldcg(const ushort4 *ptr) { ushort4 ret; asm volatile ("ld.global.cg.v4.u16 {%0,%1,%2,%3}, [%4];" : "=h"(ret.x), "=h"(ret.y), "=h"(ret.z), "=h"(ret.w) : __LDG_PTR (ptr)); return ret; }
176
+ __SM_32_INTRINSICS_DECL__ uint2 __ldcg(const uint2 *ptr) { uint2 ret; asm volatile ("ld.global.cg.v2.u32 {%0,%1}, [%2];" : "=r"(ret.x), "=r"(ret.y) : __LDG_PTR (ptr)); return ret; }
177
+ __SM_32_INTRINSICS_DECL__ uint4 __ldcg(const uint4 *ptr) { uint4 ret; asm volatile ("ld.global.cg.v4.u32 {%0,%1,%2,%3}, [%4];" : "=r"(ret.x), "=r"(ret.y), "=r"(ret.z), "=r"(ret.w) : __LDG_PTR (ptr)); return ret; }
178
+ __SM_32_INTRINSICS_DECL__ ulonglong2 __ldcg(const ulonglong2 *ptr) { ulonglong2 ret; asm volatile ("ld.global.cg.v2.u64 {%0,%1}, [%2];" : "=l"(ret.x), "=l"(ret.y) : __LDG_PTR (ptr)); return ret; }
179
+
180
+ __SM_32_INTRINSICS_DECL__ float __ldcg(const float *ptr) { float ret; asm volatile ("ld.global.cg.f32 %0, [%1];" : "=f"(ret) : __LDG_PTR (ptr)); return ret; }
181
+ __SM_32_INTRINSICS_DECL__ double __ldcg(const double *ptr) { double ret; asm volatile ("ld.global.cg.f64 %0, [%1];" : "=d"(ret) : __LDG_PTR (ptr)); return ret; }
182
+ __SM_32_INTRINSICS_DECL__ float2 __ldcg(const float2 *ptr) { float2 ret; asm volatile ("ld.global.cg.v2.f32 {%0,%1}, [%2];" : "=f"(ret.x), "=f"(ret.y) : __LDG_PTR (ptr)); return ret; }
183
+ __SM_32_INTRINSICS_DECL__ float4 __ldcg(const float4 *ptr) { float4 ret; asm volatile ("ld.global.cg.v4.f32 {%0,%1,%2,%3}, [%4];" : "=f"(ret.x), "=f"(ret.y), "=f"(ret.z), "=f"(ret.w) : __LDG_PTR (ptr)); return ret; }
184
+ __SM_32_INTRINSICS_DECL__ double2 __ldcg(const double2 *ptr) { double2 ret; asm volatile ("ld.global.cg.v2.f64 {%0,%1}, [%2];" : "=d"(ret.x), "=d"(ret.y) : __LDG_PTR (ptr)); return ret; }
185
+
186
+ /******************************************************************************
187
+ * __ldca *
188
+ ******************************************************************************/
189
+
190
+ // Size of long is architecture and OS specific.
191
+ #if defined(__LP64__) // 64 bits
192
+ __SM_32_INTRINSICS_DECL__ long __ldca(const long *ptr) { unsigned long ret; asm volatile ("ld.global.ca.s64 %0, [%1];" : "=l"(ret) : __LDG_PTR (ptr)); return (long)ret; }
193
+ __SM_32_INTRINSICS_DECL__ unsigned long __ldca(const unsigned long *ptr) { unsigned long ret; asm volatile ("ld.global.ca.u64 %0, [%1];" : "=l"(ret) : __LDG_PTR (ptr)); return ret; }
194
+ #else // 32 bits
195
+ __SM_32_INTRINSICS_DECL__ long __ldca(const long *ptr) { unsigned long ret; asm volatile ("ld.global.ca.s32 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return (long)ret; }
196
+ __SM_32_INTRINSICS_DECL__ unsigned long __ldca(const unsigned long *ptr) { unsigned long ret; asm volatile ("ld.global.ca.u32 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return ret; }
197
+ #endif
198
+
199
+
200
+ __SM_32_INTRINSICS_DECL__ char __ldca(const char *ptr) { unsigned int ret; asm volatile ("ld.global.ca.s8 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return (char)ret; }
201
+ __SM_32_INTRINSICS_DECL__ signed char __ldca(const signed char *ptr) { unsigned int ret; asm volatile ("ld.global.ca.s8 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return (signed char)ret; }
202
+ __SM_32_INTRINSICS_DECL__ short __ldca(const short *ptr) { unsigned short ret; asm volatile ("ld.global.ca.s16 %0, [%1];" : "=h"(ret) : __LDG_PTR (ptr)); return (short)ret; }
203
+ __SM_32_INTRINSICS_DECL__ int __ldca(const int *ptr) { unsigned int ret; asm volatile ("ld.global.ca.s32 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return (int)ret; }
204
+ __SM_32_INTRINSICS_DECL__ long long __ldca(const long long *ptr) { unsigned long long ret; asm volatile ("ld.global.ca.s64 %0, [%1];" : "=l"(ret) : __LDG_PTR (ptr)); return (long long)ret; }
205
+ __SM_32_INTRINSICS_DECL__ char2 __ldca(const char2 *ptr) { char2 ret; int2 tmp; asm volatile ("ld.global.ca.v2.s8 {%0,%1}, [%2];" : "=r"(tmp.x), "=r"(tmp.y) : __LDG_PTR (ptr)); ret.x = (char)tmp.x; ret.y = (char)tmp.y; return ret; }
206
+ __SM_32_INTRINSICS_DECL__ char4 __ldca(const char4 *ptr) { char4 ret; int4 tmp; asm volatile ("ld.global.ca.v4.s8 {%0,%1,%2,%3}, [%4];" : "=r"(tmp.x), "=r"(tmp.y), "=r"(tmp.z), "=r"(tmp.w) : __LDG_PTR (ptr)); ret.x = (char)tmp.x; ret.y = (char)tmp.y; ret.z = (char)tmp.z; ret.w = (char)tmp.w; return ret; }
207
+ __SM_32_INTRINSICS_DECL__ short2 __ldca(const short2 *ptr) { short2 ret; asm volatile ("ld.global.ca.v2.s16 {%0,%1}, [%2];" : "=h"(ret.x), "=h"(ret.y) : __LDG_PTR (ptr)); return ret; }
208
+ __SM_32_INTRINSICS_DECL__ short4 __ldca(const short4 *ptr) { short4 ret; asm volatile ("ld.global.ca.v4.s16 {%0,%1,%2,%3}, [%4];" : "=h"(ret.x), "=h"(ret.y), "=h"(ret.z), "=h"(ret.w) : __LDG_PTR (ptr)); return ret; }
209
+ __SM_32_INTRINSICS_DECL__ int2 __ldca(const int2 *ptr) { int2 ret; asm volatile ("ld.global.ca.v2.s32 {%0,%1}, [%2];" : "=r"(ret.x), "=r"(ret.y) : __LDG_PTR (ptr)); return ret; }
210
+ __SM_32_INTRINSICS_DECL__ int4 __ldca(const int4 *ptr) { int4 ret; asm volatile ("ld.global.ca.v4.s32 {%0,%1,%2,%3}, [%4];" : "=r"(ret.x), "=r"(ret.y), "=r"(ret.z), "=r"(ret.w) : __LDG_PTR (ptr)); return ret; }
211
+ __SM_32_INTRINSICS_DECL__ longlong2 __ldca(const longlong2 *ptr) { longlong2 ret; asm volatile ("ld.global.ca.v2.s64 {%0,%1}, [%2];" : "=l"(ret.x), "=l"(ret.y) : __LDG_PTR (ptr)); return ret; }
212
+
213
+ __SM_32_INTRINSICS_DECL__ unsigned char __ldca(const unsigned char *ptr) { unsigned int ret; asm volatile ("ld.global.ca.u8 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return (unsigned char)ret; }
214
+ __SM_32_INTRINSICS_DECL__ unsigned short __ldca(const unsigned short *ptr) { unsigned short ret; asm volatile ("ld.global.ca.u16 %0, [%1];" : "=h"(ret) : __LDG_PTR (ptr)); return ret; }
215
+ __SM_32_INTRINSICS_DECL__ unsigned int __ldca(const unsigned int *ptr) { unsigned int ret; asm volatile ("ld.global.ca.u32 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return ret; }
216
+ __SM_32_INTRINSICS_DECL__ unsigned long long __ldca(const unsigned long long *ptr) { unsigned long long ret; asm volatile ("ld.global.ca.u64 %0, [%1];" : "=l"(ret) : __LDG_PTR (ptr)); return ret; }
217
+ __SM_32_INTRINSICS_DECL__ uchar2 __ldca(const uchar2 *ptr) { uchar2 ret; uint2 tmp; asm volatile ("ld.global.ca.v2.u8 {%0,%1}, [%2];" : "=r"(tmp.x), "=r"(tmp.y) : __LDG_PTR (ptr)); ret.x = (unsigned char)tmp.x; ret.y = (unsigned char)tmp.y; return ret; }
218
+ __SM_32_INTRINSICS_DECL__ uchar4 __ldca(const uchar4 *ptr) { uchar4 ret; uint4 tmp; asm volatile ("ld.global.ca.v4.u8 {%0,%1,%2,%3}, [%4];" : "=r"(tmp.x), "=r"(tmp.y), "=r"(tmp.z), "=r"(tmp.w) : __LDG_PTR (ptr)); ret.x = (unsigned char)tmp.x; ret.y = (unsigned char)tmp.y; ret.z = (unsigned char)tmp.z; ret.w = (unsigned char)tmp.w; return ret; }
219
+ __SM_32_INTRINSICS_DECL__ ushort2 __ldca(const ushort2 *ptr) { ushort2 ret; asm volatile ("ld.global.ca.v2.u16 {%0,%1}, [%2];" : "=h"(ret.x), "=h"(ret.y) : __LDG_PTR (ptr)); return ret; }
220
+ __SM_32_INTRINSICS_DECL__ ushort4 __ldca(const ushort4 *ptr) { ushort4 ret; asm volatile ("ld.global.ca.v4.u16 {%0,%1,%2,%3}, [%4];" : "=h"(ret.x), "=h"(ret.y), "=h"(ret.z), "=h"(ret.w) : __LDG_PTR (ptr)); return ret; }
221
+ __SM_32_INTRINSICS_DECL__ uint2 __ldca(const uint2 *ptr) { uint2 ret; asm volatile ("ld.global.ca.v2.u32 {%0,%1}, [%2];" : "=r"(ret.x), "=r"(ret.y) : __LDG_PTR (ptr)); return ret; }
222
+ __SM_32_INTRINSICS_DECL__ uint4 __ldca(const uint4 *ptr) { uint4 ret; asm volatile ("ld.global.ca.v4.u32 {%0,%1,%2,%3}, [%4];" : "=r"(ret.x), "=r"(ret.y), "=r"(ret.z), "=r"(ret.w) : __LDG_PTR (ptr)); return ret; }
223
+ __SM_32_INTRINSICS_DECL__ ulonglong2 __ldca(const ulonglong2 *ptr) { ulonglong2 ret; asm volatile ("ld.global.ca.v2.u64 {%0,%1}, [%2];" : "=l"(ret.x), "=l"(ret.y) : __LDG_PTR (ptr)); return ret; }
224
+
225
+ __SM_32_INTRINSICS_DECL__ float __ldca(const float *ptr) { float ret; asm volatile ("ld.global.ca.f32 %0, [%1];" : "=f"(ret) : __LDG_PTR (ptr)); return ret; }
226
+ __SM_32_INTRINSICS_DECL__ double __ldca(const double *ptr) { double ret; asm volatile ("ld.global.ca.f64 %0, [%1];" : "=d"(ret) : __LDG_PTR (ptr)); return ret; }
227
+ __SM_32_INTRINSICS_DECL__ float2 __ldca(const float2 *ptr) { float2 ret; asm volatile ("ld.global.ca.v2.f32 {%0,%1}, [%2];" : "=f"(ret.x), "=f"(ret.y) : __LDG_PTR (ptr)); return ret; }
228
+ __SM_32_INTRINSICS_DECL__ float4 __ldca(const float4 *ptr) { float4 ret; asm volatile ("ld.global.ca.v4.f32 {%0,%1,%2,%3}, [%4];" : "=f"(ret.x), "=f"(ret.y), "=f"(ret.z), "=f"(ret.w) : __LDG_PTR (ptr)); return ret; }
229
+ __SM_32_INTRINSICS_DECL__ double2 __ldca(const double2 *ptr) { double2 ret; asm volatile ("ld.global.ca.v2.f64 {%0,%1}, [%2];" : "=d"(ret.x), "=d"(ret.y) : __LDG_PTR (ptr)); return ret; }
230
+
231
+ /******************************************************************************
232
+ * __ldcs *
233
+ ******************************************************************************/
234
+
235
+ // Size of long is architecture and OS specific.
236
+ #if defined(__LP64__) // 64 bits
237
+ __SM_32_INTRINSICS_DECL__ long __ldcs(const long *ptr) { unsigned long ret; asm volatile ("ld.global.cs.s64 %0, [%1];" : "=l"(ret) : __LDG_PTR (ptr)); return (long)ret; }
238
+ __SM_32_INTRINSICS_DECL__ unsigned long __ldcs(const unsigned long *ptr) { unsigned long ret; asm volatile ("ld.global.cs.u64 %0, [%1];" : "=l"(ret) : __LDG_PTR (ptr)); return ret; }
239
+ #else // 32 bits
240
+ __SM_32_INTRINSICS_DECL__ long __ldcs(const long *ptr) { unsigned long ret; asm volatile ("ld.global.cs.s32 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return (long)ret; }
241
+ __SM_32_INTRINSICS_DECL__ unsigned long __ldcs(const unsigned long *ptr) { unsigned long ret; asm volatile ("ld.global.cs.u32 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return ret; }
242
+ #endif
243
+
244
+
245
+ __SM_32_INTRINSICS_DECL__ char __ldcs(const char *ptr) { unsigned int ret; asm volatile ("ld.global.cs.s8 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return (char)ret; }
246
+ __SM_32_INTRINSICS_DECL__ signed char __ldcs(const signed char *ptr) { unsigned int ret; asm volatile ("ld.global.cs.s8 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return (signed char)ret; }
247
+ __SM_32_INTRINSICS_DECL__ short __ldcs(const short *ptr) { unsigned short ret; asm volatile ("ld.global.cs.s16 %0, [%1];" : "=h"(ret) : __LDG_PTR (ptr)); return (short)ret; }
248
+ __SM_32_INTRINSICS_DECL__ int __ldcs(const int *ptr) { unsigned int ret; asm volatile ("ld.global.cs.s32 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return (int)ret; }
249
+ __SM_32_INTRINSICS_DECL__ long long __ldcs(const long long *ptr) { unsigned long long ret; asm volatile ("ld.global.cs.s64 %0, [%1];" : "=l"(ret) : __LDG_PTR (ptr)); return (long long)ret; }
250
+ __SM_32_INTRINSICS_DECL__ char2 __ldcs(const char2 *ptr) { char2 ret; int2 tmp; asm volatile ("ld.global.cs.v2.s8 {%0,%1}, [%2];" : "=r"(tmp.x), "=r"(tmp.y) : __LDG_PTR (ptr)); ret.x = (char)tmp.x; ret.y = (char)tmp.y; return ret; }
251
+ __SM_32_INTRINSICS_DECL__ char4 __ldcs(const char4 *ptr) { char4 ret; int4 tmp; asm volatile ("ld.global.cs.v4.s8 {%0,%1,%2,%3}, [%4];" : "=r"(tmp.x), "=r"(tmp.y), "=r"(tmp.z), "=r"(tmp.w) : __LDG_PTR (ptr)); ret.x = (char)tmp.x; ret.y = (char)tmp.y; ret.z = (char)tmp.z; ret.w = (char)tmp.w; return ret; }
252
+ __SM_32_INTRINSICS_DECL__ short2 __ldcs(const short2 *ptr) { short2 ret; asm volatile ("ld.global.cs.v2.s16 {%0,%1}, [%2];" : "=h"(ret.x), "=h"(ret.y) : __LDG_PTR (ptr)); return ret; }
253
+ __SM_32_INTRINSICS_DECL__ short4 __ldcs(const short4 *ptr) { short4 ret; asm volatile ("ld.global.cs.v4.s16 {%0,%1,%2,%3}, [%4];" : "=h"(ret.x), "=h"(ret.y), "=h"(ret.z), "=h"(ret.w) : __LDG_PTR (ptr)); return ret; }
254
+ __SM_32_INTRINSICS_DECL__ int2 __ldcs(const int2 *ptr) { int2 ret; asm volatile ("ld.global.cs.v2.s32 {%0,%1}, [%2];" : "=r"(ret.x), "=r"(ret.y) : __LDG_PTR (ptr)); return ret; }
255
+ __SM_32_INTRINSICS_DECL__ int4 __ldcs(const int4 *ptr) { int4 ret; asm volatile ("ld.global.cs.v4.s32 {%0,%1,%2,%3}, [%4];" : "=r"(ret.x), "=r"(ret.y), "=r"(ret.z), "=r"(ret.w) : __LDG_PTR (ptr)); return ret; }
256
+ __SM_32_INTRINSICS_DECL__ longlong2 __ldcs(const longlong2 *ptr) { longlong2 ret; asm volatile ("ld.global.cs.v2.s64 {%0,%1}, [%2];" : "=l"(ret.x), "=l"(ret.y) : __LDG_PTR (ptr)); return ret; }
257
+
258
+ __SM_32_INTRINSICS_DECL__ unsigned char __ldcs(const unsigned char *ptr) { unsigned int ret; asm volatile ("ld.global.cs.u8 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return (unsigned char)ret; }
259
+ __SM_32_INTRINSICS_DECL__ unsigned short __ldcs(const unsigned short *ptr) { unsigned short ret; asm volatile ("ld.global.cs.u16 %0, [%1];" : "=h"(ret) : __LDG_PTR (ptr)); return ret; }
260
+ __SM_32_INTRINSICS_DECL__ unsigned int __ldcs(const unsigned int *ptr) { unsigned int ret; asm volatile ("ld.global.cs.u32 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr)); return ret; }
261
+ __SM_32_INTRINSICS_DECL__ unsigned long long __ldcs(const unsigned long long *ptr) { unsigned long long ret; asm volatile ("ld.global.cs.u64 %0, [%1];" : "=l"(ret) : __LDG_PTR (ptr)); return ret; }
262
+ __SM_32_INTRINSICS_DECL__ uchar2 __ldcs(const uchar2 *ptr) { uchar2 ret; uint2 tmp; asm volatile ("ld.global.cs.v2.u8 {%0,%1}, [%2];" : "=r"(tmp.x), "=r"(tmp.y) : __LDG_PTR (ptr)); ret.x = (unsigned char)tmp.x; ret.y = (unsigned char)tmp.y; return ret; }
263
+ __SM_32_INTRINSICS_DECL__ uchar4 __ldcs(const uchar4 *ptr) { uchar4 ret; uint4 tmp; asm volatile ("ld.global.cs.v4.u8 {%0,%1,%2,%3}, [%4];" : "=r"(tmp.x), "=r"(tmp.y), "=r"(tmp.z), "=r"(tmp.w) : __LDG_PTR (ptr)); ret.x = (unsigned char)tmp.x; ret.y = (unsigned char)tmp.y; ret.z = (unsigned char)tmp.z; ret.w = (unsigned char)tmp.w; return ret; }
264
+ __SM_32_INTRINSICS_DECL__ ushort2 __ldcs(const ushort2 *ptr) { ushort2 ret; asm volatile ("ld.global.cs.v2.u16 {%0,%1}, [%2];" : "=h"(ret.x), "=h"(ret.y) : __LDG_PTR (ptr)); return ret; }
265
+ __SM_32_INTRINSICS_DECL__ ushort4 __ldcs(const ushort4 *ptr) { ushort4 ret; asm volatile ("ld.global.cs.v4.u16 {%0,%1,%2,%3}, [%4];" : "=h"(ret.x), "=h"(ret.y), "=h"(ret.z), "=h"(ret.w) : __LDG_PTR (ptr)); return ret; }
266
+ __SM_32_INTRINSICS_DECL__ uint2 __ldcs(const uint2 *ptr) { uint2 ret; asm volatile ("ld.global.cs.v2.u32 {%0,%1}, [%2];" : "=r"(ret.x), "=r"(ret.y) : __LDG_PTR (ptr)); return ret; }
267
+ __SM_32_INTRINSICS_DECL__ uint4 __ldcs(const uint4 *ptr) { uint4 ret; asm volatile ("ld.global.cs.v4.u32 {%0,%1,%2,%3}, [%4];" : "=r"(ret.x), "=r"(ret.y), "=r"(ret.z), "=r"(ret.w) : __LDG_PTR (ptr)); return ret; }
268
+ __SM_32_INTRINSICS_DECL__ ulonglong2 __ldcs(const ulonglong2 *ptr) { ulonglong2 ret; asm volatile ("ld.global.cs.v2.u64 {%0,%1}, [%2];" : "=l"(ret.x), "=l"(ret.y) : __LDG_PTR (ptr)); return ret; }
269
+
270
+ __SM_32_INTRINSICS_DECL__ float __ldcs(const float *ptr) { float ret; asm volatile ("ld.global.cs.f32 %0, [%1];" : "=f"(ret) : __LDG_PTR (ptr)); return ret; }
271
+ __SM_32_INTRINSICS_DECL__ double __ldcs(const double *ptr) { double ret; asm volatile ("ld.global.cs.f64 %0, [%1];" : "=d"(ret) : __LDG_PTR (ptr)); return ret; }
272
+ __SM_32_INTRINSICS_DECL__ float2 __ldcs(const float2 *ptr) { float2 ret; asm volatile ("ld.global.cs.v2.f32 {%0,%1}, [%2];" : "=f"(ret.x), "=f"(ret.y) : __LDG_PTR (ptr)); return ret; }
273
+ __SM_32_INTRINSICS_DECL__ float4 __ldcs(const float4 *ptr) { float4 ret; asm volatile ("ld.global.cs.v4.f32 {%0,%1,%2,%3}, [%4];" : "=f"(ret.x), "=f"(ret.y), "=f"(ret.z), "=f"(ret.w) : __LDG_PTR (ptr)); return ret; }
274
+ __SM_32_INTRINSICS_DECL__ double2 __ldcs(const double2 *ptr) { double2 ret; asm volatile ("ld.global.cs.v2.f64 {%0,%1}, [%2];" : "=d"(ret.x), "=d"(ret.y) : __LDG_PTR (ptr)); return ret; }
275
+
276
+ /******************************************************************************
277
+ * __ldlu *
278
+ ******************************************************************************/
279
+
280
+ // Size of long is architecture and OS specific.
281
+ #if defined(__LP64__) // 64 bits
282
+ __SM_32_INTRINSICS_DECL__ long __ldlu(const long *ptr) { unsigned long ret; asm ("ld.global.lu.s64 %0, [%1];" : "=l"(ret) : __LDG_PTR (ptr) : "memory"); return (long)ret; }
283
+ __SM_32_INTRINSICS_DECL__ unsigned long __ldlu(const unsigned long *ptr) { unsigned long ret; asm ("ld.global.lu.u64 %0, [%1];" : "=l"(ret) : __LDG_PTR (ptr) : "memory"); return ret; }
284
+ #else // 32 bits
285
+ __SM_32_INTRINSICS_DECL__ long __ldlu(const long *ptr) { unsigned long ret; asm ("ld.global.lu.s32 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr) : "memory"); return (long)ret; }
286
+ __SM_32_INTRINSICS_DECL__ unsigned long __ldlu(const unsigned long *ptr) { unsigned long ret; asm ("ld.global.lu.u32 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr) : "memory"); return ret; }
287
+ #endif
288
+
289
+
290
+ __SM_32_INTRINSICS_DECL__ char __ldlu(const char *ptr) { unsigned int ret; asm ("ld.global.lu.s8 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr) : "memory"); return (char)ret; }
291
+ __SM_32_INTRINSICS_DECL__ signed char __ldlu(const signed char *ptr) { unsigned int ret; asm ("ld.global.lu.s8 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr) : "memory"); return (signed char)ret; }
292
+ __SM_32_INTRINSICS_DECL__ short __ldlu(const short *ptr) { unsigned short ret; asm ("ld.global.lu.s16 %0, [%1];" : "=h"(ret) : __LDG_PTR (ptr) : "memory"); return (short)ret; }
293
+ __SM_32_INTRINSICS_DECL__ int __ldlu(const int *ptr) { unsigned int ret; asm ("ld.global.lu.s32 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr) : "memory"); return (int)ret; }
294
+ __SM_32_INTRINSICS_DECL__ long long __ldlu(const long long *ptr) { unsigned long long ret; asm ("ld.global.lu.s64 %0, [%1];" : "=l"(ret) : __LDG_PTR (ptr) : "memory"); return (long long)ret; }
295
+ __SM_32_INTRINSICS_DECL__ char2 __ldlu(const char2 *ptr) { char2 ret; int2 tmp; asm ("ld.global.lu.v2.s8 {%0,%1}, [%2];" : "=r"(tmp.x), "=r"(tmp.y) : __LDG_PTR (ptr) : "memory"); ret.x = (char)tmp.x; ret.y = (char)tmp.y; return ret; }
296
+ __SM_32_INTRINSICS_DECL__ char4 __ldlu(const char4 *ptr) { char4 ret; int4 tmp; asm ("ld.global.lu.v4.s8 {%0,%1,%2,%3}, [%4];" : "=r"(tmp.x), "=r"(tmp.y), "=r"(tmp.z), "=r"(tmp.w) : __LDG_PTR (ptr) : "memory"); ret.x = (char)tmp.x; ret.y = (char)tmp.y; ret.z = (char)tmp.z; ret.w = (char)tmp.w; return ret; }
297
+ __SM_32_INTRINSICS_DECL__ short2 __ldlu(const short2 *ptr) { short2 ret; asm ("ld.global.lu.v2.s16 {%0,%1}, [%2];" : "=h"(ret.x), "=h"(ret.y) : __LDG_PTR (ptr) : "memory"); return ret; }
298
+ __SM_32_INTRINSICS_DECL__ short4 __ldlu(const short4 *ptr) { short4 ret; asm ("ld.global.lu.v4.s16 {%0,%1,%2,%3}, [%4];" : "=h"(ret.x), "=h"(ret.y), "=h"(ret.z), "=h"(ret.w) : __LDG_PTR (ptr) : "memory"); return ret; }
299
+ __SM_32_INTRINSICS_DECL__ int2 __ldlu(const int2 *ptr) { int2 ret; asm ("ld.global.lu.v2.s32 {%0,%1}, [%2];" : "=r"(ret.x), "=r"(ret.y) : __LDG_PTR (ptr) : "memory"); return ret; }
300
+ __SM_32_INTRINSICS_DECL__ int4 __ldlu(const int4 *ptr) { int4 ret; asm ("ld.global.lu.v4.s32 {%0,%1,%2,%3}, [%4];" : "=r"(ret.x), "=r"(ret.y), "=r"(ret.z), "=r"(ret.w) : __LDG_PTR (ptr) : "memory"); return ret; }
301
+ __SM_32_INTRINSICS_DECL__ longlong2 __ldlu(const longlong2 *ptr) { longlong2 ret; asm ("ld.global.lu.v2.s64 {%0,%1}, [%2];" : "=l"(ret.x), "=l"(ret.y) : __LDG_PTR (ptr) : "memory"); return ret; }
302
+
303
+ __SM_32_INTRINSICS_DECL__ unsigned char __ldlu(const unsigned char *ptr) { unsigned int ret; asm ("ld.global.lu.u8 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr) : "memory"); return (unsigned char)ret; }
304
+ __SM_32_INTRINSICS_DECL__ unsigned short __ldlu(const unsigned short *ptr) { unsigned short ret; asm ("ld.global.lu.u16 %0, [%1];" : "=h"(ret) : __LDG_PTR (ptr) : "memory"); return ret; }
305
+ __SM_32_INTRINSICS_DECL__ unsigned int __ldlu(const unsigned int *ptr) { unsigned int ret; asm ("ld.global.lu.u32 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr) : "memory"); return ret; }
306
+ __SM_32_INTRINSICS_DECL__ unsigned long long __ldlu(const unsigned long long *ptr) { unsigned long long ret; asm ("ld.global.lu.u64 %0, [%1];" : "=l"(ret) : __LDG_PTR (ptr) : "memory"); return ret; }
307
+ __SM_32_INTRINSICS_DECL__ uchar2 __ldlu(const uchar2 *ptr) { uchar2 ret; uint2 tmp; asm ("ld.global.lu.v2.u8 {%0,%1}, [%2];" : "=r"(tmp.x), "=r"(tmp.y) : __LDG_PTR (ptr) : "memory"); ret.x = (unsigned char)tmp.x; ret.y = (unsigned char)tmp.y; return ret; }
308
+ __SM_32_INTRINSICS_DECL__ uchar4 __ldlu(const uchar4 *ptr) { uchar4 ret; uint4 tmp; asm ("ld.global.lu.v4.u8 {%0,%1,%2,%3}, [%4];" : "=r"(tmp.x), "=r"(tmp.y), "=r"(tmp.z), "=r"(tmp.w) : __LDG_PTR (ptr) : "memory"); ret.x = (unsigned char)tmp.x; ret.y = (unsigned char)tmp.y; ret.z = (unsigned char)tmp.z; ret.w = (unsigned char)tmp.w; return ret; }
309
+ __SM_32_INTRINSICS_DECL__ ushort2 __ldlu(const ushort2 *ptr) { ushort2 ret; asm ("ld.global.lu.v2.u16 {%0,%1}, [%2];" : "=h"(ret.x), "=h"(ret.y) : __LDG_PTR (ptr) : "memory"); return ret; }
310
+ __SM_32_INTRINSICS_DECL__ ushort4 __ldlu(const ushort4 *ptr) { ushort4 ret; asm ("ld.global.lu.v4.u16 {%0,%1,%2,%3}, [%4];" : "=h"(ret.x), "=h"(ret.y), "=h"(ret.z), "=h"(ret.w) : __LDG_PTR (ptr) : "memory"); return ret; }
311
+ __SM_32_INTRINSICS_DECL__ uint2 __ldlu(const uint2 *ptr) { uint2 ret; asm ("ld.global.lu.v2.u32 {%0,%1}, [%2];" : "=r"(ret.x), "=r"(ret.y) : __LDG_PTR (ptr) : "memory"); return ret; }
312
+ __SM_32_INTRINSICS_DECL__ uint4 __ldlu(const uint4 *ptr) { uint4 ret; asm ("ld.global.lu.v4.u32 {%0,%1,%2,%3}, [%4];" : "=r"(ret.x), "=r"(ret.y), "=r"(ret.z), "=r"(ret.w) : __LDG_PTR (ptr) : "memory"); return ret; }
313
+ __SM_32_INTRINSICS_DECL__ ulonglong2 __ldlu(const ulonglong2 *ptr) { ulonglong2 ret; asm ("ld.global.lu.v2.u64 {%0,%1}, [%2];" : "=l"(ret.x), "=l"(ret.y) : __LDG_PTR (ptr) : "memory"); return ret; }
314
+
315
+ __SM_32_INTRINSICS_DECL__ float __ldlu(const float *ptr) { float ret; asm ("ld.global.lu.f32 %0, [%1];" : "=f"(ret) : __LDG_PTR (ptr) : "memory"); return ret; }
316
+ __SM_32_INTRINSICS_DECL__ double __ldlu(const double *ptr) { double ret; asm ("ld.global.lu.f64 %0, [%1];" : "=d"(ret) : __LDG_PTR (ptr) : "memory"); return ret; }
317
+ __SM_32_INTRINSICS_DECL__ float2 __ldlu(const float2 *ptr) { float2 ret; asm ("ld.global.lu.v2.f32 {%0,%1}, [%2];" : "=f"(ret.x), "=f"(ret.y) : __LDG_PTR (ptr) : "memory"); return ret; }
318
+ __SM_32_INTRINSICS_DECL__ float4 __ldlu(const float4 *ptr) { float4 ret; asm ("ld.global.lu.v4.f32 {%0,%1,%2,%3}, [%4];" : "=f"(ret.x), "=f"(ret.y), "=f"(ret.z), "=f"(ret.w) : __LDG_PTR (ptr) : "memory"); return ret; }
319
+ __SM_32_INTRINSICS_DECL__ double2 __ldlu(const double2 *ptr) { double2 ret; asm ("ld.global.lu.v2.f64 {%0,%1}, [%2];" : "=d"(ret.x), "=d"(ret.y) : __LDG_PTR (ptr) : "memory"); return ret; }
320
+
321
+ /******************************************************************************
322
+ * __ldcv *
323
+ ******************************************************************************/
324
+
325
+ // Size of long is architecture and OS specific.
326
+ #if defined(__LP64__) // 64 bits
327
+ __SM_32_INTRINSICS_DECL__ long __ldcv(const long *ptr) { unsigned long ret; asm ("ld.global.cv.s64 %0, [%1];" : "=l"(ret) : __LDG_PTR (ptr) : "memory"); return (long)ret; }
328
+ __SM_32_INTRINSICS_DECL__ unsigned long __ldcv(const unsigned long *ptr) { unsigned long ret; asm ("ld.global.cv.u64 %0, [%1];" : "=l"(ret) : __LDG_PTR (ptr) : "memory"); return ret; }
329
+ #else // 32 bits
330
+ __SM_32_INTRINSICS_DECL__ long __ldcv(const long *ptr) { unsigned long ret; asm ("ld.global.cv.s32 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr) : "memory"); return (long)ret; }
331
+ __SM_32_INTRINSICS_DECL__ unsigned long __ldcv(const unsigned long *ptr) { unsigned long ret; asm ("ld.global.cv.u32 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr) : "memory"); return ret; }
332
+ #endif
333
+
334
+
335
+ __SM_32_INTRINSICS_DECL__ char __ldcv(const char *ptr) { unsigned int ret; asm ("ld.global.cv.s8 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr) : "memory"); return (char)ret; }
336
+ __SM_32_INTRINSICS_DECL__ signed char __ldcv(const signed char *ptr) { unsigned int ret; asm ("ld.global.cv.s8 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr) : "memory"); return (signed char)ret; }
337
+ __SM_32_INTRINSICS_DECL__ short __ldcv(const short *ptr) { unsigned short ret; asm ("ld.global.cv.s16 %0, [%1];" : "=h"(ret) : __LDG_PTR (ptr) : "memory"); return (short)ret; }
338
+ __SM_32_INTRINSICS_DECL__ int __ldcv(const int *ptr) { unsigned int ret; asm ("ld.global.cv.s32 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr) : "memory"); return (int)ret; }
339
+ __SM_32_INTRINSICS_DECL__ long long __ldcv(const long long *ptr) { unsigned long long ret; asm ("ld.global.cv.s64 %0, [%1];" : "=l"(ret) : __LDG_PTR (ptr) : "memory"); return (long long)ret; }
340
+ __SM_32_INTRINSICS_DECL__ char2 __ldcv(const char2 *ptr) { char2 ret; int2 tmp; asm ("ld.global.cv.v2.s8 {%0,%1}, [%2];" : "=r"(tmp.x), "=r"(tmp.y) : __LDG_PTR (ptr) : "memory"); ret.x = (char)tmp.x; ret.y = (char)tmp.y; return ret; }
341
+ __SM_32_INTRINSICS_DECL__ char4 __ldcv(const char4 *ptr) { char4 ret; int4 tmp; asm ("ld.global.cv.v4.s8 {%0,%1,%2,%3}, [%4];" : "=r"(tmp.x), "=r"(tmp.y), "=r"(tmp.z), "=r"(tmp.w) : __LDG_PTR (ptr) : "memory"); ret.x = (char)tmp.x; ret.y = (char)tmp.y; ret.z = (char)tmp.z; ret.w = (char)tmp.w; return ret; }
342
+ __SM_32_INTRINSICS_DECL__ short2 __ldcv(const short2 *ptr) { short2 ret; asm ("ld.global.cv.v2.s16 {%0,%1}, [%2];" : "=h"(ret.x), "=h"(ret.y) : __LDG_PTR (ptr) : "memory"); return ret; }
343
+ __SM_32_INTRINSICS_DECL__ short4 __ldcv(const short4 *ptr) { short4 ret; asm ("ld.global.cv.v4.s16 {%0,%1,%2,%3}, [%4];" : "=h"(ret.x), "=h"(ret.y), "=h"(ret.z), "=h"(ret.w) : __LDG_PTR (ptr) : "memory"); return ret; }
344
+ __SM_32_INTRINSICS_DECL__ int2 __ldcv(const int2 *ptr) { int2 ret; asm ("ld.global.cv.v2.s32 {%0,%1}, [%2];" : "=r"(ret.x), "=r"(ret.y) : __LDG_PTR (ptr) : "memory"); return ret; }
345
+ __SM_32_INTRINSICS_DECL__ int4 __ldcv(const int4 *ptr) { int4 ret; asm ("ld.global.cv.v4.s32 {%0,%1,%2,%3}, [%4];" : "=r"(ret.x), "=r"(ret.y), "=r"(ret.z), "=r"(ret.w) : __LDG_PTR (ptr) : "memory"); return ret; }
346
+ __SM_32_INTRINSICS_DECL__ longlong2 __ldcv(const longlong2 *ptr) { longlong2 ret; asm ("ld.global.cv.v2.s64 {%0,%1}, [%2];" : "=l"(ret.x), "=l"(ret.y) : __LDG_PTR (ptr) : "memory"); return ret; }
347
+
348
+ __SM_32_INTRINSICS_DECL__ unsigned char __ldcv(const unsigned char *ptr) { unsigned int ret; asm ("ld.global.cv.u8 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr) : "memory"); return (unsigned char)ret; }
349
+ __SM_32_INTRINSICS_DECL__ unsigned short __ldcv(const unsigned short *ptr) { unsigned short ret; asm ("ld.global.cv.u16 %0, [%1];" : "=h"(ret) : __LDG_PTR (ptr) : "memory"); return ret; }
350
+ __SM_32_INTRINSICS_DECL__ unsigned int __ldcv(const unsigned int *ptr) { unsigned int ret; asm ("ld.global.cv.u32 %0, [%1];" : "=r"(ret) : __LDG_PTR (ptr) : "memory"); return ret; }
351
+ __SM_32_INTRINSICS_DECL__ unsigned long long __ldcv(const unsigned long long *ptr) { unsigned long long ret; asm ("ld.global.cv.u64 %0, [%1];" : "=l"(ret) : __LDG_PTR (ptr) : "memory"); return ret; }
352
+ __SM_32_INTRINSICS_DECL__ uchar2 __ldcv(const uchar2 *ptr) { uchar2 ret; uint2 tmp; asm ("ld.global.cv.v2.u8 {%0,%1}, [%2];" : "=r"(tmp.x), "=r"(tmp.y) : __LDG_PTR (ptr) : "memory"); ret.x = (unsigned char)tmp.x; ret.y = (unsigned char)tmp.y; return ret; }
353
+ __SM_32_INTRINSICS_DECL__ uchar4 __ldcv(const uchar4 *ptr) { uchar4 ret; uint4 tmp; asm ("ld.global.cv.v4.u8 {%0,%1,%2,%3}, [%4];" : "=r"(tmp.x), "=r"(tmp.y), "=r"(tmp.z), "=r"(tmp.w) : __LDG_PTR (ptr) : "memory"); ret.x = (unsigned char)tmp.x; ret.y = (unsigned char)tmp.y; ret.z = (unsigned char)tmp.z; ret.w = (unsigned char)tmp.w; return ret; }
354
+ __SM_32_INTRINSICS_DECL__ ushort2 __ldcv(const ushort2 *ptr) { ushort2 ret; asm ("ld.global.cv.v2.u16 {%0,%1}, [%2];" : "=h"(ret.x), "=h"(ret.y) : __LDG_PTR (ptr) : "memory"); return ret; }
355
+ __SM_32_INTRINSICS_DECL__ ushort4 __ldcv(const ushort4 *ptr) { ushort4 ret; asm ("ld.global.cv.v4.u16 {%0,%1,%2,%3}, [%4];" : "=h"(ret.x), "=h"(ret.y), "=h"(ret.z), "=h"(ret.w) : __LDG_PTR (ptr) : "memory"); return ret; }
356
+ __SM_32_INTRINSICS_DECL__ uint2 __ldcv(const uint2 *ptr) { uint2 ret; asm ("ld.global.cv.v2.u32 {%0,%1}, [%2];" : "=r"(ret.x), "=r"(ret.y) : __LDG_PTR (ptr) : "memory"); return ret; }
357
+ __SM_32_INTRINSICS_DECL__ uint4 __ldcv(const uint4 *ptr) { uint4 ret; asm ("ld.global.cv.v4.u32 {%0,%1,%2,%3}, [%4];" : "=r"(ret.x), "=r"(ret.y), "=r"(ret.z), "=r"(ret.w) : __LDG_PTR (ptr) : "memory"); return ret; }
358
+ __SM_32_INTRINSICS_DECL__ ulonglong2 __ldcv(const ulonglong2 *ptr) { ulonglong2 ret; asm ("ld.global.cv.v2.u64 {%0,%1}, [%2];" : "=l"(ret.x), "=l"(ret.y) : __LDG_PTR (ptr) : "memory"); return ret; }
359
+
360
+ __SM_32_INTRINSICS_DECL__ float __ldcv(const float *ptr) { float ret; asm ("ld.global.cv.f32 %0, [%1];" : "=f"(ret) : __LDG_PTR (ptr) : "memory"); return ret; }
361
+ __SM_32_INTRINSICS_DECL__ double __ldcv(const double *ptr) { double ret; asm ("ld.global.cv.f64 %0, [%1];" : "=d"(ret) : __LDG_PTR (ptr) : "memory"); return ret; }
362
+ __SM_32_INTRINSICS_DECL__ float2 __ldcv(const float2 *ptr) { float2 ret; asm ("ld.global.cv.v2.f32 {%0,%1}, [%2];" : "=f"(ret.x), "=f"(ret.y) : __LDG_PTR (ptr) : "memory"); return ret; }
363
+ __SM_32_INTRINSICS_DECL__ float4 __ldcv(const float4 *ptr) { float4 ret; asm ("ld.global.cv.v4.f32 {%0,%1,%2,%3}, [%4];" : "=f"(ret.x), "=f"(ret.y), "=f"(ret.z), "=f"(ret.w) : __LDG_PTR (ptr) : "memory"); return ret; }
364
+ __SM_32_INTRINSICS_DECL__ double2 __ldcv(const double2 *ptr) { double2 ret; asm ("ld.global.cv.v2.f64 {%0,%1}, [%2];" : "=d"(ret.x), "=d"(ret.y) : __LDG_PTR (ptr) : "memory"); return ret; }
365
+
366
+ /******************************************************************************
367
+ * __stwb *
368
+ ******************************************************************************/
369
+
370
+ // Size of long is architecture and OS specific.
371
+ #if defined(__LP64__) // 64 bits
372
+ __SM_32_INTRINSICS_DECL__ void __stwb(long *ptr, long value) { asm ("st.global.wb.s64 [%0], %1;" :: __LDG_PTR (ptr), "l"(value) : "memory"); }
373
+ __SM_32_INTRINSICS_DECL__ void __stwb(unsigned long *ptr, unsigned long value) { asm ("st.global.wb.u64 [%0], %1;" :: __LDG_PTR (ptr), "l"(value) : "memory"); }
374
+ #else // 32 bits
375
+ __SM_32_INTRINSICS_DECL__ void __stwb(long *ptr, long value) { asm ("st.global.wb.s32 [%0], %1;" :: __LDG_PTR (ptr), "r"(value) : "memory"); }
376
+ __SM_32_INTRINSICS_DECL__ void __stwb(unsigned long *ptr, unsigned long value) { asm ("st.global.wb.u32 [%0], %1;" :: __LDG_PTR (ptr), "r"(value) : "memory"); }
377
+ #endif
378
+
379
+
380
+ __SM_32_INTRINSICS_DECL__ void __stwb(char *ptr, char value) { asm ("st.global.wb.s8 [%0], %1;" :: __LDG_PTR (ptr), "r"((int)value) : "memory"); }
381
+ __SM_32_INTRINSICS_DECL__ void __stwb(signed char *ptr, signed char value) { asm ("st.global.wb.s8 [%0], %1;" :: __LDG_PTR (ptr), "r"((int)value) : "memory"); }
382
+ __SM_32_INTRINSICS_DECL__ void __stwb(short *ptr, short value) { asm ("st.global.wb.s16 [%0], %1;" :: __LDG_PTR (ptr), "h"(value) : "memory"); }
383
+ __SM_32_INTRINSICS_DECL__ void __stwb(int *ptr, int value) { asm ("st.global.wb.s32 [%0], %1;" :: __LDG_PTR (ptr), "r"(value) : "memory"); }
384
+ __SM_32_INTRINSICS_DECL__ void __stwb(long long *ptr, long long value) { asm ("st.global.wb.s64 [%0], %1;" :: __LDG_PTR (ptr), "l"(value) : "memory"); }
385
+ __SM_32_INTRINSICS_DECL__ void __stwb(char2 *ptr, char2 value) { const int x = value.x, y = value.y; asm ("st.global.wb.v2.s8 [%0], {%1,%2};" :: __LDG_PTR (ptr), "r"(x), "r"(y) : "memory"); }
386
+ __SM_32_INTRINSICS_DECL__ void __stwb(char4 *ptr, char4 value) { const int x = value.x, y = value.y, z = value.z, w = value.w; asm ("st.global.wb.v4.s8 [%0], {%1,%2,%3,%4};" :: __LDG_PTR (ptr), "r"(x), "r"(y), "r"(z), "r"(w) : "memory"); }
387
+ __SM_32_INTRINSICS_DECL__ void __stwb(short2 *ptr, short2 value) { asm ("st.global.wb.v2.s16 [%0], {%1,%2};" :: __LDG_PTR (ptr), "h"(value.x), "h"(value.y) : "memory"); }
388
+ __SM_32_INTRINSICS_DECL__ void __stwb(short4 *ptr, short4 value) { asm ("st.global.wb.v4.s16 [%0], {%1,%2,%3,%4};" :: __LDG_PTR (ptr), "h"(value.x), "h"(value.y), "h"(value.z), "h"(value.w) : "memory"); }
389
+ __SM_32_INTRINSICS_DECL__ void __stwb(int2 *ptr, int2 value) { asm ("st.global.wb.v2.s32 [%0], {%1,%2};" :: __LDG_PTR (ptr), "r"(value.x), "r"(value.y) : "memory"); }
390
+ __SM_32_INTRINSICS_DECL__ void __stwb(int4 *ptr, int4 value) { asm ("st.global.wb.v4.s32 [%0], {%1,%2,%3,%4};" :: __LDG_PTR (ptr), "r"(value.x), "r"(value.y), "r"(value.z), "r"(value.w) : "memory"); }
391
+ __SM_32_INTRINSICS_DECL__ void __stwb(longlong2 *ptr, longlong2 value) { asm ("st.global.wb.v2.s64 [%0], {%1,%2};" :: __LDG_PTR (ptr), "l"(value.x), "l"(value.y) : "memory"); }
392
+
393
+ __SM_32_INTRINSICS_DECL__ void __stwb(unsigned char *ptr, unsigned char value) { asm ("st.global.wb.u8 [%0], %1;" :: __LDG_PTR (ptr), "r"((int)value) : "memory"); }
394
+ __SM_32_INTRINSICS_DECL__ void __stwb(unsigned short *ptr, unsigned short value) { asm ("st.global.wb.u16 [%0], %1;" :: __LDG_PTR (ptr), "h"(value) : "memory"); }
395
+ __SM_32_INTRINSICS_DECL__ void __stwb(unsigned int *ptr, unsigned int value) { asm ("st.global.wb.u32 [%0], %1;" :: __LDG_PTR (ptr), "r"(value) : "memory"); }
396
+ __SM_32_INTRINSICS_DECL__ void __stwb(unsigned long long *ptr, unsigned long long value) { asm ("st.global.wb.u64 [%0], %1;" :: __LDG_PTR (ptr), "l"(value) : "memory"); }
397
+ __SM_32_INTRINSICS_DECL__ void __stwb(uchar2 *ptr, uchar2 value) { const int x = value.x, y = value.y; asm ("st.global.wb.v2.u8 [%0], {%1,%2};" :: __LDG_PTR (ptr), "r"(x), "r"(y) : "memory"); }
398
+ __SM_32_INTRINSICS_DECL__ void __stwb(uchar4 *ptr, uchar4 value) { const int x = value.x, y = value.y, z = value.z, w = value.w; asm ("st.global.wb.v4.u8 [%0], {%1,%2,%3,%4};" :: __LDG_PTR (ptr), "r"(x), "r"(y), "r"(z), "r"(w) : "memory"); }
399
+ __SM_32_INTRINSICS_DECL__ void __stwb(ushort2 *ptr, ushort2 value) { asm ("st.global.wb.v2.u16 [%0], {%1,%2};" :: __LDG_PTR (ptr), "h"(value.x), "h"(value.y) : "memory"); }
400
+ __SM_32_INTRINSICS_DECL__ void __stwb(ushort4 *ptr, ushort4 value) { asm ("st.global.wb.v4.u16 [%0], {%1,%2,%3,%4};" :: __LDG_PTR (ptr), "h"(value.x), "h"(value.y), "h"(value.z), "h"(value.w) : "memory"); }
401
+ __SM_32_INTRINSICS_DECL__ void __stwb(uint2 *ptr, uint2 value) { asm ("st.global.wb.v2.u32 [%0], {%1,%2};" :: __LDG_PTR (ptr), "r"(value.x), "r"(value.y) : "memory"); }
402
+ __SM_32_INTRINSICS_DECL__ void __stwb(uint4 *ptr, uint4 value) { asm ("st.global.wb.v4.u32 [%0], {%1,%2,%3,%4};" :: __LDG_PTR (ptr), "r"(value.x), "r"(value.y), "r"(value.z), "r"(value.w) : "memory"); }
403
+ __SM_32_INTRINSICS_DECL__ void __stwb(ulonglong2 *ptr, ulonglong2 value) { asm ("st.global.wb.v2.u64 [%0], {%1,%2};" :: __LDG_PTR (ptr), "l"(value.x), "l"(value.y) : "memory"); }
404
+
405
+ __SM_32_INTRINSICS_DECL__ void __stwb(float *ptr, float value) { asm ("st.global.wb.f32 [%0], %1;" :: __LDG_PTR (ptr), "f"(value) : "memory"); }
406
+ __SM_32_INTRINSICS_DECL__ void __stwb(double *ptr, double value) { asm ("st.global.wb.f64 [%0], %1;" :: __LDG_PTR (ptr), "d"(value) : "memory"); }
407
+ __SM_32_INTRINSICS_DECL__ void __stwb(float2 *ptr, float2 value) { asm ("st.global.wb.v2.f32 [%0], {%1,%2};" :: __LDG_PTR (ptr), "f"(value.x), "f"(value.y) : "memory"); }
408
+ __SM_32_INTRINSICS_DECL__ void __stwb(float4 *ptr, float4 value) { asm ("st.global.wb.v4.f32 [%0], {%1,%2,%3,%4};" :: __LDG_PTR (ptr), "f"(value.x), "f"(value.y), "f"(value.z), "f"(value.w) : "memory"); }
409
+ __SM_32_INTRINSICS_DECL__ void __stwb(double2 *ptr, double2 value) { asm ("st.global.wb.v2.f64 [%0], {%1,%2};" :: __LDG_PTR (ptr), "d"(value.x), "d"(value.y) : "memory"); }
410
+
411
+ /******************************************************************************
412
+ * __stcg *
413
+ ******************************************************************************/
414
+
415
+ // Size of long is architecture and OS specific.
416
+ #if defined(__LP64__) // 64 bits
417
+ __SM_32_INTRINSICS_DECL__ void __stcg(long *ptr, long value) { asm ("st.global.cg.s64 [%0], %1;" :: __LDG_PTR (ptr), "l"(value) : "memory"); }
418
+ __SM_32_INTRINSICS_DECL__ void __stcg(unsigned long *ptr, unsigned long value) { asm ("st.global.cg.u64 [%0], %1;" :: __LDG_PTR (ptr), "l"(value) : "memory"); }
419
+ #else // 32 bits
420
+ __SM_32_INTRINSICS_DECL__ void __stcg(long *ptr, long value) { asm ("st.global.cg.s32 [%0], %1;" :: __LDG_PTR (ptr), "r"(value) : "memory"); }
421
+ __SM_32_INTRINSICS_DECL__ void __stcg(unsigned long *ptr, unsigned long value) { asm ("st.global.cg.u32 [%0], %1;" :: __LDG_PTR (ptr), "r"(value) : "memory"); }
422
+ #endif
423
+
424
+
425
+ __SM_32_INTRINSICS_DECL__ void __stcg(char *ptr, char value) { asm ("st.global.cg.s8 [%0], %1;" :: __LDG_PTR (ptr), "r"((int)value) : "memory"); }
426
+ __SM_32_INTRINSICS_DECL__ void __stcg(signed char *ptr, signed char value) { asm ("st.global.cg.s8 [%0], %1;" :: __LDG_PTR (ptr), "r"((int)value) : "memory"); }
427
+ __SM_32_INTRINSICS_DECL__ void __stcg(short *ptr, short value) { asm ("st.global.cg.s16 [%0], %1;" :: __LDG_PTR (ptr), "h"(value) : "memory"); }
428
+ __SM_32_INTRINSICS_DECL__ void __stcg(int *ptr, int value) { asm ("st.global.cg.s32 [%0], %1;" :: __LDG_PTR (ptr), "r"(value) : "memory"); }
429
+ __SM_32_INTRINSICS_DECL__ void __stcg(long long *ptr, long long value) { asm ("st.global.cg.s64 [%0], %1;" :: __LDG_PTR (ptr), "l"(value) : "memory"); }
430
+ __SM_32_INTRINSICS_DECL__ void __stcg(char2 *ptr, char2 value) { const int x = value.x, y = value.y; asm ("st.global.cg.v2.s8 [%0], {%1,%2};" :: __LDG_PTR (ptr), "r"(x), "r"(y) : "memory"); }
431
+ __SM_32_INTRINSICS_DECL__ void __stcg(char4 *ptr, char4 value) { const int x = value.x, y = value.y, z = value.z, w = value.w; asm ("st.global.cg.v4.s8 [%0], {%1,%2,%3,%4};" :: __LDG_PTR (ptr), "r"(x), "r"(y), "r"(z), "r"(w) : "memory"); }
432
+ __SM_32_INTRINSICS_DECL__ void __stcg(short2 *ptr, short2 value) { asm ("st.global.cg.v2.s16 [%0], {%1,%2};" :: __LDG_PTR (ptr), "h"(value.x), "h"(value.y) : "memory"); }
433
+ __SM_32_INTRINSICS_DECL__ void __stcg(short4 *ptr, short4 value) { asm ("st.global.cg.v4.s16 [%0], {%1,%2,%3,%4};" :: __LDG_PTR (ptr), "h"(value.x), "h"(value.y), "h"(value.z), "h"(value.w) : "memory"); }
434
+ __SM_32_INTRINSICS_DECL__ void __stcg(int2 *ptr, int2 value) { asm ("st.global.cg.v2.s32 [%0], {%1,%2};" :: __LDG_PTR (ptr), "r"(value.x), "r"(value.y) : "memory"); }
435
+ __SM_32_INTRINSICS_DECL__ void __stcg(int4 *ptr, int4 value) { asm ("st.global.cg.v4.s32 [%0], {%1,%2,%3,%4};" :: __LDG_PTR (ptr), "r"(value.x), "r"(value.y), "r"(value.z), "r"(value.w) : "memory"); }
436
+ __SM_32_INTRINSICS_DECL__ void __stcg(longlong2 *ptr, longlong2 value) { asm ("st.global.cg.v2.s64 [%0], {%1,%2};" :: __LDG_PTR (ptr), "l"(value.x), "l"(value.y) : "memory"); }
437
+
438
+ __SM_32_INTRINSICS_DECL__ void __stcg(unsigned char *ptr, unsigned char value) { asm ("st.global.cg.u8 [%0], %1;" :: __LDG_PTR (ptr), "r"((int)value) : "memory"); }
439
+ __SM_32_INTRINSICS_DECL__ void __stcg(unsigned short *ptr, unsigned short value) { asm ("st.global.cg.u16 [%0], %1;" :: __LDG_PTR (ptr), "h"(value) : "memory"); }
440
+ __SM_32_INTRINSICS_DECL__ void __stcg(unsigned int *ptr, unsigned int value) { asm ("st.global.cg.u32 [%0], %1;" :: __LDG_PTR (ptr), "r"(value) : "memory"); }
441
+ __SM_32_INTRINSICS_DECL__ void __stcg(unsigned long long *ptr, unsigned long long value) { asm ("st.global.cg.u64 [%0], %1;" :: __LDG_PTR (ptr), "l"(value) : "memory"); }
442
+ __SM_32_INTRINSICS_DECL__ void __stcg(uchar2 *ptr, uchar2 value) { const int x = value.x, y = value.y; asm ("st.global.cg.v2.u8 [%0], {%1,%2};" :: __LDG_PTR (ptr), "r"(x), "r"(y) : "memory"); }
443
+ __SM_32_INTRINSICS_DECL__ void __stcg(uchar4 *ptr, uchar4 value) { const int x = value.x, y = value.y, z = value.z, w = value.w; asm ("st.global.cg.v4.u8 [%0], {%1,%2,%3,%4};" :: __LDG_PTR (ptr), "r"(x), "r"(y), "r"(z), "r"(w) : "memory"); }
444
+ __SM_32_INTRINSICS_DECL__ void __stcg(ushort2 *ptr, ushort2 value) { asm ("st.global.cg.v2.u16 [%0], {%1,%2};" :: __LDG_PTR (ptr), "h"(value.x), "h"(value.y) : "memory"); }
445
+ __SM_32_INTRINSICS_DECL__ void __stcg(ushort4 *ptr, ushort4 value) { asm ("st.global.cg.v4.u16 [%0], {%1,%2,%3,%4};" :: __LDG_PTR (ptr), "h"(value.x), "h"(value.y), "h"(value.z), "h"(value.w) : "memory"); }
446
+ __SM_32_INTRINSICS_DECL__ void __stcg(uint2 *ptr, uint2 value) { asm ("st.global.cg.v2.u32 [%0], {%1,%2};" :: __LDG_PTR (ptr), "r"(value.x), "r"(value.y) : "memory"); }
447
+ __SM_32_INTRINSICS_DECL__ void __stcg(uint4 *ptr, uint4 value) { asm ("st.global.cg.v4.u32 [%0], {%1,%2,%3,%4};" :: __LDG_PTR (ptr), "r"(value.x), "r"(value.y), "r"(value.z), "r"(value.w) : "memory"); }
448
+ __SM_32_INTRINSICS_DECL__ void __stcg(ulonglong2 *ptr, ulonglong2 value) { asm ("st.global.cg.v2.u64 [%0], {%1,%2};" :: __LDG_PTR (ptr), "l"(value.x), "l"(value.y) : "memory"); }
449
+
450
+ __SM_32_INTRINSICS_DECL__ void __stcg(float *ptr, float value) { asm ("st.global.cg.f32 [%0], %1;" :: __LDG_PTR (ptr), "f"(value) : "memory"); }
451
+ __SM_32_INTRINSICS_DECL__ void __stcg(double *ptr, double value) { asm ("st.global.cg.f64 [%0], %1;" :: __LDG_PTR (ptr), "d"(value) : "memory"); }
452
+ __SM_32_INTRINSICS_DECL__ void __stcg(float2 *ptr, float2 value) { asm ("st.global.cg.v2.f32 [%0], {%1,%2};" :: __LDG_PTR (ptr), "f"(value.x), "f"(value.y) : "memory"); }
453
+ __SM_32_INTRINSICS_DECL__ void __stcg(float4 *ptr, float4 value) { asm ("st.global.cg.v4.f32 [%0], {%1,%2,%3,%4};" :: __LDG_PTR (ptr), "f"(value.x), "f"(value.y), "f"(value.z), "f"(value.w) : "memory"); }
454
+ __SM_32_INTRINSICS_DECL__ void __stcg(double2 *ptr, double2 value) { asm ("st.global.cg.v2.f64 [%0], {%1,%2};" :: __LDG_PTR (ptr), "d"(value.x), "d"(value.y) : "memory"); }
455
+
456
+ /******************************************************************************
457
+ * __stcs *
458
+ ******************************************************************************/
459
+
460
+ // Size of long is architecture and OS specific.
461
+ #if defined(__LP64__) // 64 bits
462
+ __SM_32_INTRINSICS_DECL__ void __stcs(long *ptr, long value) { asm ("st.global.cs.s64 [%0], %1;" :: __LDG_PTR (ptr), "l"(value) : "memory"); }
463
+ __SM_32_INTRINSICS_DECL__ void __stcs(unsigned long *ptr, unsigned long value) { asm ("st.global.cs.u64 [%0], %1;" :: __LDG_PTR (ptr), "l"(value) : "memory"); }
464
+ #else // 32 bits
465
+ __SM_32_INTRINSICS_DECL__ void __stcs(long *ptr, long value) { asm ("st.global.cs.s32 [%0], %1;" :: __LDG_PTR (ptr), "r"(value) : "memory"); }
466
+ __SM_32_INTRINSICS_DECL__ void __stcs(unsigned long *ptr, unsigned long value) { asm ("st.global.cs.u32 [%0], %1;" :: __LDG_PTR (ptr), "r"(value) : "memory"); }
467
+ #endif
468
+
469
+
470
+ __SM_32_INTRINSICS_DECL__ void __stcs(char *ptr, char value) { asm ("st.global.cs.s8 [%0], %1;" :: __LDG_PTR (ptr), "r"((int)value) : "memory"); }
471
+ __SM_32_INTRINSICS_DECL__ void __stcs(signed char *ptr, signed char value) { asm ("st.global.cs.s8 [%0], %1;" :: __LDG_PTR (ptr), "r"((int)value) : "memory"); }
472
+ __SM_32_INTRINSICS_DECL__ void __stcs(short *ptr, short value) { asm ("st.global.cs.s16 [%0], %1;" :: __LDG_PTR (ptr), "h"(value) : "memory"); }
473
+ __SM_32_INTRINSICS_DECL__ void __stcs(int *ptr, int value) { asm ("st.global.cs.s32 [%0], %1;" :: __LDG_PTR (ptr), "r"(value) : "memory"); }
474
+ __SM_32_INTRINSICS_DECL__ void __stcs(long long *ptr, long long value) { asm ("st.global.cs.s64 [%0], %1;" :: __LDG_PTR (ptr), "l"(value) : "memory"); }
475
+ __SM_32_INTRINSICS_DECL__ void __stcs(char2 *ptr, char2 value) { const int x = value.x, y = value.y; asm ("st.global.cs.v2.s8 [%0], {%1,%2};" :: __LDG_PTR (ptr), "r"(x), "r"(y) : "memory"); }
476
+ __SM_32_INTRINSICS_DECL__ void __stcs(char4 *ptr, char4 value) { const int x = value.x, y = value.y, z = value.z, w = value.w; asm ("st.global.cs.v4.s8 [%0], {%1,%2,%3,%4};" :: __LDG_PTR (ptr), "r"(x), "r"(y), "r"(z), "r"(w) : "memory"); }
477
+ __SM_32_INTRINSICS_DECL__ void __stcs(short2 *ptr, short2 value) { asm ("st.global.cs.v2.s16 [%0], {%1,%2};" :: __LDG_PTR (ptr), "h"(value.x), "h"(value.y) : "memory"); }
478
+ __SM_32_INTRINSICS_DECL__ void __stcs(short4 *ptr, short4 value) { asm ("st.global.cs.v4.s16 [%0], {%1,%2,%3,%4};" :: __LDG_PTR (ptr), "h"(value.x), "h"(value.y), "h"(value.z), "h"(value.w) : "memory"); }
479
+ __SM_32_INTRINSICS_DECL__ void __stcs(int2 *ptr, int2 value) { asm ("st.global.cs.v2.s32 [%0], {%1,%2};" :: __LDG_PTR (ptr), "r"(value.x), "r"(value.y) : "memory"); }
480
+ __SM_32_INTRINSICS_DECL__ void __stcs(int4 *ptr, int4 value) { asm ("st.global.cs.v4.s32 [%0], {%1,%2,%3,%4};" :: __LDG_PTR (ptr), "r"(value.x), "r"(value.y), "r"(value.z), "r"(value.w) : "memory"); }
481
+ __SM_32_INTRINSICS_DECL__ void __stcs(longlong2 *ptr, longlong2 value) { asm ("st.global.cs.v2.s64 [%0], {%1,%2};" :: __LDG_PTR (ptr), "l"(value.x), "l"(value.y) : "memory"); }
482
+
483
+ __SM_32_INTRINSICS_DECL__ void __stcs(unsigned char *ptr, unsigned char value) { asm ("st.global.cs.u8 [%0], %1;" :: __LDG_PTR (ptr), "r"((int)value) : "memory"); }
484
+ __SM_32_INTRINSICS_DECL__ void __stcs(unsigned short *ptr, unsigned short value) { asm ("st.global.cs.u16 [%0], %1;" :: __LDG_PTR (ptr), "h"(value) : "memory"); }
485
+ __SM_32_INTRINSICS_DECL__ void __stcs(unsigned int *ptr, unsigned int value) { asm ("st.global.cs.u32 [%0], %1;" :: __LDG_PTR (ptr), "r"(value) : "memory"); }
486
+ __SM_32_INTRINSICS_DECL__ void __stcs(unsigned long long *ptr, unsigned long long value) { asm ("st.global.cs.u64 [%0], %1;" :: __LDG_PTR (ptr), "l"(value) : "memory"); }
487
+ __SM_32_INTRINSICS_DECL__ void __stcs(uchar2 *ptr, uchar2 value) { const int x = value.x, y = value.y; asm ("st.global.cs.v2.u8 [%0], {%1,%2};" :: __LDG_PTR (ptr), "r"(x), "r"(y) : "memory"); }
488
+ __SM_32_INTRINSICS_DECL__ void __stcs(uchar4 *ptr, uchar4 value) { const int x = value.x, y = value.y, z = value.z, w = value.w; asm ("st.global.cs.v4.u8 [%0], {%1,%2,%3,%4};" :: __LDG_PTR (ptr), "r"(x), "r"(y), "r"(z), "r"(w) : "memory"); }
489
+ __SM_32_INTRINSICS_DECL__ void __stcs(ushort2 *ptr, ushort2 value) { asm ("st.global.cs.v2.u16 [%0], {%1,%2};" :: __LDG_PTR (ptr), "h"(value.x), "h"(value.y) : "memory"); }
490
+ __SM_32_INTRINSICS_DECL__ void __stcs(ushort4 *ptr, ushort4 value) { asm ("st.global.cs.v4.u16 [%0], {%1,%2,%3,%4};" :: __LDG_PTR (ptr), "h"(value.x), "h"(value.y), "h"(value.z), "h"(value.w) : "memory"); }
491
+ __SM_32_INTRINSICS_DECL__ void __stcs(uint2 *ptr, uint2 value) { asm ("st.global.cs.v2.u32 [%0], {%1,%2};" :: __LDG_PTR (ptr), "r"(value.x), "r"(value.y) : "memory"); }
492
+ __SM_32_INTRINSICS_DECL__ void __stcs(uint4 *ptr, uint4 value) { asm ("st.global.cs.v4.u32 [%0], {%1,%2,%3,%4};" :: __LDG_PTR (ptr), "r"(value.x), "r"(value.y), "r"(value.z), "r"(value.w) : "memory"); }
493
+ __SM_32_INTRINSICS_DECL__ void __stcs(ulonglong2 *ptr, ulonglong2 value) { asm ("st.global.cs.v2.u64 [%0], {%1,%2};" :: __LDG_PTR (ptr), "l"(value.x), "l"(value.y) : "memory"); }
494
+
495
+ __SM_32_INTRINSICS_DECL__ void __stcs(float *ptr, float value) { asm ("st.global.cs.f32 [%0], %1;" :: __LDG_PTR (ptr), "f"(value) : "memory"); }
496
+ __SM_32_INTRINSICS_DECL__ void __stcs(double *ptr, double value) { asm ("st.global.cs.f64 [%0], %1;" :: __LDG_PTR (ptr), "d"(value) : "memory"); }
497
+ __SM_32_INTRINSICS_DECL__ void __stcs(float2 *ptr, float2 value) { asm ("st.global.cs.v2.f32 [%0], {%1,%2};" :: __LDG_PTR (ptr), "f"(value.x), "f"(value.y) : "memory"); }
498
+ __SM_32_INTRINSICS_DECL__ void __stcs(float4 *ptr, float4 value) { asm ("st.global.cs.v4.f32 [%0], {%1,%2,%3,%4};" :: __LDG_PTR (ptr), "f"(value.x), "f"(value.y), "f"(value.z), "f"(value.w) : "memory"); }
499
+ __SM_32_INTRINSICS_DECL__ void __stcs(double2 *ptr, double2 value) { asm ("st.global.cs.v2.f64 [%0], {%1,%2};" :: __LDG_PTR (ptr), "d"(value.x), "d"(value.y) : "memory"); }
500
+
501
+ /******************************************************************************
502
+ * __stwt *
503
+ ******************************************************************************/
504
+
505
+ // Size of long is architecture and OS specific.
506
+ #if defined(__LP64__) // 64 bits
507
+ __SM_32_INTRINSICS_DECL__ void __stwt(long *ptr, long value) { asm ("st.global.wt.s64 [%0], %1;" :: __LDG_PTR (ptr), "l"(value) : "memory"); }
508
+ __SM_32_INTRINSICS_DECL__ void __stwt(unsigned long *ptr, unsigned long value) { asm ("st.global.wt.u64 [%0], %1;" :: __LDG_PTR (ptr), "l"(value) : "memory"); }
509
+ #else // 32 bits
510
+ __SM_32_INTRINSICS_DECL__ void __stwt(long *ptr, long value) { asm ("st.global.wt.s32 [%0], %1;" :: __LDG_PTR (ptr), "r"(value) : "memory"); }
511
+ __SM_32_INTRINSICS_DECL__ void __stwt(unsigned long *ptr, unsigned long value) { asm ("st.global.wt.u32 [%0], %1;" :: __LDG_PTR (ptr), "r"(value) : "memory"); }
512
+ #endif
513
+
514
+
515
+ __SM_32_INTRINSICS_DECL__ void __stwt(char *ptr, char value) { asm ("st.global.wt.s8 [%0], %1;" :: __LDG_PTR (ptr), "r"((int)value) : "memory"); }
516
+ __SM_32_INTRINSICS_DECL__ void __stwt(signed char *ptr, signed char value) { asm ("st.global.wt.s8 [%0], %1;" :: __LDG_PTR (ptr), "r"((int)value) : "memory"); }
517
+ __SM_32_INTRINSICS_DECL__ void __stwt(short *ptr, short value) { asm ("st.global.wt.s16 [%0], %1;" :: __LDG_PTR (ptr), "h"(value) : "memory"); }
518
+ __SM_32_INTRINSICS_DECL__ void __stwt(int *ptr, int value) { asm ("st.global.wt.s32 [%0], %1;" :: __LDG_PTR (ptr), "r"(value) : "memory"); }
519
+ __SM_32_INTRINSICS_DECL__ void __stwt(long long *ptr, long long value) { asm ("st.global.wt.s64 [%0], %1;" :: __LDG_PTR (ptr), "l"(value) : "memory"); }
520
+ __SM_32_INTRINSICS_DECL__ void __stwt(char2 *ptr, char2 value) { const int x = value.x, y = value.y; asm ("st.global.wt.v2.s8 [%0], {%1,%2};" :: __LDG_PTR (ptr), "r"(x), "r"(y) : "memory"); }
521
+ __SM_32_INTRINSICS_DECL__ void __stwt(char4 *ptr, char4 value) { const int x = value.x, y = value.y, z = value.z, w = value.w; asm ("st.global.wt.v4.s8 [%0], {%1,%2,%3,%4};" :: __LDG_PTR (ptr), "r"(x), "r"(y), "r"(z), "r"(w) : "memory"); }
522
+ __SM_32_INTRINSICS_DECL__ void __stwt(short2 *ptr, short2 value) { asm ("st.global.wt.v2.s16 [%0], {%1,%2};" :: __LDG_PTR (ptr), "h"(value.x), "h"(value.y) : "memory"); }
523
+ __SM_32_INTRINSICS_DECL__ void __stwt(short4 *ptr, short4 value) { asm ("st.global.wt.v4.s16 [%0], {%1,%2,%3,%4};" :: __LDG_PTR (ptr), "h"(value.x), "h"(value.y), "h"(value.z), "h"(value.w) : "memory"); }
524
+ __SM_32_INTRINSICS_DECL__ void __stwt(int2 *ptr, int2 value) { asm ("st.global.wt.v2.s32 [%0], {%1,%2};" :: __LDG_PTR (ptr), "r"(value.x), "r"(value.y) : "memory"); }
525
+ __SM_32_INTRINSICS_DECL__ void __stwt(int4 *ptr, int4 value) { asm ("st.global.wt.v4.s32 [%0], {%1,%2,%3,%4};" :: __LDG_PTR (ptr), "r"(value.x), "r"(value.y), "r"(value.z), "r"(value.w) : "memory"); }
526
+ __SM_32_INTRINSICS_DECL__ void __stwt(longlong2 *ptr, longlong2 value) { asm ("st.global.wt.v2.s64 [%0], {%1,%2};" :: __LDG_PTR (ptr), "l"(value.x), "l"(value.y) : "memory"); }
527
+
528
+ __SM_32_INTRINSICS_DECL__ void __stwt(unsigned char *ptr, unsigned char value) { asm ("st.global.wt.u8 [%0], %1;" :: __LDG_PTR (ptr), "r"((int)value) : "memory"); }
529
+ __SM_32_INTRINSICS_DECL__ void __stwt(unsigned short *ptr, unsigned short value) { asm ("st.global.wt.u16 [%0], %1;" :: __LDG_PTR (ptr), "h"(value) : "memory"); }
530
+ __SM_32_INTRINSICS_DECL__ void __stwt(unsigned int *ptr, unsigned int value) { asm ("st.global.wt.u32 [%0], %1;" :: __LDG_PTR (ptr), "r"(value) : "memory"); }
531
+ __SM_32_INTRINSICS_DECL__ void __stwt(unsigned long long *ptr, unsigned long long value) { asm ("st.global.wt.u64 [%0], %1;" :: __LDG_PTR (ptr), "l"(value) : "memory"); }
532
+ __SM_32_INTRINSICS_DECL__ void __stwt(uchar2 *ptr, uchar2 value) { const int x = value.x, y = value.y; asm ("st.global.wt.v2.u8 [%0], {%1,%2};" :: __LDG_PTR (ptr), "r"(x), "r"(y) : "memory"); }
533
+ __SM_32_INTRINSICS_DECL__ void __stwt(uchar4 *ptr, uchar4 value) { const int x = value.x, y = value.y, z = value.z, w = value.w; asm ("st.global.wt.v4.u8 [%0], {%1,%2,%3,%4};" :: __LDG_PTR (ptr), "r"(x), "r"(y), "r"(z), "r"(w) : "memory"); }
534
+ __SM_32_INTRINSICS_DECL__ void __stwt(ushort2 *ptr, ushort2 value) { asm ("st.global.wt.v2.u16 [%0], {%1,%2};" :: __LDG_PTR (ptr), "h"(value.x), "h"(value.y) : "memory"); }
535
+ __SM_32_INTRINSICS_DECL__ void __stwt(ushort4 *ptr, ushort4 value) { asm ("st.global.wt.v4.u16 [%0], {%1,%2,%3,%4};" :: __LDG_PTR (ptr), "h"(value.x), "h"(value.y), "h"(value.z), "h"(value.w) : "memory"); }
536
+ __SM_32_INTRINSICS_DECL__ void __stwt(uint2 *ptr, uint2 value) { asm ("st.global.wt.v2.u32 [%0], {%1,%2};" :: __LDG_PTR (ptr), "r"(value.x), "r"(value.y) : "memory"); }
537
+ __SM_32_INTRINSICS_DECL__ void __stwt(uint4 *ptr, uint4 value) { asm ("st.global.wt.v4.u32 [%0], {%1,%2,%3,%4};" :: __LDG_PTR (ptr), "r"(value.x), "r"(value.y), "r"(value.z), "r"(value.w) : "memory"); }
538
+ __SM_32_INTRINSICS_DECL__ void __stwt(ulonglong2 *ptr, ulonglong2 value) { asm ("st.global.wt.v2.u64 [%0], {%1,%2};" :: __LDG_PTR (ptr), "l"(value.x), "l"(value.y) : "memory"); }
539
+
540
+ __SM_32_INTRINSICS_DECL__ void __stwt(float *ptr, float value) { asm ("st.global.wt.f32 [%0], %1;" :: __LDG_PTR (ptr), "f"(value) : "memory"); }
541
+ __SM_32_INTRINSICS_DECL__ void __stwt(double *ptr, double value) { asm ("st.global.wt.f64 [%0], %1;" :: __LDG_PTR (ptr), "d"(value) : "memory"); }
542
+ __SM_32_INTRINSICS_DECL__ void __stwt(float2 *ptr, float2 value) { asm ("st.global.wt.v2.f32 [%0], {%1,%2};" :: __LDG_PTR (ptr), "f"(value.x), "f"(value.y) : "memory"); }
543
+ __SM_32_INTRINSICS_DECL__ void __stwt(float4 *ptr, float4 value) { asm ("st.global.wt.v4.f32 [%0], {%1,%2,%3,%4};" :: __LDG_PTR (ptr), "f"(value.x), "f"(value.y), "f"(value.z), "f"(value.w) : "memory"); }
544
+ __SM_32_INTRINSICS_DECL__ void __stwt(double2 *ptr, double2 value) { asm ("st.global.wt.v2.f64 [%0], {%1,%2};" :: __LDG_PTR (ptr), "d"(value.x), "d"(value.y) : "memory"); }
545
+
546
+ #undef __LDG_PTR
547
+
548
+
549
+ // SHF is the "funnel shift" operation - an accelerated left/right shift with carry
550
+ // operating on 64-bit quantities, which are concatenations of two 32-bit registers.
551
+
552
+ // This shifts [b:a] left by "shift" bits, returning the most significant bits of the result.
553
+ __SM_32_INTRINSICS_DECL__ unsigned int __funnelshift_l(unsigned int lo, unsigned int hi, unsigned int shift)
554
+ {
555
+ unsigned int ret;
556
+ asm volatile ("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(ret) : "r"(lo), "r"(hi), "r"(shift));
557
+ return ret;
558
+ }
559
+ __SM_32_INTRINSICS_DECL__ unsigned int __funnelshift_lc(unsigned int lo, unsigned int hi, unsigned int shift)
560
+ {
561
+ unsigned int ret;
562
+ asm volatile ("shf.l.clamp.b32 %0, %1, %2, %3;" : "=r"(ret) : "r"(lo), "r"(hi), "r"(shift));
563
+ return ret;
564
+ }
565
+
566
+ // This shifts [b:a] right by "shift" bits, returning the least significant bits of the result.
567
+ __SM_32_INTRINSICS_DECL__ unsigned int __funnelshift_r(unsigned int lo, unsigned int hi, unsigned int shift)
568
+ {
569
+ unsigned int ret;
570
+ asm volatile ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(ret) : "r"(lo), "r"(hi), "r"(shift));
571
+ return ret;
572
+ }
573
+ __SM_32_INTRINSICS_DECL__ unsigned int __funnelshift_rc(unsigned int lo, unsigned int hi, unsigned int shift)
574
+ {
575
+ unsigned int ret;
576
+ asm volatile ("shf.r.clamp.b32 %0, %1, %2, %3;" : "=r"(ret) : "r"(lo), "r"(hi), "r"(shift));
577
+ return ret;
578
+ }
579
+
580
+
581
+ #endif /* !__CUDA_ARCH__ || __CUDA_ARCH__ >= 320 */
582
+
583
+ #endif /* __cplusplus && __CUDACC__ */
584
+
585
+ #undef __SM_32_INTRINSICS_DECL__
586
+
587
+ #endif /* !__SM_32_INTRINSICS_HPP__ */
588
+
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_runtime/include/sm_60_atomic_functions.h ADDED
@@ -0,0 +1,539 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #if !defined(__SM_60_ATOMIC_FUNCTIONS_H__)
51
+ #define __SM_60_ATOMIC_FUNCTIONS_H__
52
+
53
+
54
+ #if defined(__CUDACC_RTC__)
55
+ #define __SM_60_ATOMIC_FUNCTIONS_DECL__ __device__
56
+ #else /* __CUDACC_RTC__ */
57
+ #define __SM_60_ATOMIC_FUNCTIONS_DECL__ static __inline__ __device__
58
+ #endif /* __CUDACC_RTC__ */
59
+
60
+ #if defined(__cplusplus) && defined(__CUDACC__)
61
+
62
+ #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600
63
+
64
+ /*******************************************************************************
65
+ * *
66
+ * *
67
+ * *
68
+ *******************************************************************************/
69
+
70
+ #include "cuda_runtime_api.h"
71
+
72
+ #ifndef __CUDA_ARCH__
73
+ #define __DEF_IF_HOST { }
74
+ #else /* !__CUDA_ARCH__ */
75
+ #define __DEF_IF_HOST ;
76
+ #endif /* __CUDA_ARCH__ */
77
+
78
+
79
+
80
+ #ifdef __CUDA_ARCH__
81
+ extern "C"
82
+ {
83
+ extern __device__ __device_builtin__ double __dAtomicAdd(double *address, double val);
84
+
85
+ extern __device__ __device_builtin__
86
+ int __iAtomicAdd_block(int *address, int val);
87
+
88
+ extern __device__ __device_builtin__
89
+ int __iAtomicAdd_system(int *address, int val);
90
+
91
+ extern __device__ __device_builtin__
92
+ unsigned int __uAtomicAdd_block(unsigned int *address, unsigned int val);
93
+
94
+ extern __device__ __device_builtin__
95
+ unsigned int __uAtomicAdd_system(unsigned int *address, unsigned int val);
96
+
97
+ extern __device__ __device_builtin__
98
+ unsigned long long __ullAtomicAdd_block(unsigned long long *address, unsigned long long val);
99
+
100
+ extern __device__ __device_builtin__
101
+ unsigned long long __ullAtomicAdd_system(unsigned long long *address, unsigned long long val);
102
+
103
+ extern __device__ __device_builtin__
104
+ float __fAtomicAdd_block(float *address, float val);
105
+
106
+ extern __device__ __device_builtin__
107
+ float __fAtomicAdd_system(float *address, float val);
108
+
109
+ extern __device__ __device_builtin__
110
+ double __dAtomicAdd_block(double *address, double val);
111
+
112
+ extern __device__ __device_builtin__
113
+ double __dAtomicAdd_system(double *address, double val);
114
+
115
+ extern __device__ __device_builtin__
116
+ int __iAtomicExch_block(int *address, int val);
117
+
118
+ extern __device__ __device_builtin__
119
+ int __iAtomicExch_system(int *address, int val);
120
+
121
+ extern __device__ __device_builtin__
122
+ unsigned int __uAtomicExch_block(unsigned int *address, unsigned int val);
123
+
124
+ extern __device__ __device_builtin__
125
+ unsigned int __uAtomicExch_system(unsigned int *address, unsigned int val);
126
+
127
+ extern __device__ __device_builtin__
128
+ unsigned long long __ullAtomicExch_block(unsigned long long *address, unsigned long long val);
129
+
130
+ extern __device__ __device_builtin__
131
+ unsigned long long __ullAtomicExch_system(unsigned long long *address, unsigned long long val);
132
+
133
+ extern __device__ __device_builtin__
134
+ float __fAtomicExch_block(float *address, float val);
135
+
136
+ extern __device__ __device_builtin__
137
+ float __fAtomicExch_system(float *address, float val);
138
+
139
+ extern __device__ __device_builtin__
140
+ int __iAtomicMin_block(int *address, int val);
141
+
142
+ extern __device__ __device_builtin__
143
+ int __iAtomicMin_system(int *address, int val);
144
+
145
+ extern __device__ __device_builtin__
146
+ long long __illAtomicMin_block(long long *address, long long val);
147
+
148
+ extern __device__ __device_builtin__
149
+ long long __illAtomicMin_system(long long *address, long long val);
150
+
151
+ extern __device__ __device_builtin__
152
+ unsigned int __uAtomicMin_block(unsigned int *address, unsigned int val);
153
+
154
+ extern __device__ __device_builtin__
155
+ unsigned int __uAtomicMin_system(unsigned int *address, unsigned int val);
156
+
157
+ extern __device__ __device_builtin__
158
+ unsigned long long __ullAtomicMin_block(unsigned long long *address, unsigned long long val);
159
+
160
+ extern __device__ __device_builtin__
161
+ unsigned long long __ullAtomicMin_system(unsigned long long *address, unsigned long long val);
162
+
163
+ extern __device__ __device_builtin__
164
+ int __iAtomicMax_block(int *address, int val);
165
+
166
+ extern __device__ __device_builtin__
167
+ int __iAtomicMax_system(int *address, int val);
168
+
169
+ extern __device__ __device_builtin__
170
+ long long __illAtomicMax_block(long long *address, long long val);
171
+
172
+ extern __device__ __device_builtin__
173
+ long long __illAtomicMax_system(long long *address, long long val);
174
+
175
+ extern __device__ __device_builtin__
176
+ unsigned int __uAtomicMax_block(unsigned int *address, unsigned int val);
177
+
178
+ extern __device__ __device_builtin__
179
+ unsigned int __uAtomicMax_system(unsigned int *address, unsigned int val);
180
+
181
+ extern __device__ __device_builtin__
182
+ unsigned long long __ullAtomicMax_block(unsigned long long *address, unsigned long long val);
183
+
184
+ extern __device__ __device_builtin__
185
+ unsigned long long __ullAtomicMax_system(unsigned long long *address, unsigned long long val);
186
+
187
+ extern __device__ __device_builtin__
188
+ unsigned int __uAtomicInc_block(unsigned int *address, unsigned int val);
189
+
190
+ extern __device__ __device_builtin__
191
+ unsigned int __uAtomicInc_system(unsigned int *address, unsigned int val);
192
+
193
+ extern __device__ __device_builtin__
194
+ unsigned int __uAtomicDec_block(unsigned int *address, unsigned int val);
195
+
196
+ extern __device__ __device_builtin__
197
+ unsigned int __uAtomicDec_system(unsigned int *address, unsigned int val);
198
+
199
+ extern __device__ __device_builtin__
200
+ int __iAtomicCAS_block(int *address, int compare, int val);
201
+
202
+ extern __device__ __device_builtin__
203
+ int __iAtomicCAS_system(int *address, int compare, int val);
204
+
205
+ extern __device__ __device_builtin__
206
+ unsigned int __uAtomicCAS_block(unsigned int *address, unsigned int compare,
207
+ unsigned int val);
208
+
209
+ extern __device__ __device_builtin__
210
+ unsigned int __uAtomicCAS_system(unsigned int *address, unsigned int compare,
211
+ unsigned int val);
212
+
213
+ extern __device__ __device_builtin__
214
+ unsigned long long __ullAtomicCAS_block(unsigned long long int *address,
215
+ unsigned long long int compare,
216
+ unsigned long long int val);
217
+
218
+ extern __device__ __device_builtin__
219
+ unsigned long long __ullAtomicCAS_system(unsigned long long int *address,
220
+ unsigned long long int compare,
221
+ unsigned long long int val);
222
+
223
+ extern __device__ __device_builtin__
224
+ int __iAtomicAnd_block(int *address, int val);
225
+
226
+ extern __device__ __device_builtin__
227
+ int __iAtomicAnd_system(int *address, int val);
228
+
229
+ extern __device__ __device_builtin__
230
+ long long __llAtomicAnd_block(long long *address, long long val);
231
+
232
+ extern __device__ __device_builtin__
233
+ long long __llAtomicAnd_system(long long *address, long long val);
234
+
235
+ extern __device__ __device_builtin__
236
+ unsigned int __uAtomicAnd_block(unsigned int *address, unsigned int val);
237
+
238
+ extern __device__ __device_builtin__
239
+ unsigned int __uAtomicAnd_system(unsigned int *address, unsigned int val);
240
+
241
+ extern __device__ __device_builtin__
242
+ unsigned long long __ullAtomicAnd_block(unsigned long long *address, unsigned long long val);
243
+
244
+ extern __device__ __device_builtin__
245
+ unsigned long long __ullAtomicAnd_system(unsigned long long *address, unsigned long long val);
246
+
247
+ extern __device__ __device_builtin__
248
+ int __iAtomicOr_block(int *address, int val);
249
+
250
+ extern __device__ __device_builtin__
251
+ int __iAtomicOr_system(int *address, int val);
252
+
253
+ extern __device__ __device_builtin__
254
+ long long __llAtomicOr_block(long long *address, long long val);
255
+
256
+ extern __device__ __device_builtin__
257
+ long long __llAtomicOr_system(long long *address, long long val);
258
+
259
+ extern __device__ __device_builtin__
260
+ unsigned int __uAtomicOr_block(unsigned int *address, unsigned int val);
261
+
262
+ extern __device__ __device_builtin__
263
+ unsigned int __uAtomicOr_system(unsigned int *address, unsigned int val);
264
+
265
+ extern __device__ __device_builtin__
266
+ unsigned long long __ullAtomicOr_block(unsigned long long *address, unsigned long long val);
267
+
268
+ extern __device__ __device_builtin__
269
+ unsigned long long __ullAtomicOr_system(unsigned long long *address, unsigned long long val);
270
+
271
+ extern __device__ __device_builtin__
272
+ int __iAtomicXor_block(int *address, int val);
273
+
274
+ extern __device__ __device_builtin__
275
+ int __iAtomicXor_system(int *address, int val);
276
+
277
+ extern __device__ __device_builtin__
278
+ long long __llAtomicXor_block(long long *address, long long val);
279
+
280
+ extern __device__ __device_builtin__
281
+ long long __llAtomicXor_system(long long *address, long long val);
282
+
283
+ extern __device__ __device_builtin__
284
+ unsigned int __uAtomicXor_block(unsigned int *address, unsigned int val);
285
+
286
+ extern __device__ __device_builtin__
287
+ unsigned int __uAtomicXor_system(unsigned int *address, unsigned int val);
288
+
289
+ extern __device__ __device_builtin__
290
+ unsigned long long __ullAtomicXor_block(unsigned long long *address, unsigned long long val);
291
+
292
+ extern __device__ __device_builtin__
293
+ unsigned long long __ullAtomicXor_system(unsigned long long *address, unsigned long long val);
294
+ }
295
+ #endif /* __CUDA_ARCH__ */
296
+
297
+ /*******************************************************************************
298
+ * *
299
+ * *
300
+ * *
301
+ *******************************************************************************/
302
+
303
+ __SM_60_ATOMIC_FUNCTIONS_DECL__ double atomicAdd(double *address, double val) __DEF_IF_HOST
304
+
305
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
306
+ int atomicAdd_block(int *address, int val) __DEF_IF_HOST
307
+
308
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
309
+ int atomicAdd_system(int *address, int val) __DEF_IF_HOST
310
+
311
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
312
+ unsigned int atomicAdd_block(unsigned int *address, unsigned int val) __DEF_IF_HOST
313
+
314
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
315
+ unsigned int atomicAdd_system(unsigned int *address, unsigned int val) __DEF_IF_HOST
316
+
317
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
318
+ unsigned long long atomicAdd_block(unsigned long long *address, unsigned long long val) __DEF_IF_HOST
319
+
320
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
321
+ unsigned long long atomicAdd_system(unsigned long long *address, unsigned long long val) __DEF_IF_HOST
322
+
323
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
324
+ float atomicAdd_block(float *address, float val) __DEF_IF_HOST
325
+
326
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
327
+ float atomicAdd_system(float *address, float val) __DEF_IF_HOST
328
+
329
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
330
+ double atomicAdd_block(double *address, double val) __DEF_IF_HOST
331
+
332
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
333
+ double atomicAdd_system(double *address, double val) __DEF_IF_HOST
334
+
335
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
336
+ int atomicSub_block(int *address, int val) __DEF_IF_HOST
337
+
338
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
339
+ int atomicSub_system(int *address, int val) __DEF_IF_HOST
340
+
341
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
342
+ unsigned int atomicSub_block(unsigned int *address, unsigned int val) __DEF_IF_HOST
343
+
344
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
345
+ unsigned int atomicSub_system(unsigned int *address, unsigned int val) __DEF_IF_HOST
346
+
347
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
348
+ int atomicExch_block(int *address, int val) __DEF_IF_HOST
349
+
350
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
351
+ int atomicExch_system(int *address, int val) __DEF_IF_HOST
352
+
353
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
354
+ unsigned int atomicExch_block(unsigned int *address, unsigned int val) __DEF_IF_HOST
355
+
356
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
357
+ unsigned int atomicExch_system(unsigned int *address, unsigned int val) __DEF_IF_HOST
358
+
359
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
360
+ unsigned long long atomicExch_block(unsigned long long *address, unsigned long long val) __DEF_IF_HOST
361
+
362
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
363
+ unsigned long long atomicExch_system(unsigned long long *address, unsigned long long val) __DEF_IF_HOST
364
+
365
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
366
+ float atomicExch_block(float *address, float val) __DEF_IF_HOST
367
+
368
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
369
+ float atomicExch_system(float *address, float val) __DEF_IF_HOST
370
+
371
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
372
+ int atomicMin_block(int *address, int val) __DEF_IF_HOST
373
+
374
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
375
+ int atomicMin_system(int *address, int val) __DEF_IF_HOST
376
+
377
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
378
+ long long atomicMin_block(long long *address, long long val) __DEF_IF_HOST
379
+
380
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
381
+ long long atomicMin_system(long long *address, long long val) __DEF_IF_HOST
382
+
383
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
384
+ unsigned int atomicMin_block(unsigned int *address, unsigned int val) __DEF_IF_HOST
385
+
386
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
387
+ unsigned int atomicMin_system(unsigned int *address, unsigned int val) __DEF_IF_HOST
388
+
389
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
390
+ unsigned long long atomicMin_block(unsigned long long *address, unsigned long long val) __DEF_IF_HOST
391
+
392
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
393
+ unsigned long long atomicMin_system(unsigned long long *address, unsigned long long val) __DEF_IF_HOST
394
+
395
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
396
+ int atomicMax_block(int *address, int val) __DEF_IF_HOST
397
+
398
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
399
+ int atomicMax_system(int *address, int val) __DEF_IF_HOST
400
+
401
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
402
+ long long atomicMax_block(long long *address, long long val) __DEF_IF_HOST
403
+
404
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
405
+ long long atomicMax_system(long long *address, long long val) __DEF_IF_HOST
406
+
407
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
408
+ unsigned int atomicMax_block(unsigned int *address, unsigned int val) __DEF_IF_HOST
409
+
410
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
411
+ unsigned int atomicMax_system(unsigned int *address, unsigned int val) __DEF_IF_HOST
412
+
413
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
414
+ unsigned long long atomicMax_block(unsigned long long *address, unsigned long long val) __DEF_IF_HOST
415
+
416
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
417
+ unsigned long long atomicMax_system(unsigned long long *address, unsigned long long val) __DEF_IF_HOST
418
+
419
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
420
+ unsigned int atomicInc_block(unsigned int *address, unsigned int val) __DEF_IF_HOST
421
+
422
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
423
+ unsigned int atomicInc_system(unsigned int *address, unsigned int val) __DEF_IF_HOST
424
+
425
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
426
+ unsigned int atomicDec_block(unsigned int *address, unsigned int val) __DEF_IF_HOST
427
+
428
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
429
+ unsigned int atomicDec_system(unsigned int *address, unsigned int val) __DEF_IF_HOST
430
+
431
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
432
+ int atomicCAS_block(int *address, int compare, int val) __DEF_IF_HOST
433
+
434
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
435
+ int atomicCAS_system(int *address, int compare, int val) __DEF_IF_HOST
436
+
437
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
438
+ unsigned int atomicCAS_block(unsigned int *address, unsigned int compare,
439
+ unsigned int val) __DEF_IF_HOST
440
+
441
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
442
+ unsigned int atomicCAS_system(unsigned int *address, unsigned int compare,
443
+ unsigned int val) __DEF_IF_HOST
444
+
445
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
446
+ unsigned long long int atomicCAS_block(unsigned long long int *address,
447
+ unsigned long long int compare,
448
+ unsigned long long int val) __DEF_IF_HOST
449
+
450
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
451
+ unsigned long long int atomicCAS_system(unsigned long long int *address,
452
+ unsigned long long int compare,
453
+ unsigned long long int val) __DEF_IF_HOST
454
+
455
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
456
+ int atomicAnd_block(int *address, int val) __DEF_IF_HOST
457
+
458
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
459
+ int atomicAnd_system(int *address, int val) __DEF_IF_HOST
460
+
461
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
462
+ long long atomicAnd_block(long long *address, long long val) __DEF_IF_HOST
463
+
464
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
465
+ long long atomicAnd_system(long long *address, long long val) __DEF_IF_HOST
466
+
467
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
468
+ unsigned int atomicAnd_block(unsigned int *address, unsigned int val) __DEF_IF_HOST
469
+
470
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
471
+ unsigned int atomicAnd_system(unsigned int *address, unsigned int val) __DEF_IF_HOST
472
+
473
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
474
+ unsigned long long atomicAnd_block(unsigned long long *address, unsigned long long val) __DEF_IF_HOST
475
+
476
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
477
+ unsigned long long atomicAnd_system(unsigned long long *address, unsigned long long val) __DEF_IF_HOST
478
+
479
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
480
+ int atomicOr_block(int *address, int val) __DEF_IF_HOST
481
+
482
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
483
+ int atomicOr_system(int *address, int val) __DEF_IF_HOST
484
+
485
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
486
+ long long atomicOr_block(long long *address, long long val) __DEF_IF_HOST
487
+
488
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
489
+ long long atomicOr_system(long long *address, long long val) __DEF_IF_HOST
490
+
491
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
492
+ unsigned int atomicOr_block(unsigned int *address, unsigned int val) __DEF_IF_HOST
493
+
494
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
495
+ unsigned int atomicOr_system(unsigned int *address, unsigned int val) __DEF_IF_HOST
496
+
497
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
498
+ unsigned long long atomicOr_block(unsigned long long *address, unsigned long long val) __DEF_IF_HOST
499
+
500
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
501
+ unsigned long long atomicOr_system(unsigned long long *address, unsigned long long val) __DEF_IF_HOST
502
+
503
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
504
+ int atomicXor_block(int *address, int val) __DEF_IF_HOST
505
+
506
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
507
+ int atomicXor_system(int *address, int val) __DEF_IF_HOST
508
+
509
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
510
+ long long atomicXor_block(long long *address, long long val) __DEF_IF_HOST
511
+
512
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
513
+ long long atomicXor_system(long long *address, long long val) __DEF_IF_HOST
514
+
515
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
516
+ unsigned int atomicXor_block(unsigned int *address, unsigned int val) __DEF_IF_HOST
517
+
518
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
519
+ unsigned int atomicXor_system(unsigned int *address, unsigned int val) __DEF_IF_HOST
520
+
521
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
522
+ unsigned long long atomicXor_block(unsigned long long *address, unsigned long long val) __DEF_IF_HOST
523
+
524
+ __SM_60_ATOMIC_FUNCTIONS_DECL__
525
+ unsigned long long atomicXor_system(unsigned long long *address, unsigned long long val) __DEF_IF_HOST
526
+
527
+ #endif /* !__CUDA_ARCH__ || __CUDA_ARCH__ >= 600 */
528
+
529
+ #endif /* __cplusplus && __CUDACC__ */
530
+
531
+ #undef __SM_60_ATOMIC_FUNCTIONS_DECL__
532
+ #undef __DEF_IF_HOST
533
+
534
+ #if !defined(__CUDACC_RTC__) && defined(__CUDA_ARCH__)
535
+ #include "sm_60_atomic_functions.hpp"
536
+ #endif /* !__CUDACC_RTC__ && defined(__CUDA_ARCH__) */
537
+
538
+ #endif /* !__SM_60_ATOMIC_FUNCTIONS_H__ */
539
+
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (220 Bytes). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvToolsExt.h ADDED
@@ -0,0 +1,1561 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2009-2017 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO USER:
5
+ *
6
+ * This source code is subject to NVIDIA ownership rights under U.S. and
7
+ * international Copyright laws.
8
+ *
9
+ * This software and the information contained herein is PROPRIETARY and
10
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and conditions
11
+ * of a form of NVIDIA software license agreement.
12
+ *
13
+ * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
14
+ * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
15
+ * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
16
+ * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
17
+ * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
18
+ * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
19
+ * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
20
+ * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
21
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
22
+ * OR PERFORMANCE OF THIS SOURCE CODE.
23
+ *
24
+ * U.S. Government End Users. This source code is a "commercial item" as
25
+ * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
26
+ * "commercial computer software" and "commercial computer software
27
+ * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
28
+ * and is provided to the U.S. Government only as a commercial end item.
29
+ * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
30
+ * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
31
+ * source code with only those rights set forth herein.
32
+ *
33
+ * Any use of this source code in individual and commercial software must
34
+ * include, in the user documentation and internal comments to the code,
35
+ * the above Disclaimer and U.S. Government End Users Notice.
36
+ */
37
+
38
+ /** \file nvToolsExt.h
39
+ */
40
+
41
+ /* ========================================================================= */
42
+ /** \mainpage
43
+ * \tableofcontents
44
+ * \section INTRODUCTION Introduction
45
+ *
46
+ * The NVIDIA Tools Extension library is a set of functions that a
47
+ * developer can use to provide additional information to tools.
48
+ * The additional information is used by the tool to improve
49
+ * analysis and visualization of data.
50
+ *
51
+ * The library introduces close to zero overhead if no tool is
52
+ * attached to the application. The overhead when a tool is
53
+ * attached is specific to the tool.
54
+ *
55
+ * \section INITIALIZATION_SECTION Initialization
56
+ *
57
+ * Typically the tool's library that plugs into NVTX is indirectly
58
+ * loaded via enviromental properties that are platform specific.
59
+ * For some platform or special cases, the user may be required
60
+ * to instead explicity initialize instead though. This can also
61
+ * be helpful to control when the API loads a tool's library instead
62
+ * of what would typically be the first function call to emit info.
63
+ * For these rare case, see \ref INITIALIZATION for additional information.
64
+ *
65
+ * \section MARKERS_AND_RANGES Markers and Ranges
66
+ *
67
+ * Markers and ranges are used to describe events at a specific time (markers)
68
+ * or over a time span (ranges) during the execution of the application
69
+ * respectively.
70
+ *
71
+ * \subsection MARKERS Markers
72
+ *
73
+ * Markers denote specific moments in time.
74
+ *
75
+ *
76
+ * See \ref DOMAINS and \ref EVENT_ATTRIBUTES for additional information on
77
+ * how to specify the domain.
78
+ *
79
+ * \subsection THREAD_RANGES Thread Ranges
80
+ *
81
+ * Thread ranges denote nested time ranges. Nesting is maintained per thread
82
+ * per domain and does not require any additional correlation mechanism. The
83
+ * duration of a thread range is defined by the corresponding pair of
84
+ * nvtxRangePush* to nvtxRangePop API calls.
85
+ *
86
+ * See \ref DOMAINS and \ref EVENT_ATTRIBUTES for additional information on
87
+ * how to specify the domain.
88
+ *
89
+ * \subsection PROCESS_RANGES Process Ranges
90
+ *
91
+ * Process ranges denote a time span that can expose arbitrary concurrency, as
92
+ * opposed to thread ranges that only support nesting. In addition the range
93
+ * start event can happen on a different thread than the end marker. For the
94
+ * correlation of a start/end pair an unique correlation ID is used that is
95
+ * returned from the start API call and needs to be passed into the end API
96
+ * call.
97
+ *
98
+ * \subsection EVENT_ATTRIBUTES Event Attributes
99
+ *
100
+ * \ref MARKERS_AND_RANGES can be annotated with various attributes to provide
101
+ * additional information for an event or to guide the tool's visualization of
102
+ * the data. Each of the attributes is optional and if left unused the
103
+ * attributes fall back to a default value. The attributes include:
104
+ * - color
105
+ * - category
106
+ *
107
+ * To specify any attribute other than the text message, the \ref
108
+ * EVENT_ATTRIBUTE_STRUCTURE "Event Attribute Structure" must be used.
109
+ *
110
+ * \section DOMAINS Domains
111
+ *
112
+ * Domains enable developers to scope annotations. By default all events and
113
+ * annotations are in the default domain. Additional domains can be registered.
114
+ * This allows developers to scope markers, ranges, and resources names to
115
+ * avoid conflicts.
116
+ *
117
+ * The function ::nvtxDomainCreateA or ::nvtxDomainCreateW is used to create
118
+ * a named domain.
119
+ *
120
+ * Each domain maintains its own
121
+ * - categories
122
+ * - thread range stacks
123
+ * - registered strings
124
+ *
125
+ * The function ::nvtxDomainDestroy marks the end of the domain. Destroying
126
+ * a domain unregisters and destroys all objects associated with it such as
127
+ * registered strings, resource objects, named categories, and started ranges.
128
+ *
129
+ * \section RESOURCE_NAMING Resource Naming
130
+ *
131
+ * This section covers calls that allow to annotate objects with user-provided
132
+ * names in order to allow for a better analysis of complex trace data. All of
133
+ * the functions take the handle or the ID of the object to name and the name.
134
+ * The functions can be called multiple times during the execution of an
135
+ * application, however, in that case it is implementation dependent which
136
+ * name will be reported by the tool.
137
+ *
138
+ * \subsection CATEGORY_NAMING Category Naming
139
+ *
140
+ * Some function in this library support associating an integer category
141
+ * to enable filtering and sorting. The category naming functions allow
142
+ * the application to associate a user friendly name with the integer
143
+ * category. Support for domains have been added in NVTX_VERSION_2 to
144
+ * avoid collisions when domains are developed independantly.
145
+ *
146
+ * \subsection RESOURCE_OBJECTS Resource Objects
147
+ *
148
+ * Resource objects are a generic mechanism for attaching data to an application
149
+ * resource. The identifier field makes the association to a pointer or handle,
150
+ * while the type field helps provide deeper understanding of the identifier as
151
+ * well as enabling differentiation in cases where handles generated by different
152
+ * APIs may collide. The resource object may also have an associated message to
153
+ * associate with the application resource, enabling further annotation of this
154
+ * object and how it is used.
155
+ *
156
+ * The resource object was introduced in NVTX_VERSION_2 to supersede existing naming
157
+ * functions and allow the application resource identified by those functions to be
158
+ * associated to a domain. The other naming functions are still supported for backward
159
+ * compatibility but will be associated only to the default domain.
160
+ *
161
+ * \subsection RESOURCE_NAMING_OS Resource Naming
162
+ *
163
+ * Some operating system resources creation APIs do not support providing a user friendly
164
+ * name, such as some OS thread creation APIs. This API support resource naming though
165
+ * both through resource objects and functions following the pattern
166
+ * nvtxName[RESOURCE_TYPE][A|W](identifier, name). Resource objects introduced in NVTX_VERSION 2
167
+ * supersede the other functions with a a more general method of assigning names to OS resources,
168
+ * along with associating them to domains too. The older nvtxName* functions are only associated
169
+ * with the default domain.
170
+ * \section EXTENSIONS Optional Extensions
171
+ * Optional extensions will either appear within the existing sections the extend or appear
172
+ * in the "Related Pages" when they introduce new concepts.
173
+ */
174
+
175
+ #ifndef NVTOOLSEXT_H_
176
+ #define NVTOOLSEXT_H_
177
+
178
+ #if defined(_MSC_VER)
179
+ #ifdef NVTX_EXPORTS
180
+ #define NVTX_DECLSPEC
181
+ #else
182
+ #define NVTX_DECLSPEC __declspec(dllimport)
183
+ #endif /* NVTX_EXPORTS */
184
+ #define NVTX_API __stdcall
185
+ #define NVTX_INLINE_STATIC __inline static
186
+ #else /*defined(__GNUC__)*/
187
+ #define NVTX_DECLSPEC
188
+ #define NVTX_API
189
+ #define NVTX_INLINE_STATIC inline static
190
+ #endif /* Platform */
191
+
192
+ /**
193
+ * The nvToolsExt library depends on stdint.h. If the build tool chain in use
194
+ * does not include stdint.h then define NVTX_STDINT_TYPES_ALREADY_DEFINED
195
+ * and define the following types:
196
+ * <ul>
197
+ * <li>uint8_t
198
+ * <li>int8_t
199
+ * <li>uint16_t
200
+ * <li>int16_t
201
+ * <li>uint32_t
202
+ * <li>int32_t
203
+ * <li>uint64_t
204
+ * <li>int64_t
205
+ * <li>uintptr_t
206
+ * <li>intptr_t
207
+ * </ul>
208
+ #define NVTX_STDINT_TYPES_ALREADY_DEFINED if you are using your own header file.
209
+ */
210
+ #ifndef NVTX_STDINT_TYPES_ALREADY_DEFINED
211
+ #include <stdint.h>
212
+ #endif
213
+
214
+ #include <stddef.h>
215
+
216
+ #ifdef __cplusplus
217
+ extern "C" {
218
+ #endif /* __cplusplus */
219
+
220
+ /**
221
+ * Tools Extension API version
222
+ */
223
+ #define NVTX_VERSION 2
224
+
225
+ /**
226
+ * Size of the nvtxEventAttributes_t structure.
227
+ */
228
+ #define NVTX_EVENT_ATTRIB_STRUCT_SIZE ( (uint16_t)( sizeof(nvtxEventAttributes_t) ) )
229
+
230
+ /**
231
+ * Size of the nvtxInitializationAttributes_t structure.
232
+ */
233
+ #define NVTX_INITIALIZATION_ATTRIB_STRUCT_SIZE ( (uint16_t)( sizeof(nvtxInitializationAttributes_t) ) )
234
+
235
+ #define NVTX_NO_PUSH_POP_TRACKING ((int)-2)
236
+
237
+ typedef uint64_t nvtxRangeId_t;
238
+
239
+
240
+ /* \brief String Handle Structure.
241
+ * \anchor STRING_HANDLE_STRUCTURE
242
+ *
243
+ * This structure is opaque to the user and is used as a handle to reference
244
+ * a string. The tools will return a pointer through the API for the application
245
+ * to hold on it's behalf to reference the string in the future.
246
+ *
247
+ */
248
+ typedef struct nvtxStringHandle* nvtxStringHandle_t;
249
+
250
+ /* \brief Domain Handle Structure.
251
+ * \anchor DOMAIN_HANDLE_STRUCTURE
252
+ *
253
+ * This structure is opaque to the user and is used as a handle to reference
254
+ * a domain. The tools will return a pointer through the API for the application
255
+ * to hold on its behalf to reference the domain in the future.
256
+ *
257
+ */
258
+ typedef struct nvtxDomainHandle* nvtxDomainHandle_t;
259
+
260
+
261
+
262
+
263
+
264
+
265
+ /* ========================================================================= */
266
+ /** \defgroup GENERAL General
267
+ * @{
268
+ */
269
+
270
+ /** ---------------------------------------------------------------------------
271
+ * Color Types
272
+ * ------------------------------------------------------------------------- */
273
+ typedef enum nvtxColorType_t
274
+ {
275
+ NVTX_COLOR_UNKNOWN = 0, /**< Color attribute is unused. */
276
+ NVTX_COLOR_ARGB = 1 /**< An ARGB color is provided. */
277
+ } nvtxColorType_t;
278
+
279
+ /** ---------------------------------------------------------------------------
280
+ * Message Types
281
+ * ------------------------------------------------------------------------- */
282
+ typedef enum nvtxMessageType_t
283
+ {
284
+ NVTX_MESSAGE_UNKNOWN = 0, /**< Message payload is unused. */
285
+ NVTX_MESSAGE_TYPE_ASCII = 1, /**< A character sequence is used as payload. */
286
+ NVTX_MESSAGE_TYPE_UNICODE = 2, /**< A wide character sequence is used as payload. */
287
+ /* NVTX_VERSION_2 */
288
+ NVTX_MESSAGE_TYPE_REGISTERED = 3 /**< A unique string handle that was registered
289
+ with \ref nvtxDomainRegisterStringA() or
290
+ \ref nvtxDomainRegisterStringW(). */
291
+ } nvtxMessageType_t;
292
+
293
+ typedef union nvtxMessageValue_t
294
+ {
295
+ const char* ascii;
296
+ const wchar_t* unicode;
297
+ /* NVTX_VERSION_2 */
298
+ nvtxStringHandle_t registered;
299
+ } nvtxMessageValue_t;
300
+
301
+
302
+ /** @} */ /*END defgroup*/
303
+
304
+ /* ========================================================================= */
305
+ /** \defgroup INITIALIZATION Initialization
306
+ * @{
307
+ * Typically the tool's library that plugs into NVTX is indirectly
308
+ * loaded via enviromental properties that are platform specific.
309
+ * For some platform or special cases, the user may be required
310
+ * to instead explicity initialize instead though. This can also
311
+ * be helpful to control when the API loads a tool's library instead
312
+ * of what would typically be the first function call to emit info.
313
+ */
314
+
315
+ /** ---------------------------------------------------------------------------
316
+ * Initialization Modes
317
+ * ------------------------------------------------------------------------- */
318
+ typedef enum nvtxInitializationMode_t
319
+ {
320
+ NVTX_INITIALIZATION_MODE_UNKNOWN = 0, /**< A platform that supports indirect initialization will attempt this style, otherwise expect failure. */
321
+ NVTX_INITIALIZATION_MODE_CALLBACK_V1 = 1, /**< A function pointer conforming to NVTX_VERSION=1 will be used. */
322
+ NVTX_INITIALIZATION_MODE_CALLBACK_V2 = 2, /**< A function pointer conforming to NVTX_VERSION=2 will be used. */
323
+ NVTX_INITIALIZATION_MODE_SIZE
324
+ } nvtxInitializationMode_t;
325
+
326
+
327
+ /** \brief Initialization Attribute Structure.
328
+ * \anchor INITIALIZATION_ATTRIBUTE_STRUCTURE
329
+ *
330
+ * This structure is used to describe the attributes used for initialization
331
+ * of the NVTX API.
332
+ *
333
+ * \par Initializing the Attributes
334
+ *
335
+ * The caller should always perform the following three tasks when using
336
+ * attributes:
337
+ * <ul>
338
+ * <li>Zero the structure
339
+ * <li>Set the version field
340
+ * <li>Set the size field
341
+ * </ul>
342
+ *
343
+ * Zeroing the structure sets all the event attributes types and values
344
+ * to the default value.
345
+ *
346
+ * The version and size field are used by the Tools Extension
347
+ * implementation to handle multiple versions of the attributes structure.
348
+ * NVTX_INITIALIZATION_ATTRIB_STRUCT_SIZE may be used for the size.
349
+ *
350
+ * It is recommended that the caller use one of the following to methods
351
+ * to initialize the event attributes structure:
352
+ *
353
+ * \par Method 1: Initializing nvtxInitializationAttributes_t for future compatibility
354
+ * \code
355
+ * nvtxInitializationAttributes_t initAttribs = {0};
356
+ * initAttribs.version = NVTX_VERSION;
357
+ * initAttribs.size = NVTX_INITIALIZATION_ATTRIB_STRUCT_SIZE;
358
+ * \endcode
359
+ *
360
+ * \par Method 2: Initializing nvtxInitializationAttributes_t for a specific version
361
+ * \code
362
+ * nvtxInitializationAttributes_t initAttribs = {0};
363
+ * initAttribs.version =2;
364
+ * initAttribs.size = (uint16_t)(sizeof(nvtxInitializationAttributes_v2));
365
+ * \endcode
366
+ *
367
+ * If the caller uses Method 1 it is critical that the entire binary
368
+ * layout of the structure be configured to 0 so that all fields
369
+ * are initialized to the default value.
370
+ *
371
+ * The caller should either use both NVTX_VERSION and
372
+ * NVTX_INITIALIZATION_ATTRIB_STRUCT_SIZE (Method 1) or use explicit values
373
+ * and a versioned type (Method 2). Using a mix of the two methods
374
+ * will likely cause either source level incompatibility or binary
375
+ * incompatibility in the future.
376
+ *
377
+ * \par Settings Attribute Types and Values
378
+ *
379
+ *
380
+ * \par Example:
381
+ * \code
382
+ * // Initialize
383
+ * nvtxInitializationAttributes_t initAttribs = {0};
384
+ * initAttribs.version = NVTX_VERSION;
385
+ * initAttribs.size = NVTX_INITIALIZATION_ATTRIB_STRUCT_SIZE;
386
+ *
387
+ * // Configure the Attributes
388
+ * initAttribs.mode = NVTX_INITIALIZATION_MODE_CALLBACK_V2;
389
+ * initAttribs.fnptr = InitializeInjectionNvtx2;
390
+ * \endcode
391
+
392
+ * \sa
393
+ * ::nvtxInitializationMode_t
394
+ * ::nvtxInitialize
395
+ */
396
+ typedef struct nvtxInitializationAttributes_v2
397
+ {
398
+ /**
399
+ * \brief Version flag of the structure.
400
+ *
401
+ * Needs to be set to NVTX_VERSION to indicate the version of NVTX APIs
402
+ * supported in this header file. This can optionally be overridden to
403
+ * another version of the tools extension library.
404
+ */
405
+ uint16_t version;
406
+
407
+ /**
408
+ * \brief Size of the structure.
409
+ *
410
+ * Needs to be set to the size in bytes of the event attribute
411
+ * structure used to specify the event.
412
+ */
413
+ uint16_t size;
414
+
415
+ /**
416
+ * \brief Mode of initialization.
417
+ *
418
+ * The mode of initialization dictates the overall behavior and which
419
+ * attributes in this struct will be used.
420
+ *
421
+ * Default Value is NVTX_INITIALIZATION_MODE_UNKNOWN = 0
422
+ * \sa
423
+ * ::nvtxInitializationMode_t
424
+ */
425
+ uint32_t mode;
426
+
427
+ /**
428
+ * \brief Function pointer used for initialization if the mode requires
429
+ *
430
+ * The user has retrieved this function pointer from the tool library
431
+ * and would like to use it to initialize. The mode must be set to a
432
+ * NVTX_INITIALIZATION_MODE_CALLBACK_V# for this to be used. The mode
433
+ * will dictate the expectations for this member. The function signature
434
+ * will be cast from void(*)() to the appropriate signature for the mode.
435
+ * the expected behavior of the function will also depend on the mode
436
+ * beyond the simple function signature.
437
+ *
438
+ * Default Value is NVTX_INITIALIZATION_MODE_UNKNOWN which will either
439
+ * initialize based on external properties or fail if not supported on
440
+ * the given platform.
441
+
442
+ * \sa
443
+ * ::nvtxInitializationMode_t
444
+ */
445
+ void(*fnptr)(void);
446
+
447
+ } nvtxInitializationAttributes_v2;
448
+
449
+ typedef struct nvtxInitializationAttributes_v2 nvtxInitializationAttributes_t;
450
+
451
+
452
+ /* ------------------------------------------------------------------------- */
453
+ /** \brief Force initialization (optional on most platforms)
454
+ *
455
+ * Force NVTX library to initialize. On some platform NVTX will implicit initialize
456
+ * upon the first function call into an NVTX API.
457
+ *
458
+ * \return Result codes are simplest to assume NVTX_SUCCESS or !NVTX_SUCCESS
459
+ *
460
+ * \param initAttrib - The initialization attribute structure
461
+ *
462
+ * \sa
463
+ * ::nvtxInitializationAttributes_t
464
+ *
465
+ * \version \NVTX_VERSION_2
466
+ * @{ */
467
+ NVTX_DECLSPEC int NVTX_API nvtxInitialize(const nvtxInitializationAttributes_t* initAttrib);
468
+ /** @} */
469
+
470
+
471
+ /** @} */ /*END defgroup*/
472
+
473
+ /* ========================================================================= */
474
+ /** \defgroup EVENT_ATTRIBUTES Event Attributes
475
+ * @{
476
+ */
477
+
478
+ /** ---------------------------------------------------------------------------
479
+ * Payload Types
480
+ * ------------------------------------------------------------------------- */
481
+ typedef enum nvtxPayloadType_t
482
+ {
483
+ NVTX_PAYLOAD_UNKNOWN = 0, /**< Color payload is unused. */
484
+ NVTX_PAYLOAD_TYPE_UNSIGNED_INT64 = 1, /**< A 64 bit unsigned integer value is used as payload. */
485
+ NVTX_PAYLOAD_TYPE_INT64 = 2, /**< A 64 bit signed integer value is used as payload. */
486
+ NVTX_PAYLOAD_TYPE_DOUBLE = 3, /**< A 64 bit floating point value is used as payload. */
487
+ /* NVTX_VERSION_2 */
488
+ NVTX_PAYLOAD_TYPE_UNSIGNED_INT32 = 4, /**< A 32 bit floating point value is used as payload. */
489
+ NVTX_PAYLOAD_TYPE_INT32 = 5, /**< A 32 bit floating point value is used as payload. */
490
+ NVTX_PAYLOAD_TYPE_FLOAT = 6 /**< A 32 bit floating point value is used as payload. */
491
+ } nvtxPayloadType_t;
492
+
493
+ /** \brief Event Attribute Structure.
494
+ * \anchor EVENT_ATTRIBUTE_STRUCTURE
495
+ *
496
+ * This structure is used to describe the attributes of an event. The layout of
497
+ * the structure is defined by a specific version of the tools extension
498
+ * library and can change between different versions of the Tools Extension
499
+ * library.
500
+ *
501
+ * \par Initializing the Attributes
502
+ *
503
+ * The caller should always perform the following three tasks when using
504
+ * attributes:
505
+ * <ul>
506
+ * <li>Zero the structure
507
+ * <li>Set the version field
508
+ * <li>Set the size field
509
+ * </ul>
510
+ *
511
+ * Zeroing the structure sets all the event attributes types and values
512
+ * to the default value.
513
+ *
514
+ * The version and size field are used by the Tools Extension
515
+ * implementation to handle multiple versions of the attributes structure.
516
+ *
517
+ * It is recommended that the caller use one of the following to methods
518
+ * to initialize the event attributes structure:
519
+ *
520
+ * \par Method 1: Initializing nvtxEventAttributes for future compatibility
521
+ * \code
522
+ * nvtxEventAttributes_t eventAttrib = {0};
523
+ * eventAttrib.version = NVTX_VERSION;
524
+ * eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
525
+ * \endcode
526
+ *
527
+ * \par Method 2: Initializing nvtxEventAttributes for a specific version
528
+ * \code
529
+ * nvtxEventAttributes_t eventAttrib = {0};
530
+ * eventAttrib.version = 1;
531
+ * eventAttrib.size = (uint16_t)(sizeof(nvtxEventAttributes_v1));
532
+ * \endcode
533
+ *
534
+ * If the caller uses Method 1 it is critical that the entire binary
535
+ * layout of the structure be configured to 0 so that all fields
536
+ * are initialized to the default value.
537
+ *
538
+ * The caller should either use both NVTX_VERSION and
539
+ * NVTX_EVENT_ATTRIB_STRUCT_SIZE (Method 1) or use explicit values
540
+ * and a versioned type (Method 2). Using a mix of the two methods
541
+ * will likely cause either source level incompatibility or binary
542
+ * incompatibility in the future.
543
+ *
544
+ * \par Settings Attribute Types and Values
545
+ *
546
+ *
547
+ * \par Example:
548
+ * \code
549
+ * // Initialize
550
+ * nvtxEventAttributes_t eventAttrib = {0};
551
+ * eventAttrib.version = NVTX_VERSION;
552
+ * eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
553
+ *
554
+ * // Configure the Attributes
555
+ * eventAttrib.colorType = NVTX_COLOR_ARGB;
556
+ * eventAttrib.color = 0xFF880000;
557
+ * eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
558
+ * eventAttrib.message.ascii = "Example";
559
+ * \endcode
560
+ *
561
+ * In the example the caller does not have to set the value of
562
+ * \ref ::nvtxEventAttributes_v2::category or
563
+ * \ref ::nvtxEventAttributes_v2::payload as these fields were set to
564
+ * the default value by {0}.
565
+ * \sa
566
+ * ::nvtxDomainMarkEx
567
+ * ::nvtxDomainRangeStartEx
568
+ * ::nvtxDomainRangePushEx
569
+ */
570
+ typedef struct nvtxEventAttributes_v2
571
+ {
572
+ /**
573
+ * \brief Version flag of the structure.
574
+ *
575
+ * Needs to be set to NVTX_VERSION to indicate the version of NVTX APIs
576
+ * supported in this header file. This can optionally be overridden to
577
+ * another version of the tools extension library.
578
+ */
579
+ uint16_t version;
580
+
581
+ /**
582
+ * \brief Size of the structure.
583
+ *
584
+ * Needs to be set to the size in bytes of the event attribute
585
+ * structure used to specify the event.
586
+ */
587
+ uint16_t size;
588
+
589
+ /**
590
+ * \brief ID of the category the event is assigned to.
591
+ *
592
+ * A category is a user-controlled ID that can be used to group
593
+ * events. The tool may use category IDs to improve filtering or
594
+ * enable grouping of events in the same category. The functions
595
+ * \ref ::nvtxNameCategoryA or \ref ::nvtxNameCategoryW can be used
596
+ * to name a category.
597
+ *
598
+ * Default Value is 0
599
+ */
600
+ uint32_t category;
601
+
602
+ /** \brief Color type specified in this attribute structure.
603
+ *
604
+ * Defines the color format of the attribute structure's \ref COLOR_FIELD
605
+ * "color" field.
606
+ *
607
+ * Default Value is NVTX_COLOR_UNKNOWN
608
+ */
609
+ int32_t colorType; /* nvtxColorType_t */
610
+
611
+ /** \brief Color assigned to this event. \anchor COLOR_FIELD
612
+ *
613
+ * The color that the tool should use to visualize the event.
614
+ */
615
+ uint32_t color;
616
+
617
+ /**
618
+ * \brief Payload type specified in this attribute structure.
619
+ *
620
+ * Defines the payload format of the attribute structure's \ref PAYLOAD_FIELD
621
+ * "payload" field.
622
+ *
623
+ * Default Value is NVTX_PAYLOAD_UNKNOWN
624
+ */
625
+ int32_t payloadType; /* nvtxPayloadType_t */
626
+
627
+ int32_t reserved0;
628
+
629
+ /**
630
+ * \brief Payload assigned to this event. \anchor PAYLOAD_FIELD
631
+ *
632
+ * A numerical value that can be used to annotate an event. The tool could
633
+ * use the payload data to reconstruct graphs and diagrams.
634
+ */
635
+ union payload_t
636
+ {
637
+ uint64_t ullValue;
638
+ int64_t llValue;
639
+ double dValue;
640
+ /* NVTX_VERSION_2 */
641
+ uint32_t uiValue;
642
+ int32_t iValue;
643
+ float fValue;
644
+ } payload;
645
+
646
+ /** \brief Message type specified in this attribute structure.
647
+ *
648
+ * Defines the message format of the attribute structure's \ref MESSAGE_FIELD
649
+ * "message" field.
650
+ *
651
+ * Default Value is NVTX_MESSAGE_UNKNOWN
652
+ */
653
+ int32_t messageType; /* nvtxMessageType_t */
654
+
655
+ /** \brief Message assigned to this attribute structure. \anchor MESSAGE_FIELD
656
+ *
657
+ * The text message that is attached to an event.
658
+ */
659
+ nvtxMessageValue_t message;
660
+
661
+ } nvtxEventAttributes_v2;
662
+
663
+ typedef struct nvtxEventAttributes_v2 nvtxEventAttributes_t;
664
+
665
+ /** @} */ /*END defgroup*/
666
+ /* ========================================================================= */
667
+ /** \defgroup MARKERS_AND_RANGES Markers and Ranges
668
+ *
669
+ * See \ref MARKERS_AND_RANGES for more details
670
+ *
671
+ * @{
672
+ */
673
+
674
+ /** \name Marker */
675
+
676
+ /* ------------------------------------------------------------------------- */
677
+ /** \brief Marks an instantaneous event in the application.
678
+ *
679
+ * A marker can contain a text message or specify additional information
680
+ * using the event attributes structure. These attributes include a text
681
+ * message, color, category, and a payload. Each of the attributes is optional
682
+ * and can only be sent out using the \ref nvtxDomainMarkEx function.
683
+ *
684
+ * nvtxDomainMarkEx(NULL, event) is equivalent to calling
685
+ * nvtxMarkEx(event).
686
+ *
687
+ * \param domain - The domain of scoping the category.
688
+ * \param eventAttrib - The event attribute structure defining the marker's
689
+ * attribute types and attribute values.
690
+ *
691
+ * \sa
692
+ * ::nvtxMarkEx
693
+ *
694
+ * \version \NVTX_VERSION_2
695
+ * @{ */
696
+ NVTX_DECLSPEC void NVTX_API nvtxDomainMarkEx(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
697
+ /** @} */
698
+
699
+ /* ------------------------------------------------------------------------- */
700
+ /** \brief Marks an instantaneous event in the application.
701
+ *
702
+ * A marker can contain a text message or specify additional information
703
+ * using the event attributes structure. These attributes include a text
704
+ * message, color, category, and a payload. Each of the attributes is optional
705
+ * and can only be sent out using the \ref nvtxMarkEx function.
706
+ * If \ref nvtxMarkA or \ref nvtxMarkW are used to specify the marker
707
+ * or if an attribute is unspecified then a default value will be used.
708
+ *
709
+ * \param eventAttrib - The event attribute structure defining the marker's
710
+ * attribute types and attribute values.
711
+ *
712
+ * \par Example:
713
+ * \code
714
+ * // zero the structure
715
+ * nvtxEventAttributes_t eventAttrib = {0};
716
+ * // set the version and the size information
717
+ * eventAttrib.version = NVTX_VERSION;
718
+ * eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
719
+ * // configure the attributes. 0 is the default for all attributes.
720
+ * eventAttrib.colorType = NVTX_COLOR_ARGB;
721
+ * eventAttrib.color = 0xFF880000;
722
+ * eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
723
+ * eventAttrib.message.ascii = "Example nvtxMarkEx";
724
+ * nvtxMarkEx(&eventAttrib);
725
+ * \endcode
726
+ *
727
+ * \sa
728
+ * ::nvtxDomainMarkEx
729
+ *
730
+ * \version \NVTX_VERSION_1
731
+ * @{ */
732
+ NVTX_DECLSPEC void NVTX_API nvtxMarkEx(const nvtxEventAttributes_t* eventAttrib);
733
+ /** @} */
734
+
735
+ /* ------------------------------------------------------------------------- */
736
+ /** \brief Marks an instantaneous event in the application.
737
+ *
738
+ * A marker created using \ref nvtxMarkA or \ref nvtxMarkW contains only a
739
+ * text message.
740
+ *
741
+ * \param message - The message associated to this marker event.
742
+ *
743
+ * \par Example:
744
+ * \code
745
+ * nvtxMarkA("Example nvtxMarkA");
746
+ * nvtxMarkW(L"Example nvtxMarkW");
747
+ * \endcode
748
+ *
749
+ * \sa
750
+ * ::nvtxDomainMarkEx
751
+ * ::nvtxMarkEx
752
+ *
753
+ * \version \NVTX_VERSION_0
754
+ * @{ */
755
+ NVTX_DECLSPEC void NVTX_API nvtxMarkA(const char* message);
756
+ NVTX_DECLSPEC void NVTX_API nvtxMarkW(const wchar_t* message);
757
+ /** @} */
758
+
759
+
760
+ /** \name Process Ranges */
761
+
762
+ /* ------------------------------------------------------------------------- */
763
+ /** \brief Starts a process range in a domain.
764
+ *
765
+ * \param domain - The domain of scoping the category.
766
+ * \param eventAttrib - The event attribute structure defining the range's
767
+ * attribute types and attribute values.
768
+ *
769
+ * \return The unique ID used to correlate a pair of Start and End events.
770
+ *
771
+ * \remarks Ranges defined by Start/End can overlap.
772
+ *
773
+ * \par Example:
774
+ * \code
775
+ * nvtxDomainHandle_t domain = nvtxDomainCreateA("my domain");
776
+ * nvtxEventAttributes_t eventAttrib = {0};
777
+ * eventAttrib.version = NVTX_VERSION;
778
+ * eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
779
+ * eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
780
+ * eventAttrib.message.ascii = "my range";
781
+ * nvtxRangeId_t rangeId = nvtxDomainRangeStartEx(&eventAttrib);
782
+ * // ...
783
+ * nvtxDomainRangeEnd(rangeId);
784
+ * \endcode
785
+ *
786
+ * \sa
787
+ * ::nvtxDomainRangeEnd
788
+ *
789
+ * \version \NVTX_VERSION_2
790
+ * @{ */
791
+ NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxDomainRangeStartEx(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
792
+ /** @} */
793
+
794
+ /* ------------------------------------------------------------------------- */
795
+ /** \brief Starts a process range.
796
+ *
797
+ * \param eventAttrib - The event attribute structure defining the range's
798
+ * attribute types and attribute values.
799
+ *
800
+ * \return The unique ID used to correlate a pair of Start and End events.
801
+ *
802
+ * \remarks Ranges defined by Start/End can overlap.
803
+ *
804
+ * \par Example:
805
+ * \code
806
+ * nvtxEventAttributes_t eventAttrib = {0};
807
+ * eventAttrib.version = NVTX_VERSION;
808
+ * eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
809
+ * eventAttrib.category = 3;
810
+ * eventAttrib.colorType = NVTX_COLOR_ARGB;
811
+ * eventAttrib.color = 0xFF0088FF;
812
+ * eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
813
+ * eventAttrib.message.ascii = "Example Range";
814
+ * nvtxRangeId_t rangeId = nvtxRangeStartEx(&eventAttrib);
815
+ * // ...
816
+ * nvtxRangeEnd(rangeId);
817
+ * \endcode
818
+ *
819
+ * \sa
820
+ * ::nvtxRangeEnd
821
+ * ::nvtxDomainRangeStartEx
822
+ *
823
+ * \version \NVTX_VERSION_1
824
+ * @{ */
825
+ NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxRangeStartEx(const nvtxEventAttributes_t* eventAttrib);
826
+ /** @} */
827
+
828
+ /* ------------------------------------------------------------------------- */
829
+ /** \brief Starts a process range.
830
+ *
831
+ * \param message - The event message associated to this range event.
832
+ *
833
+ * \return The unique ID used to correlate a pair of Start and End events.
834
+ *
835
+ * \remarks Ranges defined by Start/End can overlap.
836
+ *
837
+ * \par Example:
838
+ * \code
839
+ * nvtxRangeId_t r1 = nvtxRangeStartA("Range 1");
840
+ * nvtxRangeId_t r2 = nvtxRangeStartW(L"Range 2");
841
+ * nvtxRangeEnd(r1);
842
+ * nvtxRangeEnd(r2);
843
+ * \endcode
844
+ *
845
+ * \sa
846
+ * ::nvtxRangeEnd
847
+ * ::nvtxRangeStartEx
848
+ * ::nvtxDomainRangeStartEx
849
+ *
850
+ * \version \NVTX_VERSION_0
851
+ * @{ */
852
+ NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxRangeStartA(const char* message);
853
+ NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxRangeStartW(const wchar_t* message);
854
+ /** @} */
855
+
856
+ /* ------------------------------------------------------------------------- */
857
+ /** \brief Ends a process range.
858
+ *
859
+ * \param domain - The domain
860
+ * \param id - The correlation ID returned from a nvtxRangeStart call.
861
+ *
862
+ * \remarks This function is offered completeness but is an alias for ::nvtxRangeEnd.
863
+ * It does not need a domain param since that is associated iwth the range ID at ::nvtxDomainRangeStartEx
864
+ *
865
+ * \par Example:
866
+ * \code
867
+ * nvtxDomainHandle_t domain = nvtxDomainCreateA("my domain");
868
+ * nvtxEventAttributes_t eventAttrib = {0};
869
+ * eventAttrib.version = NVTX_VERSION;
870
+ * eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
871
+ * eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
872
+ * eventAttrib.message.ascii = "my range";
873
+ * nvtxRangeId_t rangeId = nvtxDomainRangeStartEx(&eventAttrib);
874
+ * // ...
875
+ * nvtxDomainRangeEnd(rangeId);
876
+ * \endcode
877
+ *
878
+ * \sa
879
+ * ::nvtxDomainRangeStartEx
880
+ *
881
+ * \version \NVTX_VERSION_2
882
+ * @{ */
883
+ NVTX_DECLSPEC void NVTX_API nvtxDomainRangeEnd(nvtxDomainHandle_t domain, nvtxRangeId_t id);
884
+ /** @} */
885
+
886
+ /* ------------------------------------------------------------------------- */
887
+ /** \brief Ends a process range.
888
+ *
889
+ * \param id - The correlation ID returned from an nvtxRangeStart call.
890
+ *
891
+ * \sa
892
+ * ::nvtxDomainRangeStartEx
893
+ * ::nvtxRangeStartEx
894
+ * ::nvtxRangeStartA
895
+ * ::nvtxRangeStartW
896
+ *
897
+ * \version \NVTX_VERSION_0
898
+ * @{ */
899
+ NVTX_DECLSPEC void NVTX_API nvtxRangeEnd(nvtxRangeId_t id);
900
+ /** @} */
901
+
902
+ /** \name Thread Ranges */
903
+
904
+ /* ------------------------------------------------------------------------- */
905
+ /** \brief Starts a nested thread range.
906
+ *
907
+ * \param domain - The domain of scoping.
908
+ * \param eventAttrib - The event attribute structure defining the range's
909
+ * attribute types and attribute values.
910
+ *
911
+ * \return The 0 based level of range being started. This value is scoped to the domain.
912
+ * If an error occurs, a negative value is returned.
913
+ *
914
+ * \par Example:
915
+ * \code
916
+ * nvtxDomainHandle_t domain = nvtxDomainCreateA("example domain");
917
+ * nvtxEventAttributes_t eventAttrib = {0};
918
+ * eventAttrib.version = NVTX_VERSION;
919
+ * eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
920
+ * eventAttrib.colorType = NVTX_COLOR_ARGB;
921
+ * eventAttrib.color = 0xFFFF0000;
922
+ * eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
923
+ * eventAttrib.message.ascii = "Level 0";
924
+ * nvtxDomainRangePushEx(domain, &eventAttrib);
925
+ *
926
+ * // Re-use eventAttrib
927
+ * eventAttrib.messageType = NVTX_MESSAGE_TYPE_UNICODE;
928
+ * eventAttrib.message.unicode = L"Level 1";
929
+ * nvtxDomainRangePushEx(domain, &eventAttrib);
930
+ *
931
+ * nvtxDomainRangePop(domain); //level 1
932
+ * nvtxDomainRangePop(domain); //level 0
933
+ * \endcode
934
+ *
935
+ * \sa
936
+ * ::nvtxDomainRangePop
937
+ *
938
+ * \version \NVTX_VERSION_2
939
+ * @{ */
940
+ NVTX_DECLSPEC int NVTX_API nvtxDomainRangePushEx(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
941
+ /** @} */
942
+
943
+ /* ------------------------------------------------------------------------- */
944
+ /** \brief Starts a nested thread range.
945
+ *
946
+ * \param eventAttrib - The event attribute structure defining the range's
947
+ * attribute types and attribute values.
948
+ *
949
+ * \return The 0 based level of range being started. This level is per domain.
950
+ * If an error occurs a negative value is returned.
951
+ *
952
+ * \par Example:
953
+ * \code
954
+ * nvtxEventAttributes_t eventAttrib = {0};
955
+ * eventAttrib.version = NVTX_VERSION;
956
+ * eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
957
+ * eventAttrib.colorType = NVTX_COLOR_ARGB;
958
+ * eventAttrib.color = 0xFFFF0000;
959
+ * eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
960
+ * eventAttrib.message.ascii = "Level 0";
961
+ * nvtxRangePushEx(&eventAttrib);
962
+ *
963
+ * // Re-use eventAttrib
964
+ * eventAttrib.messageType = NVTX_MESSAGE_TYPE_UNICODE;
965
+ * eventAttrib.message.unicode = L"Level 1";
966
+ * nvtxRangePushEx(&eventAttrib);
967
+ *
968
+ * nvtxRangePop();
969
+ * nvtxRangePop();
970
+ * \endcode
971
+ *
972
+ * \sa
973
+ * ::nvtxDomainRangePushEx
974
+ * ::nvtxRangePop
975
+ *
976
+ * \version \NVTX_VERSION_1
977
+ * @{ */
978
+ NVTX_DECLSPEC int NVTX_API nvtxRangePushEx(const nvtxEventAttributes_t* eventAttrib);
979
+ /** @} */
980
+
981
+ /* ------------------------------------------------------------------------- */
982
+ /** \brief Starts a nested thread range.
983
+ *
984
+ * \param message - The event message associated to this range event.
985
+ *
986
+ * \return The 0 based level of range being started. If an error occurs a
987
+ * negative value is returned.
988
+ *
989
+ * \par Example:
990
+ * \code
991
+ * nvtxRangePushA("Level 0");
992
+ * nvtxRangePushW(L"Level 1");
993
+ * nvtxRangePop();
994
+ * nvtxRangePop();
995
+ * \endcode
996
+ *
997
+ * \sa
998
+ * ::nvtxDomainRangePushEx
999
+ * ::nvtxRangePop
1000
+ *
1001
+ * \version \NVTX_VERSION_0
1002
+ * @{ */
1003
+ NVTX_DECLSPEC int NVTX_API nvtxRangePushA(const char* message);
1004
+ NVTX_DECLSPEC int NVTX_API nvtxRangePushW(const wchar_t* message);
1005
+ /** @} */
1006
+
1007
+
1008
+ /* ------------------------------------------------------------------------- */
1009
+ /** \brief Ends a nested thread range.
1010
+ *
1011
+ * \return The level of the range being ended. If an error occurs a negative
1012
+ * value is returned on the current thread.
1013
+ *
1014
+ * \par Example:
1015
+ * \code
1016
+ * nvtxDomainHandle_t domain = nvtxDomainCreate("example library");
1017
+ * nvtxDomainRangePushA(domain, "Level 0");
1018
+ * nvtxDomainRangePushW(domain, L"Level 1");
1019
+ * nvtxDomainRangePop(domain);
1020
+ * nvtxDomainRangePop(domain);
1021
+ * \endcode
1022
+ *
1023
+ * \sa
1024
+ * ::nvtxRangePushEx
1025
+ * ::nvtxRangePushA
1026
+ * ::nvtxRangePushW
1027
+ *
1028
+ * \version \NVTX_VERSION_2
1029
+ * @{ */
1030
+ NVTX_DECLSPEC int NVTX_API nvtxDomainRangePop(nvtxDomainHandle_t domain);
1031
+ /** @} */
1032
+
1033
+ /* ------------------------------------------------------------------------- */
1034
+ /** \brief Ends a nested thread range.
1035
+ *
1036
+ * \return The level of the range being ended. If an error occurs a negative
1037
+ * value is returned on the current thread.
1038
+ *
1039
+ * \par Example:
1040
+ * \code
1041
+ * nvtxRangePushA("Level 0");
1042
+ * nvtxRangePushW(L"Level 1");
1043
+ * nvtxRangePop();
1044
+ * nvtxRangePop();
1045
+ * \endcode
1046
+ *
1047
+ * \sa
1048
+ * ::nvtxRangePushEx
1049
+ * ::nvtxRangePushA
1050
+ * ::nvtxRangePushW
1051
+ *
1052
+ * \version \NVTX_VERSION_0
1053
+ * @{ */
1054
+ NVTX_DECLSPEC int NVTX_API nvtxRangePop(void);
1055
+ /** @} */
1056
+
1057
+
1058
+ /** @} */ /*END defgroup*/
1059
+ /* ========================================================================= */
1060
+ /** \defgroup RESOURCE_NAMING Resource Naming
1061
+ *
1062
+ * See \ref RESOURCE_NAMING for more details
1063
+ *
1064
+ * @{
1065
+ */
1066
+
1067
+
1068
+ /* ------------------------------------------------------------------------- */
1069
+ /** \name Functions for Generic Resource Naming*/
1070
+ /* ------------------------------------------------------------------------- */
1071
+
1072
+ /* ------------------------------------------------------------------------- */
1073
+ /** \cond SHOW_HIDDEN
1074
+ * \brief Resource typing helpers.
1075
+ *
1076
+ * Classes are used to make it easy to create a series of resource types
1077
+ * per API without collisions
1078
+ */
1079
+ #define NVTX_RESOURCE_MAKE_TYPE(CLASS, INDEX) ((((uint32_t)(NVTX_RESOURCE_CLASS_ ## CLASS))<<16)|((uint32_t)(INDEX)))
1080
+ #define NVTX_RESOURCE_CLASS_GENERIC 1
1081
+ /** \endcond */
1082
+
1083
+ /* ------------------------------------------------------------------------- */
1084
+ /** \brief Generic resource type for when a resource class is not available.
1085
+ *
1086
+ * \sa
1087
+ * ::nvtxDomainResourceCreate
1088
+ *
1089
+ * \version \NVTX_VERSION_2
1090
+ */
1091
+ typedef enum nvtxResourceGenericType_t
1092
+ {
1093
+ NVTX_RESOURCE_TYPE_UNKNOWN = 0,
1094
+ NVTX_RESOURCE_TYPE_GENERIC_POINTER = NVTX_RESOURCE_MAKE_TYPE(GENERIC, 1), /**< Generic pointer assumed to have no collisions with other pointers. */
1095
+ NVTX_RESOURCE_TYPE_GENERIC_HANDLE = NVTX_RESOURCE_MAKE_TYPE(GENERIC, 2), /**< Generic handle assumed to have no collisions with other handles. */
1096
+ NVTX_RESOURCE_TYPE_GENERIC_THREAD_NATIVE = NVTX_RESOURCE_MAKE_TYPE(GENERIC, 3), /**< OS native thread identifier. */
1097
+ NVTX_RESOURCE_TYPE_GENERIC_THREAD_POSIX = NVTX_RESOURCE_MAKE_TYPE(GENERIC, 4) /**< POSIX pthread identifier. */
1098
+ } nvtxResourceGenericType_t;
1099
+
1100
+
1101
+
1102
+ /** \brief Resource Attribute Structure.
1103
+ * \anchor RESOURCE_ATTRIBUTE_STRUCTURE
1104
+ *
1105
+ * This structure is used to describe the attributes of a resource. The layout of
1106
+ * the structure is defined by a specific version of the tools extension
1107
+ * library and can change between different versions of the Tools Extension
1108
+ * library.
1109
+ *
1110
+ * \par Initializing the Attributes
1111
+ *
1112
+ * The caller should always perform the following three tasks when using
1113
+ * attributes:
1114
+ * <ul>
1115
+ * <li>Zero the structure
1116
+ * <li>Set the version field
1117
+ * <li>Set the size field
1118
+ * </ul>
1119
+ *
1120
+ * Zeroing the structure sets all the resource attributes types and values
1121
+ * to the default value.
1122
+ *
1123
+ * The version and size field are used by the Tools Extension
1124
+ * implementation to handle multiple versions of the attributes structure.
1125
+ *
1126
+ * It is recommended that the caller use one of the following to methods
1127
+ * to initialize the event attributes structure:
1128
+ *
1129
+ * \par Method 1: Initializing nvtxEventAttributes for future compatibility
1130
+ * \code
1131
+ * nvtxResourceAttributes_t attribs = {0};
1132
+ * attribs.version = NVTX_VERSION;
1133
+ * attribs.size = NVTX_RESOURCE_ATTRIB_STRUCT_SIZE;
1134
+ * \endcode
1135
+ *
1136
+ * \par Method 2: Initializing nvtxEventAttributes for a specific version
1137
+ * \code
1138
+ * nvtxResourceAttributes_v0 attribs = {0};
1139
+ * attribs.version = 2;
1140
+ * attribs.size = (uint16_t)(sizeof(nvtxResourceAttributes_v0));
1141
+ * \endcode
1142
+ *
1143
+ * If the caller uses Method 1 it is critical that the entire binary
1144
+ * layout of the structure be configured to 0 so that all fields
1145
+ * are initialized to the default value.
1146
+ *
1147
+ * The caller should either use both NVTX_VERSION and
1148
+ * NVTX_RESOURCE_ATTRIB_STRUCT_SIZE (Method 1) or use explicit values
1149
+ * and a versioned type (Method 2). Using a mix of the two methods
1150
+ * will likely cause either source level incompatibility or binary
1151
+ * incompatibility in the future.
1152
+ *
1153
+ * \par Settings Attribute Types and Values
1154
+ *
1155
+ *
1156
+ * \par Example:
1157
+ * \code
1158
+ * nvtxDomainHandle_t domain = nvtxDomainCreateA("example domain");
1159
+ *
1160
+ * // Initialize
1161
+ * nvtxResourceAttributes_t attribs = {0};
1162
+ * attribs.version = NVTX_VERSION;
1163
+ * attribs.size = NVTX_RESOURCE_ATTRIB_STRUCT_SIZE;
1164
+ *
1165
+ * // Configure the Attributes
1166
+ * attribs.identifierType = NVTX_RESOURCE_TYPE_GENERIC_POINTER;
1167
+ * attribs.identifier.pValue = (const void*)pMutex;
1168
+ * attribs.messageType = NVTX_MESSAGE_TYPE_ASCII;
1169
+ * attribs.message.ascii = "Single thread access to database.";
1170
+ *
1171
+ * nvtxResourceHandle_t handle = nvtxDomainResourceCreate(domain, attribs);
1172
+ * \endcode
1173
+ *
1174
+ * \sa
1175
+ * ::nvtxDomainResourceCreate
1176
+ */
1177
+ typedef struct nvtxResourceAttributes_v0
1178
+ {
1179
+ /**
1180
+ * \brief Version flag of the structure.
1181
+ *
1182
+ * Needs to be set to NVTX_VERSION to indicate the version of NVTX APIs
1183
+ * supported in this header file. This can optionally be overridden to
1184
+ * another version of the tools extension library.
1185
+ */
1186
+ uint16_t version;
1187
+
1188
+ /**
1189
+ * \brief Size of the structure.
1190
+ *
1191
+ * Needs to be set to the size in bytes of this attribute
1192
+ * structure.
1193
+ */
1194
+ uint16_t size;
1195
+
1196
+ /**
1197
+ * \brief Identifier type specifies how to interpret the identifier field
1198
+ *
1199
+ * Defines the identifier format of the attribute structure's \ref RESOURCE_IDENTIFIER_FIELD
1200
+ * "identifier" field.
1201
+ *
1202
+ * Default Value is NVTX_RESOURCE_TYPE_UNKNOWN
1203
+ */
1204
+ int32_t identifierType; /* values from enums following the pattern nvtxResource[name]Type_t */
1205
+
1206
+ /**
1207
+ * \brief Identifier for the resource.
1208
+ * \anchor RESOURCE_IDENTIFIER_FIELD
1209
+ *
1210
+ * An identifier may be a pointer or a handle to an OS or middleware API object.
1211
+ * The resource type will assist in avoiding collisions where handles values may collide.
1212
+ */
1213
+ union identifier_t
1214
+ {
1215
+ const void* pValue;
1216
+ uint64_t ullValue;
1217
+ } identifier;
1218
+
1219
+ /** \brief Message type specified in this attribute structure.
1220
+ *
1221
+ * Defines the message format of the attribute structure's \ref RESOURCE_MESSAGE_FIELD
1222
+ * "message" field.
1223
+ *
1224
+ * Default Value is NVTX_MESSAGE_UNKNOWN
1225
+ */
1226
+ int32_t messageType; /* nvtxMessageType_t */
1227
+
1228
+ /** \brief Message assigned to this attribute structure. \anchor RESOURCE_MESSAGE_FIELD
1229
+ *
1230
+ * The text message that is attached to a resource.
1231
+ */
1232
+ nvtxMessageValue_t message;
1233
+
1234
+ } nvtxResourceAttributes_v0;
1235
+
1236
+ typedef struct nvtxResourceAttributes_v0 nvtxResourceAttributes_t;
1237
+
1238
+ /* \cond SHOW_HIDDEN
1239
+ * \version \NVTX_VERSION_2
1240
+ */
1241
+ #define NVTX_RESOURCE_ATTRIB_STRUCT_SIZE ( (uint16_t)( sizeof(nvtxResourceAttributes_v0) ) )
1242
+ typedef struct nvtxResourceHandle* nvtxResourceHandle_t;
1243
+ /** \endcond */
1244
+
1245
+
1246
+
1247
+ /* ------------------------------------------------------------------------- */
1248
+ /** \brief Create a resource object to track and associate data with OS and middleware objects
1249
+ *
1250
+ * Allows users to associate an API handle or pointer with a user-provided name.
1251
+ *
1252
+ *
1253
+ * \param domain - Domain to own the resource object
1254
+ * \param attribs - Attributes to be associated with the resource
1255
+ *
1256
+ * \return A handle that represents the newly created resource object.
1257
+ *
1258
+ * \par Example:
1259
+ * \code
1260
+ * nvtxDomainHandle_t domain = nvtxDomainCreateA("example domain");
1261
+ * nvtxResourceAttributes_t attribs = {0};
1262
+ * attribs.version = NVTX_VERSION;
1263
+ * attribs.size = NVTX_RESOURCE_ATTRIB_STRUCT_SIZE;
1264
+ * attribs.identifierType = NVTX_RESOURCE_TYPE_GENERIC_POINTER;
1265
+ * attribs.identifier.pValue = (const void*)pMutex;
1266
+ * attribs.messageType = NVTX_MESSAGE_TYPE_ASCII;
1267
+ * attribs.message.ascii = "Single thread access to database.";
1268
+ * nvtxResourceHandle_t handle = nvtxDomainResourceCreate(domain, attribs);
1269
+ * \endcode
1270
+ *
1271
+ * \sa
1272
+ * ::nvtxResourceAttributes_t
1273
+ * ::nvtxDomainResourceDestroy
1274
+ *
1275
+ * \version \NVTX_VERSION_2
1276
+ * @{ */
1277
+ NVTX_DECLSPEC nvtxResourceHandle_t NVTX_API nvtxDomainResourceCreate(nvtxDomainHandle_t domain, nvtxResourceAttributes_t* attribs);
1278
+ /** @} */
1279
+
1280
+ /* ------------------------------------------------------------------------- */
1281
+ /** \brief Destroy a resource object to track and associate data with OS and middleware objects
1282
+ *
1283
+ * Allows users to associate an API handle or pointer with a user-provided name.
1284
+ *
1285
+ * \param resource - Handle to the resource in which to operate.
1286
+ *
1287
+ * \par Example:
1288
+ * \code
1289
+ * nvtxDomainHandle_t domain = nvtxDomainCreateA("example domain");
1290
+ * nvtxResourceAttributes_t attribs = {0};
1291
+ * attribs.version = NVTX_VERSION;
1292
+ * attribs.size = NVTX_RESOURCE_ATTRIB_STRUCT_SIZE;
1293
+ * attribs.identifierType = NVTX_RESOURCE_TYPE_GENERIC_POINTER;
1294
+ * attribs.identifier.pValue = (const void*)pMutex;
1295
+ * attribs.messageType = NVTX_MESSAGE_TYPE_ASCII;
1296
+ * attribs.message.ascii = "Single thread access to database.";
1297
+ * nvtxResourceHandle_t handle = nvtxDomainResourceCreate(domain, attribs);
1298
+ * nvtxDomainResourceDestroy(handle);
1299
+ * \endcode
1300
+ *
1301
+ * \sa
1302
+ * ::nvtxDomainResourceCreate
1303
+ *
1304
+ * \version \NVTX_VERSION_2
1305
+ * @{ */
1306
+ NVTX_DECLSPEC void NVTX_API nvtxDomainResourceDestroy(nvtxResourceHandle_t resource);
1307
+ /** @} */
1308
+
1309
+
1310
+ /** \name Functions for NVTX Category Naming*/
1311
+
1312
+ /* ------------------------------------------------------------------------- */
1313
+ /**
1314
+ * \brief Annotate an NVTX category used within a domain.
1315
+ *
1316
+ * Categories are used to group sets of events. Each category is identified
1317
+ * through a unique ID and that ID is passed into any of the marker/range
1318
+ * events to assign that event to a specific category. The nvtxDomainNameCategory
1319
+ * function calls allow the user to assign a name to a category ID that is
1320
+ * specific to the domain.
1321
+ *
1322
+ * nvtxDomainNameCategory(NULL, category, name) is equivalent to calling
1323
+ * nvtxNameCategory(category, name).
1324
+ *
1325
+ * \param domain - The domain of scoping the category.
1326
+ * \param category - The category ID to name.
1327
+ * \param name - The name of the category.
1328
+ *
1329
+ * \remarks The category names are tracked per domain.
1330
+ *
1331
+ * \par Example:
1332
+ * \code
1333
+ * nvtxDomainHandle_t domain = nvtxDomainCreateA("example");
1334
+ * nvtxDomainNameCategoryA(domain, 1, "Memory Allocation");
1335
+ * nvtxDomainNameCategoryW(domain, 2, L"Memory Transfer");
1336
+ * \endcode
1337
+ *
1338
+ * \version \NVTX_VERSION_2
1339
+ * @{ */
1340
+ NVTX_DECLSPEC void NVTX_API nvtxDomainNameCategoryA(nvtxDomainHandle_t domain, uint32_t category, const char* name);
1341
+ NVTX_DECLSPEC void NVTX_API nvtxDomainNameCategoryW(nvtxDomainHandle_t domain, uint32_t category, const wchar_t* name);
1342
+ /** @} */
1343
+
1344
+ /** \brief Annotate an NVTX category.
1345
+ *
1346
+ * Categories are used to group sets of events. Each category is identified
1347
+ * through a unique ID and that ID is passed into any of the marker/range
1348
+ * events to assign that event to a specific category. The nvtxNameCategory
1349
+ * function calls allow the user to assign a name to a category ID.
1350
+ *
1351
+ * \param category - The category ID to name.
1352
+ * \param name - The name of the category.
1353
+ *
1354
+ * \remarks The category names are tracked per process.
1355
+ *
1356
+ * \par Example:
1357
+ * \code
1358
+ * nvtxNameCategory(1, "Memory Allocation");
1359
+ * nvtxNameCategory(2, "Memory Transfer");
1360
+ * nvtxNameCategory(3, "Memory Object Lifetime");
1361
+ * \endcode
1362
+ *
1363
+ * \version \NVTX_VERSION_1
1364
+ * @{ */
1365
+ NVTX_DECLSPEC void NVTX_API nvtxNameCategoryA(uint32_t category, const char* name);
1366
+ NVTX_DECLSPEC void NVTX_API nvtxNameCategoryW(uint32_t category, const wchar_t* name);
1367
+ /** @} */
1368
+
1369
+ /** \name Functions for OS Threads Naming*/
1370
+
1371
+ /* ------------------------------------------------------------------------- */
1372
+ /** \brief Annotate an OS thread.
1373
+ *
1374
+ * Allows the user to name an active thread of the current process. If an
1375
+ * invalid thread ID is provided or a thread ID from a different process is
1376
+ * used the behavior of the tool is implementation dependent.
1377
+ *
1378
+ * The thread name is associated to the default domain. To support domains
1379
+ * use resource objects via ::nvtxDomainResourceCreate.
1380
+ *
1381
+ * \param threadId - The ID of the thread to name.
1382
+ * \param name - The name of the thread.
1383
+ *
1384
+ * \par Example:
1385
+ * \code
1386
+ * nvtxNameOsThread(GetCurrentThreadId(), "MAIN_THREAD");
1387
+ * \endcode
1388
+ *
1389
+ * \version \NVTX_VERSION_1
1390
+ * @{ */
1391
+ NVTX_DECLSPEC void NVTX_API nvtxNameOsThreadA(uint32_t threadId, const char* name);
1392
+ NVTX_DECLSPEC void NVTX_API nvtxNameOsThreadW(uint32_t threadId, const wchar_t* name);
1393
+ /** @} */
1394
+
1395
+
1396
+ /** @} */ /*END defgroup*/
1397
+ /* ========================================================================= */
1398
+ /** \defgroup STRING_REGISTRATION String Registration
1399
+ *
1400
+ * Registered strings are intended to increase performance by lowering instrumentation
1401
+ * overhead. String may be registered once and the handle may be passed in place of
1402
+ * a string where an the APIs may allow.
1403
+ *
1404
+ * See \ref STRING_REGISTRATION for more details
1405
+ *
1406
+ * @{
1407
+ */
1408
+
1409
+ /* ------------------------------------------------------------------------- */
1410
+ /** \brief Register a string.
1411
+
1412
+ * Registers an immutable string with NVTX. Once registered the pointer used
1413
+ * to register the domain name can be used in nvtxEventAttributes_t
1414
+ * \ref MESSAGE_FIELD. This allows NVTX implementation to skip copying the
1415
+ * contents of the message on each event invocation.
1416
+ *
1417
+ * String registration is an optimization. It is recommended to use string
1418
+ * registration if the string will be passed to an event many times.
1419
+ *
1420
+ * String are not unregistered, except that by unregistering the entire domain
1421
+ *
1422
+ * \param domain - Domain handle. If NULL then the global domain is used.
1423
+ * \param string - A unique pointer to a sequence of characters.
1424
+ *
1425
+ * \return A handle representing the registered string.
1426
+ *
1427
+ * \par Example:
1428
+ * \code
1429
+ * nvtxDomainCreateA("com.nvidia.nvtx.example");
1430
+ * nvtxStringHandle_t message = nvtxDomainRegisterStringA(domain, "registered string");
1431
+ * nvtxEventAttributes_t eventAttrib = {0};
1432
+ * eventAttrib.version = NVTX_VERSION;
1433
+ * eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
1434
+ * eventAttrib.messageType = NVTX_MESSAGE_TYPE_REGISTERED;
1435
+ * eventAttrib.message.registered = message;
1436
+ * \endcode
1437
+ *
1438
+ * \version \NVTX_VERSION_2
1439
+ * @{ */
1440
+ NVTX_DECLSPEC nvtxStringHandle_t NVTX_API nvtxDomainRegisterStringA(nvtxDomainHandle_t domain, const char* string);
1441
+ NVTX_DECLSPEC nvtxStringHandle_t NVTX_API nvtxDomainRegisterStringW(nvtxDomainHandle_t domain, const wchar_t* string);
1442
+ /** @} */
1443
+
1444
+ /** @} */ /*END defgroup*/
1445
+ /* ========================================================================= */
1446
+ /** \defgroup DOMAINS Domains
1447
+ *
1448
+ * Domains are used to group events to a developer defined scope. Middleware
1449
+ * vendors may also scope their own events to avoid collisions with the
1450
+ * the application developer's events, so that the application developer may
1451
+ * inspect both parts and easily differentiate or filter them. By default
1452
+ * all events are scoped to a global domain where NULL is provided or when
1453
+ * using APIs provided b versions of NVTX below v2
1454
+ *
1455
+ * Domains are intended to be typically long lived objects with the intention
1456
+ * of logically separating events of large modules from each other such as
1457
+ * middleware libraries from each other and the main application.
1458
+ *
1459
+ * See \ref DOMAINS for more details
1460
+ *
1461
+ * @{
1462
+ */
1463
+
1464
+ /* ------------------------------------------------------------------------- */
1465
+ /** \brief Register a NVTX domain.
1466
+ *
1467
+ * Domains are used to scope annotations. All NVTX_VERSION_0 and NVTX_VERSION_1
1468
+ * annotations are scoped to the global domain. The function nvtxDomainCreate
1469
+ * creates a new named domain.
1470
+ *
1471
+ * Each domain maintains its own nvtxRangePush and nvtxRangePop stack.
1472
+ *
1473
+ * \param name - A unique string representing the domain.
1474
+ *
1475
+ * \return A handle representing the domain.
1476
+ *
1477
+ * \par Example:
1478
+ * \code
1479
+ * nvtxDomainHandle_t domain = nvtxDomainCreateA("com.nvidia.nvtx.example");
1480
+ *
1481
+ * nvtxMarkA("nvtxMarkA to global domain");
1482
+ *
1483
+ * nvtxEventAttributes_t eventAttrib1 = {0};
1484
+ * eventAttrib1.version = NVTX_VERSION;
1485
+ * eventAttrib1.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
1486
+ * eventAttrib1.message.ascii = "nvtxDomainMarkEx to global domain";
1487
+ * nvtxDomainMarkEx(NULL, &eventAttrib1);
1488
+ *
1489
+ * nvtxEventAttributes_t eventAttrib2 = {0};
1490
+ * eventAttrib2.version = NVTX_VERSION;
1491
+ * eventAttrib2.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
1492
+ * eventAttrib2.message.ascii = "nvtxDomainMarkEx to com.nvidia.nvtx.example";
1493
+ * nvtxDomainMarkEx(domain, &eventAttrib2);
1494
+ * nvtxDomainDestroy(domain);
1495
+ * \endcode
1496
+ *
1497
+ * \sa
1498
+ * ::nvtxDomainDestroy
1499
+ *
1500
+ * \version \NVTX_VERSION_2
1501
+ * @{ */
1502
+ NVTX_DECLSPEC nvtxDomainHandle_t NVTX_API nvtxDomainCreateA(const char* name);
1503
+ NVTX_DECLSPEC nvtxDomainHandle_t NVTX_API nvtxDomainCreateW(const wchar_t* name);
1504
+ /** @} */
1505
+
1506
+ /* ------------------------------------------------------------------------- */
1507
+ /** \brief Unregister a NVTX domain.
1508
+ *
1509
+ * Unregisters the domain handle and frees all domain specific resources.
1510
+ *
1511
+ * \param domain - the domain handle
1512
+ *
1513
+ * \par Example:
1514
+ * \code
1515
+ * nvtxDomainHandle_t domain = nvtxDomainCreateA("com.nvidia.nvtx.example");
1516
+ * nvtxDomainDestroy(domain);
1517
+ * \endcode
1518
+ *
1519
+ * \sa
1520
+ * ::nvtxDomainCreateA
1521
+ * ::nvtxDomainCreateW
1522
+ *
1523
+ * \version \NVTX_VERSION_2
1524
+ * @{ */
1525
+ NVTX_DECLSPEC void NVTX_API nvtxDomainDestroy(nvtxDomainHandle_t domain);
1526
+ /** @} */
1527
+
1528
+
1529
+ /** @} */ /*END defgroup*/
1530
+ /* ========================================================================= */
1531
+ /** \cond SHOW_HIDDEN */
1532
+
1533
+ #ifdef UNICODE
1534
+ #define nvtxMark nvtxMarkW
1535
+ #define nvtxRangeStart nvtxRangeStartW
1536
+ #define nvtxRangePush nvtxRangePushW
1537
+ #define nvtxNameCategory nvtxNameCategoryW
1538
+ #define nvtxNameOsThread nvtxNameOsThreadW
1539
+ /* NVTX_VERSION_2 */
1540
+ #define nvtxDomainCreate nvtxDomainCreateW
1541
+ #define nvtxDomainRegisterString nvtxDomainRegisterStringW
1542
+ #define nvtxDomainNameCategory nvtxDomainNameCategoryW
1543
+ #else
1544
+ #define nvtxMark nvtxMarkA
1545
+ #define nvtxRangeStart nvtxRangeStartA
1546
+ #define nvtxRangePush nvtxRangePushA
1547
+ #define nvtxNameCategory nvtxNameCategoryA
1548
+ #define nvtxNameOsThread nvtxNameOsThreadA
1549
+ /* NVTX_VERSION_2 */
1550
+ #define nvtxDomainCreate nvtxDomainCreateA
1551
+ #define nvtxDomainRegisterString nvtxDomainRegisterStringA
1552
+ #define nvtxDomainNameCategory nvtxDomainNameCategoryA
1553
+ #endif
1554
+
1555
+ /** \endcond */
1556
+
1557
+ #ifdef __cplusplus
1558
+ }
1559
+ #endif /* __cplusplus */
1560
+
1561
+ #endif /* NVTOOLSEXT_H_ */
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvToolsExtCudaRt.h ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2009-2017 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO USER:
5
+ *
6
+ * This source code is subject to NVIDIA ownership rights under U.S. and
7
+ * international Copyright laws.
8
+ *
9
+ * This software and the information contained herein is PROPRIETARY and
10
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and conditions
11
+ * of a form of NVIDIA software license agreement.
12
+ *
13
+ * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
14
+ * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
15
+ * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
16
+ * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
17
+ * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
18
+ * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
19
+ * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
20
+ * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
21
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
22
+ * OR PERFORMANCE OF THIS SOURCE CODE.
23
+ *
24
+ * U.S. Government End Users. This source code is a "commercial item" as
25
+ * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
26
+ * "commercial computer software" and "commercial computer software
27
+ * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
28
+ * and is provided to the U.S. Government only as a commercial end item.
29
+ * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
30
+ * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
31
+ * source code with only those rights set forth herein.
32
+ *
33
+ * Any use of this source code in individual and commercial software must
34
+ * include, in the user documentation and internal comments to the code,
35
+ * the above Disclaimer and U.S. Government End Users Notice.
36
+ */
37
+
38
+ #ifndef NVTOOLSEXT_CUDART_H_
39
+ #define NVTOOLSEXT_CUDART_H_
40
+
41
+ #include "cuda.h"
42
+ #include "driver_types.h"
43
+
44
+ #include "nvToolsExt.h"
45
+
46
+ #ifdef __cplusplus
47
+ extern "C" {
48
+ #endif /* __cplusplus */
49
+
50
+ /* ========================================================================= */
51
+ /** \name Functions for CUDA Resource Naming
52
+ */
53
+ /** \addtogroup RESOURCE_NAMING
54
+ * \section RESOURCE_NAMING_CUDART CUDA Runtime Resource Naming
55
+ *
56
+ * This section covers the API functions that allow to annotate CUDA resources
57
+ * with user-provided names.
58
+ *
59
+ * @{
60
+ */
61
+
62
+ /* ------------------------------------------------------------------------- */
63
+ /* \cond SHOW_HIDDEN
64
+ * \brief Used to build a non-colliding value for resource types separated class
65
+ * \version \NVTX_VERSION_2
66
+ */
67
+ #define NVTX_RESOURCE_CLASS_CUDART 5
68
+ /** \endcond */
69
+
70
+ /* ------------------------------------------------------------------------- */
71
+ /** \brief Resource types for CUDART
72
+ */
73
+ typedef enum nvtxResourceCUDARTType_t
74
+ {
75
+ NVTX_RESOURCE_TYPE_CUDART_DEVICE = NVTX_RESOURCE_MAKE_TYPE(CUDART, 0), /* int device */
76
+ NVTX_RESOURCE_TYPE_CUDART_STREAM = NVTX_RESOURCE_MAKE_TYPE(CUDART, 1), /* cudaStream_t */
77
+ NVTX_RESOURCE_TYPE_CUDART_EVENT = NVTX_RESOURCE_MAKE_TYPE(CUDART, 2) /* cudaEvent_t */
78
+ } nvtxResourceCUDARTType_t;
79
+
80
+
81
+ /* ------------------------------------------------------------------------- */
82
+ /** \brief Annotates a CUDA device.
83
+ *
84
+ * Allows the user to associate a CUDA device with a user-provided name.
85
+ *
86
+ * \param device - The id of the CUDA device to name.
87
+ * \param name - The name of the CUDA device.
88
+ *
89
+ * \version \NVTX_VERSION_1
90
+ * @{ */
91
+ NVTX_DECLSPEC void NVTX_API nvtxNameCudaDeviceA(int device, const char* name);
92
+ NVTX_DECLSPEC void NVTX_API nvtxNameCudaDeviceW(int device, const wchar_t* name);
93
+ /** @} */
94
+
95
+ /* ------------------------------------------------------------------------- */
96
+ /** \brief Annotates a CUDA stream.
97
+ *
98
+ * Allows the user to associate a CUDA stream with a user-provided name.
99
+ *
100
+ * \param stream - The handle of the CUDA stream to name.
101
+ * \param name - The name of the CUDA stream.
102
+ *
103
+ * \version \NVTX_VERSION_1
104
+ * @{ */
105
+ NVTX_DECLSPEC void NVTX_API nvtxNameCudaStreamA(cudaStream_t stream, const char* name);
106
+ NVTX_DECLSPEC void NVTX_API nvtxNameCudaStreamW(cudaStream_t stream, const wchar_t* name);
107
+ /** @} */
108
+
109
+ /* ------------------------------------------------------------------------- */
110
+ /** \brief Annotates a CUDA event.
111
+ *
112
+ * Allows the user to associate a CUDA event with a user-provided name.
113
+ *
114
+ * \param event - The handle of the CUDA event to name.
115
+ * \param name - The name of the CUDA event.
116
+ *
117
+ * \version \NVTX_VERSION_1
118
+ * @{ */
119
+ NVTX_DECLSPEC void NVTX_API nvtxNameCudaEventA(cudaEvent_t event, const char* name);
120
+ NVTX_DECLSPEC void NVTX_API nvtxNameCudaEventW(cudaEvent_t event, const wchar_t* name);
121
+ /** @} */
122
+
123
+ /** @} */ /* END RESOURCE_NAMING */
124
+
125
+ /* ========================================================================= */
126
+ #ifdef UNICODE
127
+ #define nvtxNameCudaDevice nvtxNameCudaDeviceW
128
+ #define nvtxNameCudaStream nvtxNameCudaStreamW
129
+ #define nvtxNameCudaEvent nvtxNameCudaEventW
130
+ #else
131
+ #define nvtxNameCudaDevice nvtxNameCudaDeviceA
132
+ #define nvtxNameCudaStream nvtxNameCudaStreamA
133
+ #define nvtxNameCudaEvent nvtxNameCudaEventA
134
+ #endif
135
+
136
+ #ifdef __cplusplus
137
+ }
138
+ #endif /* __cplusplus */
139
+
140
+ #endif /* NVTOOLSEXT_CUDART_H_ */
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvToolsExtSync.h ADDED
@@ -0,0 +1,406 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2009-2017 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO USER:
5
+ *
6
+ * This source code is subject to NVIDIA ownership rights under U.S. and
7
+ * international Copyright laws.
8
+ *
9
+ * This software and the information contained herein is PROPRIETARY and
10
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and conditions
11
+ * of a form of NVIDIA software license agreement.
12
+ *
13
+ * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
14
+ * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
15
+ * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
16
+ * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
17
+ * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
18
+ * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
19
+ * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
20
+ * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
21
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
22
+ * OR PERFORMANCE OF THIS SOURCE CODE.
23
+ *
24
+ * U.S. Government End Users. This source code is a "commercial item" as
25
+ * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
26
+ * "commercial computer software" and "commercial computer software
27
+ * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
28
+ * and is provided to the U.S. Government only as a commercial end item.
29
+ * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
30
+ * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
31
+ * source code with only those rights set forth herein.
32
+ *
33
+ * Any use of this source code in individual and commercial software must
34
+ * include, in the user documentation and internal comments to the code,
35
+ * the above Disclaimer and U.S. Government End Users Notice.
36
+ */
37
+
38
+ #ifndef NVTOOLSEXT_SYNC_H_
39
+ #define NVTOOLSEXT_SYNC_H_
40
+
41
+ #include "nvToolsExt.h"
42
+
43
+
44
+ #ifdef __cplusplus
45
+ extern "C" {
46
+ #endif /* __cplusplus */
47
+
48
+ /* \cond SHOW_HIDDEN
49
+ * \version \NVTX_VERSION_2
50
+ */
51
+ #define NVTX_SYNCUSER_ATTRIB_STRUCT_SIZE ( (uint16_t)( sizeof(nvtxSyncUserAttributes_v0) ) )
52
+ /** \endcond */
53
+
54
+
55
+ /**
56
+ * \page PAGE_SYNCHRONIZATION Synchronization
57
+ *
58
+ * This section covers a subset of the API that allow users to track additional
59
+ * synchronization details of their application. Naming OS synchronization primitives
60
+ * may allow users to better understand the data collected by traced synchronization
61
+ * APIs. Additionally, a user defined synchronization object can allow the users to
62
+ * to tell the tools when the user is building their own synchronization system
63
+ * that do not rely on the OS to provide behaviors and instead use techniques like
64
+ * atomic operations and spinlocks.
65
+ *
66
+ * See module \ref SYNCHRONIZATION for details.
67
+ *
68
+ * \par Example:
69
+ * \code
70
+ * class MyMutex
71
+ * {
72
+ * volatile long bLocked;
73
+ * nvtxSyncUser_t hSync;
74
+ * public:
75
+ * MyMutex(const char* name, nvtxDomainHandle_t d){
76
+ * bLocked = 0;
77
+ *
78
+ * nvtxSyncUserAttributes_t attribs = { 0 };
79
+ * attribs.version = NVTX_VERSION;
80
+ * attribs.size = NVTX_SYNCUSER_ATTRIB_STRUCT_SIZE;
81
+ * attribs.messageType = NVTX_MESSAGE_TYPE_ASCII;
82
+ * attribs.message.ascii = name;
83
+ * hSync = nvtxDomainSyncUserCreate(d, &attribs);
84
+ * }
85
+ *
86
+ * ~MyMutex() {
87
+ * nvtxDomainSyncUserDestroy(hSync);
88
+ * }
89
+ *
90
+ * bool Lock() {
91
+ * nvtxDomainSyncUserAcquireStart(hSync);
92
+ * bool acquired = __sync_bool_compare_and_swap(&bLocked, 0, 1);//atomic compiler intrinsic
93
+
94
+ * if (acquired) {
95
+ * nvtxDomainSyncUserAcquireSuccess(hSync);
96
+ * }
97
+ * else {
98
+ * nvtxDomainSyncUserAcquireFailed(hSync);
99
+ * }
100
+ * return acquired;
101
+ * }
102
+
103
+ * void Unlock() {
104
+ * nvtxDomainSyncUserReleasing(hSync);
105
+ * bLocked = false;
106
+ * }
107
+ * };
108
+ * \endcode
109
+ *
110
+ * \version \NVTX_VERSION_2
111
+ */
112
+
113
+ /* ------------------------------------------------------------------------- */
114
+ /* \cond SHOW_HIDDEN
115
+ * \brief Used to build a non-colliding value for resource types separated class
116
+ * \version \NVTX_VERSION_2
117
+ */
118
+ #define NVTX_RESOURCE_CLASS_SYNC_OS 2 /**< Synchronization objects that are OS specific. */
119
+ #define NVTX_RESOURCE_CLASS_SYNC_PTHREAD 3 /**< Synchronization objects that are from the POSIX Threads API (pthread)*/
120
+ /** \endcond */
121
+
122
+
123
+ /* ------------------------------------------------------------------------- */
124
+ /** \defgroup SYNCHRONIZATION Synchronization
125
+ * See page \ref PAGE_SYNCHRONIZATION.
126
+ * @{
127
+ */
128
+
129
+ /** \brief Resource type values for OSs with POSIX Thread API support
130
+ */
131
+ typedef enum nvtxResourceSyncPosixThreadType_t
132
+ {
133
+ NVTX_RESOURCE_TYPE_SYNC_PTHREAD_MUTEX = NVTX_RESOURCE_MAKE_TYPE(SYNC_PTHREAD, 1), /* pthread_mutex_t */
134
+ NVTX_RESOURCE_TYPE_SYNC_PTHREAD_CONDITION = NVTX_RESOURCE_MAKE_TYPE(SYNC_PTHREAD, 2), /* pthread_cond_t */
135
+ NVTX_RESOURCE_TYPE_SYNC_PTHREAD_RWLOCK = NVTX_RESOURCE_MAKE_TYPE(SYNC_PTHREAD, 3), /* pthread_rwlock_t */
136
+ NVTX_RESOURCE_TYPE_SYNC_PTHREAD_BARRIER = NVTX_RESOURCE_MAKE_TYPE(SYNC_PTHREAD, 4), /* pthread_barrier_t */
137
+ NVTX_RESOURCE_TYPE_SYNC_PTHREAD_SPINLOCK = NVTX_RESOURCE_MAKE_TYPE(SYNC_PTHREAD, 5), /* pthread_spinlock_t */
138
+ NVTX_RESOURCE_TYPE_SYNC_PTHREAD_ONCE = NVTX_RESOURCE_MAKE_TYPE(SYNC_PTHREAD, 6) /* pthread_once_t */
139
+ } nvtxResourceSyncPosixThreadType_t;
140
+
141
+ /** \brief Resource type values for Windows OSs
142
+ */
143
+ typedef enum nvtxResourceSyncWindowsType_t
144
+ {
145
+ NVTX_RESOURCE_TYPE_SYNC_WINDOWS_MUTEX = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 1),
146
+ NVTX_RESOURCE_TYPE_SYNC_WINDOWS_SEMAPHORE = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 2),
147
+ NVTX_RESOURCE_TYPE_SYNC_WINDOWS_EVENT = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 3),
148
+ NVTX_RESOURCE_TYPE_SYNC_WINDOWS_CRITICAL_SECTION = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 4),
149
+ NVTX_RESOURCE_TYPE_SYNC_WINDOWS_SRWLOCK = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 5)
150
+ } nvtxResourceSyncWindowsType_t;
151
+
152
+ /** \brief Resource type values for Linux and Linux derived OSs such as Android
153
+ * \sa
154
+ * ::nvtxResourceSyncPosixThreadType_t
155
+ */
156
+ typedef enum nvtxResourceSyncLinuxType_t
157
+ {
158
+ NVTX_RESOURCE_TYPE_SYNC_LINUX_MUTEX = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 1),
159
+ NVTX_RESOURCE_TYPE_SYNC_LINUX_FUTEX = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 2),
160
+ NVTX_RESOURCE_TYPE_SYNC_LINUX_SEMAPHORE = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 3),
161
+ NVTX_RESOURCE_TYPE_SYNC_LINUX_COMPLETION = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 4),
162
+ NVTX_RESOURCE_TYPE_SYNC_LINUX_SPINLOCK = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 5),
163
+ NVTX_RESOURCE_TYPE_SYNC_LINUX_SEQLOCK = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 6),
164
+ NVTX_RESOURCE_TYPE_SYNC_LINUX_RCU = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 7)
165
+ } nvtxResourceSyncLinuxType_t;
166
+
167
+ /** \brief Resource type values for Android come from Linux.
168
+ * \sa
169
+ * ::nvtxResourceSyncLinuxType_t
170
+ * ::nvtxResourceSyncPosixThreadType_t
171
+ */
172
+ typedef enum nvtxResourceSyncLinuxType_t nvtxResourceSyncAndroidType_t;
173
+
174
+ /** \brief User Defined Synchronization Object Handle .
175
+ * \anchor SYNCUSER_HANDLE_STRUCTURE
176
+ *
177
+ * This structure is opaque to the user and is used as a handle to reference
178
+ * a user defined syncrhonization object. The tools will return a pointer through the API for the application
179
+ * to hold on it's behalf to reference the string in the future.
180
+ *
181
+ */
182
+ typedef struct nvtxSyncUser* nvtxSyncUser_t;
183
+
184
+ /** \brief User Defined Synchronization Object Attributes Structure.
185
+ * \anchor USERDEF_SYNC_ATTRIBUTES_STRUCTURE
186
+ *
187
+ * This structure is used to describe the attributes of a user defined synchronization
188
+ * object. The layout of the structure is defined by a specific version of the tools
189
+ * extension library and can change between different versions of the Tools Extension
190
+ * library.
191
+ *
192
+ * \par Initializing the Attributes
193
+ *
194
+ * The caller should always perform the following three tasks when using
195
+ * attributes:
196
+ * <ul>
197
+ * <li>Zero the structure
198
+ * <li>Set the version field
199
+ * <li>Set the size field
200
+ * </ul>
201
+ *
202
+ * Zeroing the structure sets all the event attributes types and values
203
+ * to the default value.
204
+ *
205
+ * The version and size field are used by the Tools Extension
206
+ * implementation to handle multiple versions of the attributes structure.
207
+ *
208
+ * It is recommended that the caller use one of the following to methods
209
+ * to initialize the event attributes structure:
210
+ *
211
+ * \par Method 1: Initializing nvtxEventAttributes for future compatibility
212
+ * \code
213
+ * nvtxSyncUserAttributes_t attribs = {0};
214
+ * attribs.version = NVTX_VERSION;
215
+ * attribs.size = NVTX_SYNCUSER_ATTRIB_STRUCT_SIZE;
216
+ * \endcode
217
+ *
218
+ * \par Method 2: Initializing nvtxSyncUserAttributes_t for a specific version
219
+ * \code
220
+ * nvtxSyncUserAttributes_t attribs = {0};
221
+ * attribs.version = 1;
222
+ * attribs.size = (uint16_t)(sizeof(nvtxSyncUserAttributes_t));
223
+ * \endcode
224
+ *
225
+ * If the caller uses Method 1 it is critical that the entire binary
226
+ * layout of the structure be configured to 0 so that all fields
227
+ * are initialized to the default value.
228
+ *
229
+ * The caller should either use both NVTX_VERSION and
230
+ * NVTX_SYNCUSER_ATTRIB_STRUCT_SIZE (Method 1) or use explicit values
231
+ * and a versioned type (Method 2). Using a mix of the two methods
232
+ * will likely cause either source level incompatibility or binary
233
+ * incompatibility in the future.
234
+ *
235
+ * \par Settings Attribute Types and Values
236
+ *
237
+ *
238
+ * \par Example:
239
+ * \code
240
+ * // Initialize
241
+ * nvtxSyncUserAttributes_t attribs = {0};
242
+ * attribs.version = NVTX_VERSION;
243
+ * attribs.size = NVTX_SYNCUSER_ATTRIB_STRUCT_SIZE;
244
+ *
245
+ * // Configure the Attributes
246
+ * attribs.messageType = NVTX_MESSAGE_TYPE_ASCII;
247
+ * attribs.message.ascii = "Example";
248
+ * \endcode
249
+ *
250
+ * \sa
251
+ * ::nvtxDomainSyncUserCreate
252
+ */
253
+ typedef struct nvtxSyncUserAttributes_v0
254
+ {
255
+ /**
256
+ * \brief Version flag of the structure.
257
+ *
258
+ * Needs to be set to NVTX_VERSION to indicate the version of NVTX APIs
259
+ * supported in this header file. This can optionally be overridden to
260
+ * another version of the tools extension library.
261
+ */
262
+ uint16_t version;
263
+
264
+ /**
265
+ * \brief Size of the structure.
266
+ *
267
+ * Needs to be set to the size in bytes of the event attribute
268
+ * structure used to specify the event.
269
+ */
270
+ uint16_t size;
271
+
272
+ /** \brief Message type specified in this attribute structure.
273
+ *
274
+ * Defines the message format of the attribute structure's \ref nvtxSyncUserAttributes_v0::message
275
+ * "message" field.
276
+ *
277
+ * Default Value is NVTX_MESSAGE_UNKNOWN
278
+ */
279
+ int32_t messageType; /* nvtxMessageType_t */
280
+
281
+ /** \brief Message assigned to this attribute structure.
282
+ *
283
+ * The text message that is attached to an event.
284
+ */
285
+ nvtxMessageValue_t message;
286
+
287
+ } nvtxSyncUserAttributes_v0;
288
+
289
+ typedef struct nvtxSyncUserAttributes_v0 nvtxSyncUserAttributes_t;
290
+
291
+ /* ------------------------------------------------------------------------- */
292
+ /** \brief Create a user defined synchronization object
293
+ * This is used to track non-OS synchronization working with spinlocks and atomics
294
+ *
295
+ * \param domain - Domain to own the resource
296
+ * \param attribs - A structure to assign multiple attributes to the object.
297
+ *
298
+ * \return A handle that represents the newly created user defined synchronization object.
299
+ *
300
+ * \sa
301
+ * ::nvtxDomainSyncUserCreate
302
+ * ::nvtxDomainSyncUserDestroy
303
+ * ::nvtxDomainSyncUserAcquireStart
304
+ * ::nvtxDomainSyncUserAcquireFailed
305
+ * ::nvtxDomainSyncUserAcquireSuccess
306
+ * ::nvtxDomainSyncUserReleasing
307
+ *
308
+ * \version \NVTX_VERSION_2
309
+ */
310
+ NVTX_DECLSPEC nvtxSyncUser_t NVTX_API nvtxDomainSyncUserCreate(nvtxDomainHandle_t domain, const nvtxSyncUserAttributes_t* attribs);
311
+
312
+ /* ------------------------------------------------------------------------- */
313
+ /** \brief Destroy a user defined synchronization object
314
+ * This is used to track non-OS synchronization working with spinlocks and atomics
315
+ *
316
+ * \param handle - A handle to the object to operate on.
317
+ *
318
+ * \sa
319
+ * ::nvtxDomainSyncUserCreate
320
+ * ::nvtxDomainSyncUserDestroy
321
+ * ::nvtxDomainSyncUserAcquireStart
322
+ * ::nvtxDomainSyncUserAcquireFailed
323
+ * ::nvtxDomainSyncUserAcquireSuccess
324
+ * ::nvtxDomainSyncUserReleasing
325
+ *
326
+ * \version \NVTX_VERSION_2
327
+ */
328
+ NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserDestroy(nvtxSyncUser_t handle);
329
+
330
+ /* ------------------------------------------------------------------------- */
331
+ /** \brief Signal to tools that an attempt to acquire a user defined synchronization object
332
+ *
333
+ * \param handle - A handle to the object to operate on.
334
+ *
335
+ * \sa
336
+ * ::nvtxDomainSyncUserCreate
337
+ * ::nvtxDomainSyncUserDestroy
338
+ * ::nvtxDomainSyncUserAcquireStart
339
+ * ::nvtxDomainSyncUserAcquireFailed
340
+ * ::nvtxDomainSyncUserAcquireSuccess
341
+ * ::nvtxDomainSyncUserReleasing
342
+ *
343
+ * \version \NVTX_VERSION_2
344
+ */
345
+ NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserAcquireStart(nvtxSyncUser_t handle);
346
+
347
+ /* ------------------------------------------------------------------------- */
348
+ /** \brief Signal to tools of failure in acquiring a user defined synchronization object
349
+ * This should be called after \ref nvtxDomainSyncUserAcquireStart
350
+ *
351
+ * \param handle - A handle to the object to operate on.
352
+ *
353
+ * \sa
354
+ * ::nvtxDomainSyncUserCreate
355
+ * ::nvtxDomainSyncUserDestroy
356
+ * ::nvtxDomainSyncUserAcquireStart
357
+ * ::nvtxDomainSyncUserAcquireFailed
358
+ * ::nvtxDomainSyncUserAcquireSuccess
359
+ * ::nvtxDomainSyncUserReleasing
360
+ *
361
+ * \version \NVTX_VERSION_2
362
+ */NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserAcquireFailed(nvtxSyncUser_t handle);
363
+
364
+ /* ------------------------------------------------------------------------- */
365
+ /** \brief Signal to tools of success in acquiring a user defined synchronization object
366
+ * This should be called after \ref nvtxDomainSyncUserAcquireStart.
367
+ *
368
+ * \param handle - A handle to the object to operate on.
369
+ *
370
+ * \sa
371
+ * ::nvtxDomainSyncUserCreate
372
+ * ::nvtxDomainSyncUserDestroy
373
+ * ::nvtxDomainSyncUserAcquireStart
374
+ * ::nvtxDomainSyncUserAcquireFailed
375
+ * ::nvtxDomainSyncUserAcquireSuccess
376
+ * ::nvtxDomainSyncUserReleasing
377
+ *
378
+ * \version \NVTX_VERSION_2
379
+ */NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserAcquireSuccess(nvtxSyncUser_t handle);
380
+
381
+ /* ------------------------------------------------------------------------- */
382
+ /** \brief Signal to tools of releasing a reservation on user defined synchronization object
383
+ * This should be called after \ref nvtxDomainSyncUserAcquireSuccess.
384
+ *
385
+ * \param handle - A handle to the object to operate on.
386
+ *
387
+ * \sa
388
+ * ::nvtxDomainSyncUserCreate
389
+ * ::nvtxDomainSyncUserDestroy
390
+ * ::nvtxDomainSyncUserAcquireStart
391
+ * ::nvtxDomainSyncUserAcquireFailed
392
+ * ::nvtxDomainSyncUserAcquireSuccess
393
+ * ::nvtxDomainSyncUserReleasing
394
+ *
395
+ * \version \NVTX_VERSION_2
396
+ */
397
+ NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserReleasing(nvtxSyncUser_t handle);
398
+
399
+
400
+ /** @} */ /*END defgroup*/
401
+
402
+ #ifdef __cplusplus
403
+ }
404
+ #endif /* __cplusplus */
405
+
406
+ #endif /* NVTOOLSEXT_SYNC_H_ */
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvtx3/nvToolsExt.h ADDED
@@ -0,0 +1,1499 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2009-2016 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO USER:
5
+ *
6
+ * This source code is subject to NVIDIA ownership rights under U.S. and
7
+ * international Copyright laws.
8
+ *
9
+ * This software and the information contained herein is PROPRIETARY and
10
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and conditions
11
+ * of a form of NVIDIA software license agreement.
12
+ *
13
+ * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
14
+ * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
15
+ * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
16
+ * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
17
+ * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
18
+ * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
19
+ * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
20
+ * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
21
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
22
+ * OR PERFORMANCE OF THIS SOURCE CODE.
23
+ *
24
+ * U.S. Government End Users. This source code is a "commercial item" as
25
+ * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
26
+ * "commercial computer software" and "commercial computer software
27
+ * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
28
+ * and is provided to the U.S. Government only as a commercial end item.
29
+ * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
30
+ * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
31
+ * source code with only those rights set forth herein.
32
+ *
33
+ * Any use of this source code in individual and commercial software must
34
+ * include, in the user documentation and internal comments to the code,
35
+ * the above Disclaimer and U.S. Government End Users Notice.
36
+ */
37
+
38
+ /** \file nvToolsExt.h
39
+ */
40
+
41
+ /* ========================================================================= */
42
+ /** \mainpage
43
+ * \tableofcontents
44
+ * \section INTRODUCTION Introduction
45
+ *
46
+ * The NVIDIA Tools Extension library is a set of functions that a
47
+ * developer can use to provide additional information to tools.
48
+ * The additional information is used by the tool to improve
49
+ * analysis and visualization of data.
50
+ *
51
+ * The library introduces close to zero overhead if no tool is
52
+ * attached to the application. The overhead when a tool is
53
+ * attached is specific to the tool.
54
+ *
55
+ * \section INITIALIZATION_SECTION Initialization
56
+ *
57
+ * Typically the tool's library that plugs into NVTX is indirectly
58
+ * loaded via enviromental properties that are platform specific.
59
+ * For some platform or special cases, the user may be required
60
+ * to instead explicity initialize instead though. This can also
61
+ * be helpful to control when the API loads a tool's library instead
62
+ * of what would typically be the first function call to emit info.
63
+ * For these rare case, see \ref INITIALIZATION for additional information.
64
+ *
65
+ * \section MARKERS_AND_RANGES Markers and Ranges
66
+ *
67
+ * Markers and ranges are used to describe events at a specific time (markers)
68
+ * or over a time span (ranges) during the execution of the application
69
+ * respectively.
70
+ *
71
+ * \subsection MARKERS Markers
72
+ *
73
+ * Markers denote specific moments in time.
74
+ *
75
+ *
76
+ * See \ref DOMAINS and \ref EVENT_ATTRIBUTES for additional information on
77
+ * how to specify the domain.
78
+ *
79
+ * \subsection THREAD_RANGES Thread Ranges
80
+ *
81
+ * Thread ranges denote nested time ranges. Nesting is maintained per thread
82
+ * per domain and does not require any additional correlation mechanism. The
83
+ * duration of a thread range is defined by the corresponding pair of
84
+ * nvtxRangePush* to nvtxRangePop API calls.
85
+ *
86
+ * See \ref DOMAINS and \ref EVENT_ATTRIBUTES for additional information on
87
+ * how to specify the domain.
88
+ *
89
+ * \subsection PROCESS_RANGES Process Ranges
90
+ *
91
+ * Process ranges denote a time span that can expose arbitrary concurrency, as
92
+ * opposed to thread ranges that only support nesting. In addition the range
93
+ * start event can happen on a different thread than the end marker. For the
94
+ * correlation of a start/end pair an unique correlation ID is used that is
95
+ * returned from the start API call and needs to be passed into the end API
96
+ * call.
97
+ *
98
+ * \subsection EVENT_ATTRIBUTES Event Attributes
99
+ *
100
+ * \ref MARKERS_AND_RANGES can be annotated with various attributes to provide
101
+ * additional information for an event or to guide the tool's visualization of
102
+ * the data. Each of the attributes is optional and if left unused the
103
+ * attributes fall back to a default value. The attributes include:
104
+ * - color
105
+ * - category
106
+ *
107
+ * To specify any attribute other than the text message, the \ref
108
+ * EVENT_ATTRIBUTE_STRUCTURE "Event Attribute Structure" must be used.
109
+ *
110
+ * \section DOMAINS Domains
111
+ *
112
+ * Domains enable developers to scope annotations. By default all events and
113
+ * annotations are in the default domain. Additional domains can be registered.
114
+ * This allows developers to scope markers, ranges, and resources names to
115
+ * avoid conflicts.
116
+ *
117
+ * The function ::nvtxDomainCreateA or ::nvtxDomainCreateW is used to create
118
+ * a named domain.
119
+ *
120
+ * Each domain maintains its own
121
+ * - categories
122
+ * - thread range stacks
123
+ * - registered strings
124
+ *
125
+ * The function ::nvtxDomainDestroy marks the end of the domain. Destroying
126
+ * a domain unregisters and destroys all objects associated with it such as
127
+ * registered strings, resource objects, named categories, and started ranges.
128
+ *
129
+ * \section RESOURCE_NAMING Resource Naming
130
+ *
131
+ * This section covers calls that allow to annotate objects with user-provided
132
+ * names in order to allow for a better analysis of complex trace data. All of
133
+ * the functions take the handle or the ID of the object to name and the name.
134
+ * The functions can be called multiple times during the execution of an
135
+ * application, however, in that case it is implementation dependent which
136
+ * name will be reported by the tool.
137
+ *
138
+ * \subsection CATEGORY_NAMING Category Naming
139
+ *
140
+ * Some function in this library support associating an integer category
141
+ * to enable filtering and sorting. The category naming functions allow
142
+ * the application to associate a user friendly name with the integer
143
+ * category. Support for domains have been added in NVTX_VERSION_2 to
144
+ * avoid collisions when domains are developed independantly.
145
+ *
146
+ * \subsection RESOURCE_OBJECTS Resource Objects
147
+ *
148
+ * Resource objects are a generic mechanism for attaching data to an application
149
+ * resource. The identifier field makes the association to a pointer or handle,
150
+ * while the type field helps provide deeper understanding of the identifier as
151
+ * well as enabling differentiation in cases where handles generated by different
152
+ * APIs may collide. The resource object may also have an associated message to
153
+ * associate with the application resource, enabling further annotation of this
154
+ * object and how it is used.
155
+ *
156
+ * The resource object was introduced in NVTX_VERSION_2 to supersede existing naming
157
+ * functions and allow the application resource identified by those functions to be
158
+ * associated to a domain. The other naming functions are still supported for backward
159
+ * compatibility but will be associated only to the default domain.
160
+ *
161
+ * \subsection RESOURCE_NAMING_OS Resource Naming
162
+ *
163
+ * Some operating system resources creation APIs do not support providing a user friendly
164
+ * name, such as some OS thread creation APIs. This API support resource naming though
165
+ * both through resource objects and functions following the pattern
166
+ * nvtxName[RESOURCE_TYPE][A|W](identifier, name). Resource objects introduced in NVTX_VERSION 2
167
+ * supersede the other functions with a a more general method of assigning names to OS resources,
168
+ * along with associating them to domains too. The older nvtxName* functions are only associated
169
+ * with the default domain.
170
+ * \section EXTENSIONS Optional Extensions
171
+ * Optional extensions will either appear within the existing sections the extend or appear
172
+ * in the "Related Pages" when they introduce new concepts.
173
+ */
174
+
175
+ /**
176
+ * Tools Extension API version
177
+ */
178
+ #if defined(NVTX_VERSION) && NVTX_VERSION < 3
179
+ #error "Trying to #include NVTX version 3 in a source file where an older NVTX version has already been included. If you are not directly using NVTX (the NVIDIA Tools Extension library), you are getting this error because libraries you are using have included different versions of NVTX. Suggested solutions are: (1) reorder #includes so the newest NVTX version is included first, (2) avoid using the conflicting libraries in the same .c/.cpp file, or (3) update the library using the older NVTX version to use the newer version instead."
180
+ #endif
181
+
182
+ /* Header guard */
183
+ #if !defined(NVTX_VERSION)
184
+ #define NVTX_VERSION 3
185
+
186
+ #if defined(_MSC_VER)
187
+ #define NVTX_API __stdcall
188
+ #define NVTX_INLINE_STATIC __inline static
189
+ #else /*defined(__GNUC__)*/
190
+ #define NVTX_API
191
+ #define NVTX_INLINE_STATIC inline static
192
+ #endif /* Platform */
193
+
194
+ #if defined(NVTX_NO_IMPL)
195
+ /* When omitting implementation, avoid declaring functions inline */
196
+ /* without definitions, since this causes compiler warnings. */
197
+ #define NVTX_DECLSPEC
198
+ #elif defined(NVTX_EXPORT_API)
199
+ /* Allow overriding definition of NVTX_DECLSPEC when exporting API. */
200
+ /* Default is empty, meaning non-inline with external linkage. */
201
+ #if !defined(NVTX_DECLSPEC)
202
+ #define NVTX_DECLSPEC
203
+ #endif
204
+ #else
205
+ /* Normal NVTX usage defines the NVTX API inline with static */
206
+ /* (internal) linkage. */
207
+ #define NVTX_DECLSPEC NVTX_INLINE_STATIC
208
+ #endif
209
+
210
+ #include "nvtxDetail/nvtxLinkOnce.h"
211
+
212
+ #define NVTX_VERSIONED_IDENTIFIER_L3(NAME, VERSION) NAME##_v##VERSION
213
+ #define NVTX_VERSIONED_IDENTIFIER_L2(NAME, VERSION) NVTX_VERSIONED_IDENTIFIER_L3(NAME, VERSION)
214
+ #define NVTX_VERSIONED_IDENTIFIER(NAME) NVTX_VERSIONED_IDENTIFIER_L2(NAME, NVTX_VERSION)
215
+
216
+ /**
217
+ * The nvToolsExt library depends on stdint.h. If the build tool chain in use
218
+ * does not include stdint.h then define NVTX_STDINT_TYPES_ALREADY_DEFINED
219
+ * and define the following types:
220
+ * <ul>
221
+ * <li>uint8_t
222
+ * <li>int8_t
223
+ * <li>uint16_t
224
+ * <li>int16_t
225
+ * <li>uint32_t
226
+ * <li>int32_t
227
+ * <li>uint64_t
228
+ * <li>int64_t
229
+ * <li>uintptr_t
230
+ * <li>intptr_t
231
+ * </ul>
232
+ * #define NVTX_STDINT_TYPES_ALREADY_DEFINED if you are using your own header file.
233
+ */
234
+ #ifndef NVTX_STDINT_TYPES_ALREADY_DEFINED
235
+ #include <stdint.h>
236
+ #endif
237
+
238
+ #include <stddef.h>
239
+
240
+ #ifdef __cplusplus
241
+ extern "C" {
242
+ #endif /* __cplusplus */
243
+
244
+ /**
245
+ * Result Codes
246
+ */
247
+
248
+ #define NVTX_SUCCESS 0
249
+ #define NVTX_FAIL 1
250
+ #define NVTX_ERR_INIT_LOAD_PROPERTY 2
251
+ #define NVTX_ERR_INIT_ACCESS_LIBRARY 3
252
+ #define NVTX_ERR_INIT_LOAD_LIBRARY 4
253
+ #define NVTX_ERR_INIT_MISSING_LIBRARY_ENTRY_POINT 5
254
+ #define NVTX_ERR_INIT_FAILED_LIBRARY_ENTRY_POINT 6
255
+ #define NVTX_ERR_NO_INJECTION_LIBRARY_AVAILABLE 7
256
+
257
+ /**
258
+ * Size of the nvtxEventAttributes_t structure.
259
+ */
260
+ #define NVTX_EVENT_ATTRIB_STRUCT_SIZE ( (uint16_t)( sizeof(nvtxEventAttributes_t) ) )
261
+
262
+ #define NVTX_NO_PUSH_POP_TRACKING ((int)-2)
263
+
264
+ typedef uint64_t nvtxRangeId_t;
265
+
266
+ /* Forward declaration of opaque domain registration structure */
267
+ struct nvtxDomainRegistration_st;
268
+ typedef struct nvtxDomainRegistration_st nvtxDomainRegistration;
269
+
270
+ /* \brief Domain Handle Structure.
271
+ * \anchor DOMAIN_HANDLE_STRUCTURE
272
+ *
273
+ * This structure is opaque to the user and is used as a handle to reference
274
+ * a domain. This type is returned from tools when using the NVTX API to
275
+ * create a domain.
276
+ *
277
+ */
278
+ typedef nvtxDomainRegistration* nvtxDomainHandle_t;
279
+
280
+ /* Forward declaration of opaque string registration structure */
281
+ struct nvtxStringRegistration_st;
282
+ typedef struct nvtxStringRegistration_st nvtxStringRegistration;
283
+
284
+ /* \brief Registered String Handle Structure.
285
+ * \anchor REGISTERED_STRING_HANDLE_STRUCTURE
286
+ *
287
+ * This structure is opaque to the user and is used as a handle to reference
288
+ * a registered string. This type is returned from tools when using the NVTX
289
+ * API to create a registered string.
290
+ *
291
+ */
292
+ typedef nvtxStringRegistration* nvtxStringHandle_t;
293
+
294
+ /* ========================================================================= */
295
+ /** \defgroup GENERAL General
296
+ * @{
297
+ */
298
+
299
+ /** ---------------------------------------------------------------------------
300
+ * Color Types
301
+ * ------------------------------------------------------------------------- */
302
+ typedef enum nvtxColorType_t
303
+ {
304
+ NVTX_COLOR_UNKNOWN = 0, /**< Color attribute is unused. */
305
+ NVTX_COLOR_ARGB = 1 /**< An ARGB color is provided. */
306
+ } nvtxColorType_t;
307
+
308
+ /** ---------------------------------------------------------------------------
309
+ * Message Types
310
+ * ------------------------------------------------------------------------- */
311
+ typedef enum nvtxMessageType_t
312
+ {
313
+ NVTX_MESSAGE_UNKNOWN = 0, /**< Message payload is unused. */
314
+ NVTX_MESSAGE_TYPE_ASCII = 1, /**< A character sequence is used as payload. */
315
+ NVTX_MESSAGE_TYPE_UNICODE = 2, /**< A wide character sequence is used as payload. */
316
+ /* NVTX_VERSION_2 */
317
+ NVTX_MESSAGE_TYPE_REGISTERED = 3, /**< A unique string handle that was registered
318
+ with \ref nvtxDomainRegisterStringA() or
319
+ \ref nvtxDomainRegisterStringW(). */
320
+ } nvtxMessageType_t;
321
+
322
+ typedef union nvtxMessageValue_t
323
+ {
324
+ const char* ascii;
325
+ const wchar_t* unicode;
326
+ /* NVTX_VERSION_2 */
327
+ nvtxStringHandle_t registered;
328
+ } nvtxMessageValue_t;
329
+
330
+
331
+ /** @} */ /*END defgroup*/
332
+ /* ------------------------------------------------------------------------- */
333
+ /** \brief Force initialization (optional)
334
+ *
335
+ * Force NVTX library to initialize. The first call to any NVTX API function
336
+ * will automatically initialize the entire API. This can make the first call
337
+ * much slower than subsequent calls. In applications where the first call to
338
+ * NVTX may be in a performance-critical section, calling nvtxInitialize before
339
+ * any performance-critical sections will ensure NVTX initialization occurs at
340
+ * an acceptable time. Since nvtxInitialize takes no parameters and has no
341
+ * expected behavior besides initialization, it is convenient to add a call to
342
+ * nvtxInitialize in NVTX-instrumented applications that need to force earlier
343
+ * initialization without changing any other code. For example, if an app's
344
+ * first NVTX call is nvtxDomainCreate, and it is difficult to move that call
345
+ * earlier because the domain handle must be stored in an object only created
346
+ * at that point, adding a call to nvtxInitialize at the top of main() will
347
+ * ensure the later call to nvtxDomainCreate is as fast as possible.
348
+ *
349
+ * \version \NVTX_VERSION_3
350
+ *
351
+ * \param reserved - must be zero or NULL.
352
+ *
353
+ * @{ */
354
+ NVTX_DECLSPEC void NVTX_API nvtxInitialize(const void* reserved);
355
+ /** @} */
356
+
357
+
358
+ /** @} */ /*END defgroup*/
359
+
360
+ /* ========================================================================= */
361
+ /** \defgroup EVENT_ATTRIBUTES Event Attributes
362
+ * @{
363
+ */
364
+
365
+ /** ---------------------------------------------------------------------------
366
+ * Payload Types
367
+ * ------------------------------------------------------------------------- */
368
+ typedef enum nvtxPayloadType_t
369
+ {
370
+ NVTX_PAYLOAD_UNKNOWN = 0, /**< Color payload is unused. */
371
+ NVTX_PAYLOAD_TYPE_UNSIGNED_INT64 = 1, /**< A 64 bit unsigned integer value is used as payload. */
372
+ NVTX_PAYLOAD_TYPE_INT64 = 2, /**< A 64 bit signed integer value is used as payload. */
373
+ NVTX_PAYLOAD_TYPE_DOUBLE = 3, /**< A 64 bit floating point value is used as payload. */
374
+ /* NVTX_VERSION_2 */
375
+ NVTX_PAYLOAD_TYPE_UNSIGNED_INT32 = 4, /**< A 32 bit floating point value is used as payload. */
376
+ NVTX_PAYLOAD_TYPE_INT32 = 5, /**< A 32 bit floating point value is used as payload. */
377
+ NVTX_PAYLOAD_TYPE_FLOAT = 6 /**< A 32 bit floating point value is used as payload. */
378
+ } nvtxPayloadType_t;
379
+
380
+ /** \brief Event Attribute Structure.
381
+ * \anchor EVENT_ATTRIBUTE_STRUCTURE
382
+ *
383
+ * This structure is used to describe the attributes of an event. The layout of
384
+ * the structure is defined by a specific version of the tools extension
385
+ * library and can change between different versions of the Tools Extension
386
+ * library.
387
+ *
388
+ * \par Initializing the Attributes
389
+ *
390
+ * The caller should always perform the following three tasks when using
391
+ * attributes:
392
+ * <ul>
393
+ * <li>Zero the structure
394
+ * <li>Set the version field
395
+ * <li>Set the size field
396
+ * </ul>
397
+ *
398
+ * Zeroing the structure sets all the event attributes types and values
399
+ * to the default value.
400
+ *
401
+ * The version and size field are used by the Tools Extension
402
+ * implementation to handle multiple versions of the attributes structure.
403
+ *
404
+ * It is recommended that the caller use one of the following to methods
405
+ * to initialize the event attributes structure:
406
+ *
407
+ * \par Method 1: Initializing nvtxEventAttributes for future compatibility
408
+ * \code
409
+ * nvtxEventAttributes_t eventAttrib = {0};
410
+ * eventAttrib.version = NVTX_VERSION;
411
+ * eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
412
+ * \endcode
413
+ *
414
+ * \par Method 2: Initializing nvtxEventAttributes for a specific version
415
+ * \code
416
+ * nvtxEventAttributes_t eventAttrib = {0};
417
+ * eventAttrib.version = 1;
418
+ * eventAttrib.size = (uint16_t)(sizeof(nvtxEventAttributes_v1));
419
+ * \endcode
420
+ *
421
+ * If the caller uses Method 1 it is critical that the entire binary
422
+ * layout of the structure be configured to 0 so that all fields
423
+ * are initialized to the default value.
424
+ *
425
+ * The caller should either use both NVTX_VERSION and
426
+ * NVTX_EVENT_ATTRIB_STRUCT_SIZE (Method 1) or use explicit values
427
+ * and a versioned type (Method 2). Using a mix of the two methods
428
+ * will likely cause either source level incompatibility or binary
429
+ * incompatibility in the future.
430
+ *
431
+ * \par Settings Attribute Types and Values
432
+ *
433
+ *
434
+ * \par Example:
435
+ * \code
436
+ * // Initialize
437
+ * nvtxEventAttributes_t eventAttrib = {0};
438
+ * eventAttrib.version = NVTX_VERSION;
439
+ * eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
440
+ *
441
+ * // Configure the Attributes
442
+ * eventAttrib.colorType = NVTX_COLOR_ARGB;
443
+ * eventAttrib.color = 0xFF880000;
444
+ * eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
445
+ * eventAttrib.message.ascii = "Example";
446
+ * \endcode
447
+ *
448
+ * In the example the caller does not have to set the value of
449
+ * \ref ::nvtxEventAttributes_v2::category or
450
+ * \ref ::nvtxEventAttributes_v2::payload as these fields were set to
451
+ * the default value by {0}.
452
+ * \sa
453
+ * ::nvtxDomainMarkEx
454
+ * ::nvtxDomainRangeStartEx
455
+ * ::nvtxDomainRangePushEx
456
+ */
457
+ typedef struct nvtxEventAttributes_v2
458
+ {
459
+ /**
460
+ * \brief Version flag of the structure.
461
+ *
462
+ * Needs to be set to NVTX_VERSION to indicate the version of NVTX APIs
463
+ * supported in this header file. This can optionally be overridden to
464
+ * another version of the tools extension library.
465
+ */
466
+ uint16_t version;
467
+
468
+ /**
469
+ * \brief Size of the structure.
470
+ *
471
+ * Needs to be set to the size in bytes of the event attribute
472
+ * structure used to specify the event.
473
+ */
474
+ uint16_t size;
475
+
476
+ /**
477
+ * \brief ID of the category the event is assigned to.
478
+ *
479
+ * A category is a user-controlled ID that can be used to group
480
+ * events. The tool may use category IDs to improve filtering or
481
+ * enable grouping of events in the same category. The functions
482
+ * \ref ::nvtxNameCategoryA or \ref ::nvtxNameCategoryW can be used
483
+ * to name a category.
484
+ *
485
+ * Default Value is 0
486
+ */
487
+ uint32_t category;
488
+
489
+ /** \brief Color type specified in this attribute structure.
490
+ *
491
+ * Defines the color format of the attribute structure's \ref COLOR_FIELD
492
+ * "color" field.
493
+ *
494
+ * Default Value is NVTX_COLOR_UNKNOWN
495
+ */
496
+ int32_t colorType; /* nvtxColorType_t */
497
+
498
+ /** \brief Color assigned to this event. \anchor COLOR_FIELD
499
+ *
500
+ * The color that the tool should use to visualize the event.
501
+ */
502
+ uint32_t color;
503
+
504
+ /**
505
+ * \brief Payload type specified in this attribute structure.
506
+ *
507
+ * Defines the payload format of the attribute structure's \ref PAYLOAD_FIELD
508
+ * "payload" field.
509
+ *
510
+ * Default Value is NVTX_PAYLOAD_UNKNOWN
511
+ */
512
+ int32_t payloadType; /* nvtxPayloadType_t */
513
+
514
+ int32_t reserved0;
515
+
516
+ /**
517
+ * \brief Payload assigned to this event. \anchor PAYLOAD_FIELD
518
+ *
519
+ * A numerical value that can be used to annotate an event. The tool could
520
+ * use the payload data to reconstruct graphs and diagrams.
521
+ */
522
+ union payload_t
523
+ {
524
+ uint64_t ullValue;
525
+ int64_t llValue;
526
+ double dValue;
527
+ /* NVTX_VERSION_2 */
528
+ uint32_t uiValue;
529
+ int32_t iValue;
530
+ float fValue;
531
+ } payload;
532
+
533
+ /** \brief Message type specified in this attribute structure.
534
+ *
535
+ * Defines the message format of the attribute structure's \ref MESSAGE_FIELD
536
+ * "message" field.
537
+ *
538
+ * Default Value is NVTX_MESSAGE_UNKNOWN
539
+ */
540
+ int32_t messageType; /* nvtxMessageType_t */
541
+
542
+ /** \brief Message assigned to this attribute structure. \anchor MESSAGE_FIELD
543
+ *
544
+ * The text message that is attached to an event.
545
+ */
546
+ nvtxMessageValue_t message;
547
+
548
+ } nvtxEventAttributes_v2;
549
+
550
+ typedef struct nvtxEventAttributes_v2 nvtxEventAttributes_t;
551
+
552
+ /** @} */ /*END defgroup*/
553
+ /* ========================================================================= */
554
+ /** \defgroup MARKERS_AND_RANGES Markers and Ranges
555
+ *
556
+ * See \ref MARKERS_AND_RANGES for more details
557
+ *
558
+ * @{
559
+ */
560
+
561
+ /** \name Marker */
562
+
563
+ /* ------------------------------------------------------------------------- */
564
+ /** \brief Marks an instantaneous event in the application.
565
+ *
566
+ * A marker can contain a text message or specify additional information
567
+ * using the event attributes structure. These attributes include a text
568
+ * message, color, category, and a payload. Each of the attributes is optional
569
+ * and can only be sent out using the \ref nvtxDomainMarkEx function.
570
+ *
571
+ * nvtxDomainMarkEx(NULL, event) is equivalent to calling
572
+ * nvtxMarkEx(event).
573
+ *
574
+ * \param domain - The domain of scoping the category.
575
+ * \param eventAttrib - The event attribute structure defining the marker's
576
+ * attribute types and attribute values.
577
+ *
578
+ * \sa
579
+ * ::nvtxMarkEx
580
+ *
581
+ * \version \NVTX_VERSION_2
582
+ * @{ */
583
+ NVTX_DECLSPEC void NVTX_API nvtxDomainMarkEx(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
584
+ /** @} */
585
+
586
+ /* ------------------------------------------------------------------------- */
587
+ /** \brief Marks an instantaneous event in the application.
588
+ *
589
+ * A marker can contain a text message or specify additional information
590
+ * using the event attributes structure. These attributes include a text
591
+ * message, color, category, and a payload. Each of the attributes is optional
592
+ * and can only be sent out using the \ref nvtxMarkEx function.
593
+ * If \ref nvtxMarkA or \ref nvtxMarkW are used to specify the marker
594
+ * or if an attribute is unspecified then a default value will be used.
595
+ *
596
+ * \param eventAttrib - The event attribute structure defining the marker's
597
+ * attribute types and attribute values.
598
+ *
599
+ * \par Example:
600
+ * \code
601
+ * // zero the structure
602
+ * nvtxEventAttributes_t eventAttrib = {0};
603
+ * // set the version and the size information
604
+ * eventAttrib.version = NVTX_VERSION;
605
+ * eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
606
+ * // configure the attributes. 0 is the default for all attributes.
607
+ * eventAttrib.colorType = NVTX_COLOR_ARGB;
608
+ * eventAttrib.color = 0xFF880000;
609
+ * eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
610
+ * eventAttrib.message.ascii = "Example nvtxMarkEx";
611
+ * nvtxMarkEx(&eventAttrib);
612
+ * \endcode
613
+ *
614
+ * \sa
615
+ * ::nvtxDomainMarkEx
616
+ *
617
+ * \version \NVTX_VERSION_1
618
+ * @{ */
619
+ NVTX_DECLSPEC void NVTX_API nvtxMarkEx(const nvtxEventAttributes_t* eventAttrib);
620
+ /** @} */
621
+
622
+ /* ------------------------------------------------------------------------- */
623
+ /** \brief Marks an instantaneous event in the application.
624
+ *
625
+ * A marker created using \ref nvtxMarkA or \ref nvtxMarkW contains only a
626
+ * text message.
627
+ *
628
+ * \param message - The message associated to this marker event.
629
+ *
630
+ * \par Example:
631
+ * \code
632
+ * nvtxMarkA("Example nvtxMarkA");
633
+ * nvtxMarkW(L"Example nvtxMarkW");
634
+ * \endcode
635
+ *
636
+ * \sa
637
+ * ::nvtxDomainMarkEx
638
+ * ::nvtxMarkEx
639
+ *
640
+ * \version \NVTX_VERSION_0
641
+ * @{ */
642
+ NVTX_DECLSPEC void NVTX_API nvtxMarkA(const char* message);
643
+ NVTX_DECLSPEC void NVTX_API nvtxMarkW(const wchar_t* message);
644
+ /** @} */
645
+
646
+
647
+ /** \name Process Ranges */
648
+
649
+ /* ------------------------------------------------------------------------- */
650
+ /** \brief Starts a process range in a domain.
651
+ *
652
+ * \param domain - The domain of scoping the category.
653
+ * \param eventAttrib - The event attribute structure defining the range's
654
+ * attribute types and attribute values.
655
+ *
656
+ * \return The unique ID used to correlate a pair of Start and End events.
657
+ *
658
+ * \remarks Ranges defined by Start/End can overlap.
659
+ *
660
+ * \par Example:
661
+ * \code
662
+ * nvtxDomainHandle_t domain = nvtxDomainCreateA("my domain");
663
+ * nvtxEventAttributes_t eventAttrib = {0};
664
+ * eventAttrib.version = NVTX_VERSION;
665
+ * eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
666
+ * eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
667
+ * eventAttrib.message.ascii = "my range";
668
+ * nvtxRangeId_t rangeId = nvtxDomainRangeStartEx(domain, &eventAttrib);
669
+ * // ...
670
+ * nvtxDomainRangeEnd(domain, rangeId);
671
+ * \endcode
672
+ *
673
+ * \sa
674
+ * ::nvtxDomainRangeEnd
675
+ *
676
+ * \version \NVTX_VERSION_2
677
+ * @{ */
678
+ NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxDomainRangeStartEx(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
679
+ /** @} */
680
+
681
+ /* ------------------------------------------------------------------------- */
682
+ /** \brief Starts a process range.
683
+ *
684
+ * \param eventAttrib - The event attribute structure defining the range's
685
+ * attribute types and attribute values.
686
+ *
687
+ * \return The unique ID used to correlate a pair of Start and End events.
688
+ *
689
+ * \remarks Ranges defined by Start/End can overlap.
690
+ *
691
+ * \par Example:
692
+ * \code
693
+ * nvtxEventAttributes_t eventAttrib = {0};
694
+ * eventAttrib.version = NVTX_VERSION;
695
+ * eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
696
+ * eventAttrib.category = 3;
697
+ * eventAttrib.colorType = NVTX_COLOR_ARGB;
698
+ * eventAttrib.color = 0xFF0088FF;
699
+ * eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
700
+ * eventAttrib.message.ascii = "Example Range";
701
+ * nvtxRangeId_t rangeId = nvtxRangeStartEx(&eventAttrib);
702
+ * // ...
703
+ * nvtxRangeEnd(rangeId);
704
+ * \endcode
705
+ *
706
+ * \sa
707
+ * ::nvtxRangeEnd
708
+ * ::nvtxDomainRangeStartEx
709
+ *
710
+ * \version \NVTX_VERSION_1
711
+ * @{ */
712
+ NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxRangeStartEx(const nvtxEventAttributes_t* eventAttrib);
713
+ /** @} */
714
+
715
+ /* ------------------------------------------------------------------------- */
716
+ /** \brief Starts a process range.
717
+ *
718
+ * \param message - The event message associated to this range event.
719
+ *
720
+ * \return The unique ID used to correlate a pair of Start and End events.
721
+ *
722
+ * \remarks Ranges defined by Start/End can overlap.
723
+ *
724
+ * \par Example:
725
+ * \code
726
+ * nvtxRangeId_t r1 = nvtxRangeStartA("Range 1");
727
+ * nvtxRangeId_t r2 = nvtxRangeStartW(L"Range 2");
728
+ * nvtxRangeEnd(r1);
729
+ * nvtxRangeEnd(r2);
730
+ * \endcode
731
+ *
732
+ * \sa
733
+ * ::nvtxRangeEnd
734
+ * ::nvtxRangeStartEx
735
+ * ::nvtxDomainRangeStartEx
736
+ *
737
+ * \version \NVTX_VERSION_0
738
+ * @{ */
739
+ NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxRangeStartA(const char* message);
740
+ NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxRangeStartW(const wchar_t* message);
741
+ /** @} */
742
+
743
+ /* ------------------------------------------------------------------------- */
744
+ /** \brief Ends a process range.
745
+ *
746
+ * \param domain - The domain
747
+ * \param id - The correlation ID returned from a nvtxRangeStart call.
748
+ *
749
+ * \remarks This function is offered for completeness but is an alias for ::nvtxRangeEnd.
750
+ * It does not need a domain param since that is associated with the range ID at ::nvtxDomainRangeStartEx
751
+ *
752
+ * \par Example:
753
+ * \code
754
+ * nvtxDomainHandle_t domain = nvtxDomainCreateA("my domain");
755
+ * nvtxEventAttributes_t eventAttrib = {0};
756
+ * eventAttrib.version = NVTX_VERSION;
757
+ * eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
758
+ * eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
759
+ * eventAttrib.message.ascii = "my range";
760
+ * nvtxRangeId_t rangeId = nvtxDomainRangeStartEx(domain, &eventAttrib);
761
+ * // ...
762
+ * nvtxDomainRangeEnd(domain, rangeId);
763
+ * \endcode
764
+ *
765
+ * \sa
766
+ * ::nvtxDomainRangeStartEx
767
+ *
768
+ * \version \NVTX_VERSION_2
769
+ * @{ */
770
+ NVTX_DECLSPEC void NVTX_API nvtxDomainRangeEnd(nvtxDomainHandle_t domain, nvtxRangeId_t id);
771
+ /** @} */
772
+
773
+ /* ------------------------------------------------------------------------- */
774
+ /** \brief Ends a process range.
775
+ *
776
+ * \param id - The correlation ID returned from an nvtxRangeStart call.
777
+ *
778
+ * \sa
779
+ * ::nvtxDomainRangeStartEx
780
+ * ::nvtxRangeStartEx
781
+ * ::nvtxRangeStartA
782
+ * ::nvtxRangeStartW
783
+ *
784
+ * \version \NVTX_VERSION_0
785
+ * @{ */
786
+ NVTX_DECLSPEC void NVTX_API nvtxRangeEnd(nvtxRangeId_t id);
787
+ /** @} */
788
+
789
+ /** \name Thread Ranges */
790
+
791
+ /* ------------------------------------------------------------------------- */
792
+ /** \brief Starts a nested thread range.
793
+ *
794
+ * \param domain - The domain of scoping.
795
+ * \param eventAttrib - The event attribute structure defining the range's
796
+ * attribute types and attribute values.
797
+ *
798
+ * \return The 0 based level of range being started. This value is scoped to the domain.
799
+ * If an error occurs, a negative value is returned.
800
+ *
801
+ * \par Example:
802
+ * \code
803
+ * nvtxDomainHandle_t domain = nvtxDomainCreateA("example domain");
804
+ * nvtxEventAttributes_t eventAttrib = {0};
805
+ * eventAttrib.version = NVTX_VERSION;
806
+ * eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
807
+ * eventAttrib.colorType = NVTX_COLOR_ARGB;
808
+ * eventAttrib.color = 0xFFFF0000;
809
+ * eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
810
+ * eventAttrib.message.ascii = "Level 0";
811
+ * nvtxDomainRangePushEx(domain, &eventAttrib);
812
+ *
813
+ * // Re-use eventAttrib
814
+ * eventAttrib.messageType = NVTX_MESSAGE_TYPE_UNICODE;
815
+ * eventAttrib.message.unicode = L"Level 1";
816
+ * nvtxDomainRangePushEx(domain, &eventAttrib);
817
+ *
818
+ * nvtxDomainRangePop(domain); //level 1
819
+ * nvtxDomainRangePop(domain); //level 0
820
+ * \endcode
821
+ *
822
+ * \sa
823
+ * ::nvtxDomainRangePop
824
+ *
825
+ * \version \NVTX_VERSION_2
826
+ * @{ */
827
+ NVTX_DECLSPEC int NVTX_API nvtxDomainRangePushEx(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
828
+ /** @} */
829
+
830
+ /* ------------------------------------------------------------------------- */
831
+ /** \brief Starts a nested thread range.
832
+ *
833
+ * \param eventAttrib - The event attribute structure defining the range's
834
+ * attribute types and attribute values.
835
+ *
836
+ * \return The 0 based level of range being started. This level is per domain.
837
+ * If an error occurs a negative value is returned.
838
+ *
839
+ * \par Example:
840
+ * \code
841
+ * nvtxEventAttributes_t eventAttrib = {0};
842
+ * eventAttrib.version = NVTX_VERSION;
843
+ * eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
844
+ * eventAttrib.colorType = NVTX_COLOR_ARGB;
845
+ * eventAttrib.color = 0xFFFF0000;
846
+ * eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
847
+ * eventAttrib.message.ascii = "Level 0";
848
+ * nvtxRangePushEx(&eventAttrib);
849
+ *
850
+ * // Re-use eventAttrib
851
+ * eventAttrib.messageType = NVTX_MESSAGE_TYPE_UNICODE;
852
+ * eventAttrib.message.unicode = L"Level 1";
853
+ * nvtxRangePushEx(&eventAttrib);
854
+ *
855
+ * nvtxRangePop();
856
+ * nvtxRangePop();
857
+ * \endcode
858
+ *
859
+ * \sa
860
+ * ::nvtxDomainRangePushEx
861
+ * ::nvtxRangePop
862
+ *
863
+ * \version \NVTX_VERSION_1
864
+ * @{ */
865
+ NVTX_DECLSPEC int NVTX_API nvtxRangePushEx(const nvtxEventAttributes_t* eventAttrib);
866
+ /** @} */
867
+
868
+ /* ------------------------------------------------------------------------- */
869
+ /** \brief Starts a nested thread range.
870
+ *
871
+ * \param message - The event message associated to this range event.
872
+ *
873
+ * \return The 0 based level of range being started. If an error occurs a
874
+ * negative value is returned.
875
+ *
876
+ * \par Example:
877
+ * \code
878
+ * nvtxRangePushA("Level 0");
879
+ * nvtxRangePushW(L"Level 1");
880
+ * nvtxRangePop();
881
+ * nvtxRangePop();
882
+ * \endcode
883
+ *
884
+ * \sa
885
+ * ::nvtxDomainRangePushEx
886
+ * ::nvtxRangePop
887
+ *
888
+ * \version \NVTX_VERSION_0
889
+ * @{ */
890
+ NVTX_DECLSPEC int NVTX_API nvtxRangePushA(const char* message);
891
+ NVTX_DECLSPEC int NVTX_API nvtxRangePushW(const wchar_t* message);
892
+ /** @} */
893
+
894
+
895
+ /* ------------------------------------------------------------------------- */
896
+ /** \brief Ends a nested thread range.
897
+ *
898
+ * \return The level of the range being ended. If an error occurs a negative
899
+ * value is returned on the current thread.
900
+ *
901
+ * \par Example:
902
+ * \code
903
+ * nvtxDomainHandle_t domain = nvtxDomainCreateA("example library");
904
+ * nvtxDomainRangePushA(domain, "Level 0");
905
+ * nvtxDomainRangePushW(domain, L"Level 1");
906
+ * nvtxDomainRangePop(domain);
907
+ * nvtxDomainRangePop(domain);
908
+ * \endcode
909
+ *
910
+ * \sa
911
+ * ::nvtxRangePushEx
912
+ * ::nvtxRangePushA
913
+ * ::nvtxRangePushW
914
+ *
915
+ * \version \NVTX_VERSION_2
916
+ * @{ */
917
+ NVTX_DECLSPEC int NVTX_API nvtxDomainRangePop(nvtxDomainHandle_t domain);
918
+ /** @} */
919
+
920
+ /* ------------------------------------------------------------------------- */
921
+ /** \brief Ends a nested thread range.
922
+ *
923
+ * \return The level of the range being ended. If an error occurs a negative
924
+ * value is returned on the current thread.
925
+ *
926
+ * \par Example:
927
+ * \code
928
+ * nvtxRangePushA("Level 0");
929
+ * nvtxRangePushW(L"Level 1");
930
+ * nvtxRangePop();
931
+ * nvtxRangePop();
932
+ * \endcode
933
+ *
934
+ * \sa
935
+ * ::nvtxRangePushEx
936
+ * ::nvtxRangePushA
937
+ * ::nvtxRangePushW
938
+ *
939
+ * \version \NVTX_VERSION_0
940
+ * @{ */
941
+ NVTX_DECLSPEC int NVTX_API nvtxRangePop(void);
942
+ /** @} */
943
+
944
+
945
+ /** @} */ /*END defgroup*/
946
+ /* ========================================================================= */
947
+ /** \defgroup RESOURCE_NAMING Resource Naming
948
+ *
949
+ * See \ref RESOURCE_NAMING for more details
950
+ *
951
+ * @{
952
+ */
953
+
954
+
955
+ /* ------------------------------------------------------------------------- */
956
+ /** \name Functions for Generic Resource Naming*/
957
+ /* ------------------------------------------------------------------------- */
958
+
959
+ /* ------------------------------------------------------------------------- */
960
+ /** \cond SHOW_HIDDEN
961
+ * \brief Resource typing helpers.
962
+ *
963
+ * Classes are used to make it easy to create a series of resource types
964
+ * per API without collisions
965
+ */
966
+ #define NVTX_RESOURCE_MAKE_TYPE(CLASS, INDEX) ((((uint32_t)(NVTX_RESOURCE_CLASS_ ## CLASS))<<16)|((uint32_t)(INDEX)))
967
+ #define NVTX_RESOURCE_CLASS_GENERIC 1
968
+ /** \endcond */
969
+
970
+ /* ------------------------------------------------------------------------- */
971
+ /** \brief Generic resource type for when a resource class is not available.
972
+ *
973
+ * \sa
974
+ * ::nvtxDomainResourceCreate
975
+ *
976
+ * \version \NVTX_VERSION_2
977
+ */
978
+ typedef enum nvtxResourceGenericType_t
979
+ {
980
+ NVTX_RESOURCE_TYPE_UNKNOWN = 0,
981
+ NVTX_RESOURCE_TYPE_GENERIC_POINTER = NVTX_RESOURCE_MAKE_TYPE(GENERIC, 1), /**< Generic pointer assumed to have no collisions with other pointers. */
982
+ NVTX_RESOURCE_TYPE_GENERIC_HANDLE = NVTX_RESOURCE_MAKE_TYPE(GENERIC, 2), /**< Generic handle assumed to have no collisions with other handles. */
983
+ NVTX_RESOURCE_TYPE_GENERIC_THREAD_NATIVE = NVTX_RESOURCE_MAKE_TYPE(GENERIC, 3), /**< OS native thread identifier. */
984
+ NVTX_RESOURCE_TYPE_GENERIC_THREAD_POSIX = NVTX_RESOURCE_MAKE_TYPE(GENERIC, 4) /**< POSIX pthread identifier. */
985
+ } nvtxResourceGenericType_t;
986
+
987
+
988
+
989
+ /** \brief Resource Attribute Structure.
990
+ * \anchor RESOURCE_ATTRIBUTE_STRUCTURE
991
+ *
992
+ * This structure is used to describe the attributes of a resource. The layout of
993
+ * the structure is defined by a specific version of the tools extension
994
+ * library and can change between different versions of the Tools Extension
995
+ * library.
996
+ *
997
+ * \par Initializing the Attributes
998
+ *
999
+ * The caller should always perform the following three tasks when using
1000
+ * attributes:
1001
+ * <ul>
1002
+ * <li>Zero the structure
1003
+ * <li>Set the version field
1004
+ * <li>Set the size field
1005
+ * </ul>
1006
+ *
1007
+ * Zeroing the structure sets all the resource attributes types and values
1008
+ * to the default value.
1009
+ *
1010
+ * The version and size field are used by the Tools Extension
1011
+ * implementation to handle multiple versions of the attributes structure.
1012
+ *
1013
+ * It is recommended that the caller use one of the following two methods
1014
+ * to initialize the event attributes structure:
1015
+ *
1016
+ * \par Method 1: Initializing nvtxEventAttributes for future compatibility
1017
+ * \code
1018
+ * nvtxResourceAttributes_t attribs = {0};
1019
+ * attribs.version = NVTX_VERSION;
1020
+ * attribs.size = NVTX_RESOURCE_ATTRIB_STRUCT_SIZE;
1021
+ * \endcode
1022
+ *
1023
+ * \par Method 2: Initializing nvtxEventAttributes for a specific version
1024
+ * \code
1025
+ * nvtxResourceAttributes_v0 attribs = {0};
1026
+ * attribs.version = 2;
1027
+ * attribs.size = (uint16_t)(sizeof(nvtxResourceAttributes_v0));
1028
+ * \endcode
1029
+ *
1030
+ * If the caller uses Method 1 it is critical that the entire binary
1031
+ * layout of the structure be configured to 0 so that all fields
1032
+ * are initialized to the default value.
1033
+ *
1034
+ * The caller should either use both NVTX_VERSION and
1035
+ * NVTX_RESOURCE_ATTRIB_STRUCT_SIZE (Method 1) or use explicit values
1036
+ * and a versioned type (Method 2). Using a mix of the two methods
1037
+ * will likely cause either source level incompatibility or binary
1038
+ * incompatibility in the future.
1039
+ *
1040
+ * \par Settings Attribute Types and Values
1041
+ *
1042
+ *
1043
+ * \par Example:
1044
+ * \code
1045
+ * nvtxDomainHandle_t domain = nvtxDomainCreateA("example domain");
1046
+ *
1047
+ * // Initialize
1048
+ * nvtxResourceAttributes_t attribs = {0};
1049
+ * attribs.version = NVTX_VERSION;
1050
+ * attribs.size = NVTX_RESOURCE_ATTRIB_STRUCT_SIZE;
1051
+ *
1052
+ * // Configure the Attributes
1053
+ * attribs.identifierType = NVTX_RESOURCE_TYPE_GENERIC_POINTER;
1054
+ * attribs.identifier.pValue = (const void*)pMutex;
1055
+ * attribs.messageType = NVTX_MESSAGE_TYPE_ASCII;
1056
+ * attribs.message.ascii = "Single thread access to database.";
1057
+ *
1058
+ * nvtxResourceHandle_t handle = nvtxDomainResourceCreate(domain, &attribs);
1059
+ * \endcode
1060
+ *
1061
+ * \sa
1062
+ * ::nvtxDomainResourceCreate
1063
+ */
1064
+ typedef struct nvtxResourceAttributes_v0
1065
+ {
1066
+ /**
1067
+ * \brief Version flag of the structure.
1068
+ *
1069
+ * Needs to be set to NVTX_VERSION to indicate the version of NVTX APIs
1070
+ * supported in this header file. This can optionally be overridden to
1071
+ * another version of the tools extension library.
1072
+ */
1073
+ uint16_t version;
1074
+
1075
+ /**
1076
+ * \brief Size of the structure.
1077
+ *
1078
+ * Needs to be set to the size in bytes of this attribute
1079
+ * structure.
1080
+ */
1081
+ uint16_t size;
1082
+
1083
+ /**
1084
+ * \brief Identifier type specifies how to interpret the identifier field
1085
+ *
1086
+ * Defines the identifier format of the attribute structure's \ref RESOURCE_IDENTIFIER_FIELD
1087
+ * "identifier" field.
1088
+ *
1089
+ * Default Value is NVTX_RESOURCE_TYPE_UNKNOWN
1090
+ */
1091
+ int32_t identifierType; /* values from enums following the pattern nvtxResource[name]Type_t */
1092
+
1093
+ /**
1094
+ * \brief Identifier for the resource.
1095
+ * \anchor RESOURCE_IDENTIFIER_FIELD
1096
+ *
1097
+ * An identifier may be a pointer or a handle to an OS or middleware API object.
1098
+ * The resource type will assist in avoiding collisions where handles values may collide.
1099
+ */
1100
+ union identifier_t
1101
+ {
1102
+ const void* pValue;
1103
+ uint64_t ullValue;
1104
+ } identifier;
1105
+
1106
+ /** \brief Message type specified in this attribute structure.
1107
+ *
1108
+ * Defines the message format of the attribute structure's \ref RESOURCE_MESSAGE_FIELD
1109
+ * "message" field.
1110
+ *
1111
+ * Default Value is NVTX_MESSAGE_UNKNOWN
1112
+ */
1113
+ int32_t messageType; /* nvtxMessageType_t */
1114
+
1115
+ /** \brief Message assigned to this attribute structure. \anchor RESOURCE_MESSAGE_FIELD
1116
+ *
1117
+ * The text message that is attached to a resource.
1118
+ */
1119
+ nvtxMessageValue_t message;
1120
+
1121
+ } nvtxResourceAttributes_v0;
1122
+
1123
+ typedef struct nvtxResourceAttributes_v0 nvtxResourceAttributes_t;
1124
+
1125
+ /* \cond SHOW_HIDDEN
1126
+ * \version \NVTX_VERSION_2
1127
+ */
1128
+ #define NVTX_RESOURCE_ATTRIB_STRUCT_SIZE ( (uint16_t)( sizeof(nvtxResourceAttributes_v0) ) )
1129
+ typedef struct nvtxResourceHandle* nvtxResourceHandle_t;
1130
+ /** \endcond */
1131
+
1132
+
1133
+
1134
+ /* ------------------------------------------------------------------------- */
1135
+ /** \brief Create a resource object to track and associate data with OS and middleware objects
1136
+ *
1137
+ * Allows users to associate an API handle or pointer with a user-provided name.
1138
+ *
1139
+ *
1140
+ * \param domain - Domain to own the resource object
1141
+ * \param attribs - Attributes to be associated with the resource
1142
+ *
1143
+ * \return A handle that represents the newly created resource object.
1144
+ *
1145
+ * \par Example:
1146
+ * \code
1147
+ * nvtxDomainHandle_t domain = nvtxDomainCreateA("example domain");
1148
+ * nvtxResourceAttributes_t attribs = {0};
1149
+ * attribs.version = NVTX_VERSION;
1150
+ * attribs.size = NVTX_RESOURCE_ATTRIB_STRUCT_SIZE;
1151
+ * attribs.identifierType = NVTX_RESOURCE_TYPE_GENERIC_POINTER;
1152
+ * attribs.identifier.pValue = (const void*)pMutex;
1153
+ * attribs.messageType = NVTX_MESSAGE_TYPE_ASCII;
1154
+ * attribs.message.ascii = "Single thread access to database.";
1155
+ * nvtxResourceHandle_t handle = nvtxDomainResourceCreate(domain, &attribs);
1156
+ * \endcode
1157
+ *
1158
+ * \sa
1159
+ * ::nvtxResourceAttributes_t
1160
+ * ::nvtxDomainResourceDestroy
1161
+ *
1162
+ * \version \NVTX_VERSION_2
1163
+ * @{ */
1164
+ NVTX_DECLSPEC nvtxResourceHandle_t NVTX_API nvtxDomainResourceCreate(nvtxDomainHandle_t domain, nvtxResourceAttributes_t* attribs);
1165
+ /** @} */
1166
+
1167
+ /* ------------------------------------------------------------------------- */
1168
+ /** \brief Destroy a resource object to track and associate data with OS and middleware objects
1169
+ *
1170
+ * Allows users to associate an API handle or pointer with a user-provided name.
1171
+ *
1172
+ * \param resource - Handle to the resource in which to operate.
1173
+ *
1174
+ * \par Example:
1175
+ * \code
1176
+ * nvtxDomainHandle_t domain = nvtxDomainCreateA("example domain");
1177
+ * nvtxResourceAttributes_t attribs = {0};
1178
+ * attribs.version = NVTX_VERSION;
1179
+ * attribs.size = NVTX_RESOURCE_ATTRIB_STRUCT_SIZE;
1180
+ * attribs.identifierType = NVTX_RESOURCE_TYPE_GENERIC_POINTER;
1181
+ * attribs.identifier.pValue = (const void*)pMutex;
1182
+ * attribs.messageType = NVTX_MESSAGE_TYPE_ASCII;
1183
+ * attribs.message.ascii = "Single thread access to database.";
1184
+ * nvtxResourceHandle_t handle = nvtxDomainResourceCreate(domain, &attribs);
1185
+ * nvtxDomainResourceDestroy(handle);
1186
+ * \endcode
1187
+ *
1188
+ * \sa
1189
+ * ::nvtxDomainResourceCreate
1190
+ *
1191
+ * \version \NVTX_VERSION_2
1192
+ * @{ */
1193
+ NVTX_DECLSPEC void NVTX_API nvtxDomainResourceDestroy(nvtxResourceHandle_t resource);
1194
+ /** @} */
1195
+
1196
+
1197
+ /** \name Functions for NVTX Category Naming*/
1198
+
1199
+ /* ------------------------------------------------------------------------- */
1200
+ /**
1201
+ * \brief Annotate an NVTX category used within a domain.
1202
+ *
1203
+ * Categories are used to group sets of events. Each category is identified
1204
+ * through a unique ID and that ID is passed into any of the marker/range
1205
+ * events to assign that event to a specific category. The nvtxDomainNameCategory
1206
+ * function calls allow the user to assign a name to a category ID that is
1207
+ * specific to the domain.
1208
+ *
1209
+ * nvtxDomainNameCategory(NULL, category, name) is equivalent to calling
1210
+ * nvtxNameCategory(category, name).
1211
+ *
1212
+ * \param domain - The domain of scoping the category.
1213
+ * \param category - The category ID to name.
1214
+ * \param name - The name of the category.
1215
+ *
1216
+ * \remarks The category names are tracked per domain.
1217
+ *
1218
+ * \par Example:
1219
+ * \code
1220
+ * nvtxDomainHandle_t domain = nvtxDomainCreateA("example");
1221
+ * nvtxDomainNameCategoryA(domain, 1, "Memory Allocation");
1222
+ * nvtxDomainNameCategoryW(domain, 2, L"Memory Transfer");
1223
+ * \endcode
1224
+ *
1225
+ * \version \NVTX_VERSION_2
1226
+ * @{ */
1227
+ NVTX_DECLSPEC void NVTX_API nvtxDomainNameCategoryA(nvtxDomainHandle_t domain, uint32_t category, const char* name);
1228
+ NVTX_DECLSPEC void NVTX_API nvtxDomainNameCategoryW(nvtxDomainHandle_t domain, uint32_t category, const wchar_t* name);
1229
+ /** @} */
1230
+
1231
+ /** \brief Annotate an NVTX category.
1232
+ *
1233
+ * Categories are used to group sets of events. Each category is identified
1234
+ * through a unique ID and that ID is passed into any of the marker/range
1235
+ * events to assign that event to a specific category. The nvtxNameCategory
1236
+ * function calls allow the user to assign a name to a category ID.
1237
+ *
1238
+ * \param category - The category ID to name.
1239
+ * \param name - The name of the category.
1240
+ *
1241
+ * \remarks The category names are tracked per process.
1242
+ *
1243
+ * \par Example:
1244
+ * \code
1245
+ * nvtxNameCategory(1, "Memory Allocation");
1246
+ * nvtxNameCategory(2, "Memory Transfer");
1247
+ * nvtxNameCategory(3, "Memory Object Lifetime");
1248
+ * \endcode
1249
+ *
1250
+ * \version \NVTX_VERSION_1
1251
+ * @{ */
1252
+ NVTX_DECLSPEC void NVTX_API nvtxNameCategoryA(uint32_t category, const char* name);
1253
+ NVTX_DECLSPEC void NVTX_API nvtxNameCategoryW(uint32_t category, const wchar_t* name);
1254
+ /** @} */
1255
+
1256
+ /** \name Functions for OS Threads Naming*/
1257
+
1258
+ /* ------------------------------------------------------------------------- */
1259
+ /** \brief Annotate an OS thread.
1260
+ *
1261
+ * Allows the user to name an active thread of the current process. If an
1262
+ * invalid thread ID is provided or a thread ID from a different process is
1263
+ * used the behavior of the tool is implementation dependent.
1264
+ *
1265
+ * Tools expect thread ID to be a number that uniquely identifies the thread
1266
+ * at the time of the call. Note that a thread's ID can be reused after
1267
+ * it is destroyed. Tools may choose how to handle aliasing of thread IDs.
1268
+ *
1269
+ * POSIX pthread_t type returned by pthread_self() may not comply with these
1270
+ * expectations. Please use OS-specific thread ID instead of pthread_t.
1271
+ *
1272
+ * The thread name is associated to the default domain. To support domains
1273
+ * use resource objects via ::nvtxDomainResourceCreate.
1274
+ *
1275
+ * \param threadId - The ID of the thread to name.
1276
+ * \param name - The name of the thread.
1277
+ *
1278
+ * \par Examples:
1279
+ * MS Windows:
1280
+ * \code
1281
+ * #include <windows.h>
1282
+ * nvtxNameOsThread(GetCurrentThreadId(), "Current thread");
1283
+ * nvtxNameOsThread(GetThreadId(SomeThreadHandle), "Other thread");
1284
+ * \endcode
1285
+ *
1286
+ * Android:
1287
+ * \code
1288
+ * #include <unistd.h>
1289
+ * nvtxNameOsThreadA(gettid(), "Current thread");
1290
+ * nvtxNameOsThreadA(getpid(), "Main thread");
1291
+ * \endcode
1292
+ *
1293
+ * Linux:
1294
+ * \code
1295
+ * #include <sys/syscall.h>
1296
+ * nvtxNameOsThreadA(syscall(SYS_gettid), "Current thread");
1297
+ * \endcode
1298
+ * \code
1299
+ * #include <unistd.h>
1300
+ * nvtxNameOsThreadA(getpid(), "Main thread");
1301
+ * \endcode
1302
+ *
1303
+ * OS X:
1304
+ * \code
1305
+ * #include <sys/syscall.h>
1306
+ * nvtxNameOsThreadA(syscall(SYS_thread_selfid), "Current thread");
1307
+ * \endcode
1308
+ * \code
1309
+ * #include <pthread.h>
1310
+ * __uint64_t id;
1311
+ * pthread_threadid_np(pthread_self(), &id);
1312
+ * nvtxNameOsThreadA(id, "Current thread");
1313
+ * pthread_threadid_np(somePThreadId, &id);
1314
+ * nvtxNameOsThreadA(id, "Other thread");
1315
+ * \endcode
1316
+ *
1317
+ * \version \NVTX_VERSION_1
1318
+ * @{ */
1319
+ NVTX_DECLSPEC void NVTX_API nvtxNameOsThreadA(uint32_t threadId, const char* name);
1320
+ NVTX_DECLSPEC void NVTX_API nvtxNameOsThreadW(uint32_t threadId, const wchar_t* name);
1321
+ /** @} */
1322
+
1323
+
1324
+ /** @} */ /*END defgroup*/
1325
+ /* ========================================================================= */
1326
+ /** \defgroup STRING_REGISTRATION String Registration
1327
+ *
1328
+ * Registered strings are intended to increase performance by lowering instrumentation
1329
+ * overhead. String may be registered once and the handle may be passed in place of
1330
+ * a string where an the APIs may allow.
1331
+ *
1332
+ * See \ref STRING_REGISTRATION for more details
1333
+ *
1334
+ * @{
1335
+ */
1336
+
1337
+ /* ------------------------------------------------------------------------- */
1338
+ /** \brief Register a string.
1339
+
1340
+ * Registers an immutable string with NVTX. Once registered the pointer used
1341
+ * to register the domain name can be used in nvtxEventAttributes_t
1342
+ * \ref MESSAGE_FIELD. This allows NVTX implementation to skip copying the
1343
+ * contents of the message on each event invocation.
1344
+ *
1345
+ * String registration is an optimization. It is recommended to use string
1346
+ * registration if the string will be passed to an event many times.
1347
+ *
1348
+ * String are not unregistered, except that by unregistering the entire domain
1349
+ *
1350
+ * \param domain - Domain handle. If NULL then the global domain is used.
1351
+ * \param string - A unique pointer to a sequence of characters.
1352
+ *
1353
+ * \return A handle representing the registered string.
1354
+ *
1355
+ * \par Example:
1356
+ * \code
1357
+ * nvtxDomainCreateA("com.nvidia.nvtx.example");
1358
+ * nvtxStringHandle_t message = nvtxDomainRegisterStringA(domain, "registered string");
1359
+ * nvtxEventAttributes_t eventAttrib = {0};
1360
+ * eventAttrib.version = NVTX_VERSION;
1361
+ * eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
1362
+ * eventAttrib.messageType = NVTX_MESSAGE_TYPE_REGISTERED;
1363
+ * eventAttrib.message.registered = message;
1364
+ * \endcode
1365
+ *
1366
+ * \version \NVTX_VERSION_2
1367
+ * @{ */
1368
+ NVTX_DECLSPEC nvtxStringHandle_t NVTX_API nvtxDomainRegisterStringA(nvtxDomainHandle_t domain, const char* string);
1369
+ NVTX_DECLSPEC nvtxStringHandle_t NVTX_API nvtxDomainRegisterStringW(nvtxDomainHandle_t domain, const wchar_t* string);
1370
+ /** @} */
1371
+
1372
+ /** @} */ /*END defgroup*/
1373
+ /* ========================================================================= */
1374
+ /** \defgroup DOMAINS Domains
1375
+ *
1376
+ * Domains are used to group events to a developer defined scope. Middleware
1377
+ * vendors may also scope their own events to avoid collisions with the
1378
+ * the application developer's events, so that the application developer may
1379
+ * inspect both parts and easily differentiate or filter them. By default
1380
+ * all events are scoped to a global domain where NULL is provided or when
1381
+ * using APIs provided b versions of NVTX below v2
1382
+ *
1383
+ * Domains are intended to be typically long lived objects with the intention
1384
+ * of logically separating events of large modules from each other such as
1385
+ * middleware libraries from each other and the main application.
1386
+ *
1387
+ * See \ref DOMAINS for more details
1388
+ *
1389
+ * @{
1390
+ */
1391
+
1392
+ /* ------------------------------------------------------------------------- */
1393
+ /** \brief Register a NVTX domain.
1394
+ *
1395
+ * Domains are used to scope annotations. All NVTX_VERSION_0 and NVTX_VERSION_1
1396
+ * annotations are scoped to the global domain. The function nvtxDomainCreate
1397
+ * creates a new named domain.
1398
+ *
1399
+ * Each domain maintains its own nvtxRangePush and nvtxRangePop stack.
1400
+ *
1401
+ * \param name - A unique string representing the domain.
1402
+ *
1403
+ * \return A handle representing the domain.
1404
+ *
1405
+ * \par Example:
1406
+ * \code
1407
+ * nvtxDomainHandle_t domain = nvtxDomainCreateA("com.nvidia.nvtx.example");
1408
+ *
1409
+ * nvtxMarkA("nvtxMarkA to global domain");
1410
+ *
1411
+ * nvtxEventAttributes_t eventAttrib1 = {0};
1412
+ * eventAttrib1.version = NVTX_VERSION;
1413
+ * eventAttrib1.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
1414
+ * eventAttrib1.message.ascii = "nvtxDomainMarkEx to global domain";
1415
+ * nvtxDomainMarkEx(NULL, &eventAttrib1);
1416
+ *
1417
+ * nvtxEventAttributes_t eventAttrib2 = {0};
1418
+ * eventAttrib2.version = NVTX_VERSION;
1419
+ * eventAttrib2.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
1420
+ * eventAttrib2.message.ascii = "nvtxDomainMarkEx to com.nvidia.nvtx.example";
1421
+ * nvtxDomainMarkEx(domain, &eventAttrib2);
1422
+ * nvtxDomainDestroy(domain);
1423
+ * \endcode
1424
+ *
1425
+ * \sa
1426
+ * ::nvtxDomainDestroy
1427
+ *
1428
+ * \version \NVTX_VERSION_2
1429
+ * @{ */
1430
+ NVTX_DECLSPEC nvtxDomainHandle_t NVTX_API nvtxDomainCreateA(const char* name);
1431
+ NVTX_DECLSPEC nvtxDomainHandle_t NVTX_API nvtxDomainCreateW(const wchar_t* name);
1432
+ /** @} */
1433
+
1434
+ /* ------------------------------------------------------------------------- */
1435
+ /** \brief Unregister a NVTX domain.
1436
+ *
1437
+ * Unregisters the domain handle and frees all domain specific resources.
1438
+ *
1439
+ * \param domain - the domain handle
1440
+ *
1441
+ * \par Example:
1442
+ * \code
1443
+ * nvtxDomainHandle_t domain = nvtxDomainCreateA("com.nvidia.nvtx.example");
1444
+ * nvtxDomainDestroy(domain);
1445
+ * \endcode
1446
+ *
1447
+ * \sa
1448
+ * ::nvtxDomainCreateA
1449
+ * ::nvtxDomainCreateW
1450
+ *
1451
+ * \version \NVTX_VERSION_2
1452
+ * @{ */
1453
+ NVTX_DECLSPEC void NVTX_API nvtxDomainDestroy(nvtxDomainHandle_t domain);
1454
+ /** @} */
1455
+
1456
+
1457
+ /** @} */ /*END defgroup*/
1458
+ /* ========================================================================= */
1459
+ /** \cond SHOW_HIDDEN */
1460
+
1461
+ #ifdef UNICODE
1462
+ #define nvtxMark nvtxMarkW
1463
+ #define nvtxRangeStart nvtxRangeStartW
1464
+ #define nvtxRangePush nvtxRangePushW
1465
+ #define nvtxNameCategory nvtxNameCategoryW
1466
+ #define nvtxNameOsThread nvtxNameOsThreadW
1467
+ /* NVTX_VERSION_2 */
1468
+ #define nvtxDomainCreate nvtxDomainCreateW
1469
+ #define nvtxDomainRegisterString nvtxDomainRegisterStringW
1470
+ #define nvtxDomainNameCategory nvtxDomainNameCategoryW
1471
+ #else
1472
+ #define nvtxMark nvtxMarkA
1473
+ #define nvtxRangeStart nvtxRangeStartA
1474
+ #define nvtxRangePush nvtxRangePushA
1475
+ #define nvtxNameCategory nvtxNameCategoryA
1476
+ #define nvtxNameOsThread nvtxNameOsThreadA
1477
+ /* NVTX_VERSION_2 */
1478
+ #define nvtxDomainCreate nvtxDomainCreateA
1479
+ #define nvtxDomainRegisterString nvtxDomainRegisterStringA
1480
+ #define nvtxDomainNameCategory nvtxDomainNameCategoryA
1481
+ #endif
1482
+
1483
+ /** \endcond */
1484
+
1485
+ #ifdef __cplusplus
1486
+ } /* extern "C" */
1487
+ #endif /* __cplusplus */
1488
+
1489
+ #define NVTX_IMPL_GUARD /* Ensure other headers cannot included directly */
1490
+
1491
+ #include "nvtxDetail/nvtxTypes.h"
1492
+
1493
+ #ifndef NVTX_NO_IMPL
1494
+ #include "nvtxDetail/nvtxImpl.h"
1495
+ #endif /*NVTX_NO_IMPL*/
1496
+
1497
+ #undef NVTX_IMPL_GUARD
1498
+
1499
+ #endif /* !defined(NVTX_VERSION) */
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvtx3/nvToolsExtCuda.h ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2009-2016 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO USER:
5
+ *
6
+ * This source code is subject to NVIDIA ownership rights under U.S. and
7
+ * international Copyright laws.
8
+ *
9
+ * This software and the information contained herein is PROPRIETARY and
10
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and conditions
11
+ * of a form of NVIDIA software license agreement.
12
+ *
13
+ * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
14
+ * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
15
+ * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
16
+ * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
17
+ * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
18
+ * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
19
+ * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
20
+ * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
21
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
22
+ * OR PERFORMANCE OF THIS SOURCE CODE.
23
+ *
24
+ * U.S. Government End Users. This source code is a "commercial item" as
25
+ * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
26
+ * "commercial computer software" and "commercial computer software
27
+ * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
28
+ * and is provided to the U.S. Government only as a commercial end item.
29
+ * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
30
+ * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
31
+ * source code with only those rights set forth herein.
32
+ *
33
+ * Any use of this source code in individual and commercial software must
34
+ * include, in the user documentation and internal comments to the code,
35
+ * the above Disclaimer and U.S. Government End Users Notice.
36
+ */
37
+
38
+ #include "nvToolsExt.h"
39
+
40
+ #include "cuda.h"
41
+
42
+ #ifndef NVTOOLSEXT_CUDA_V3
43
+ #define NVTOOLSEXT_CUDA_V3
44
+
45
+ #ifdef __cplusplus
46
+ extern "C" {
47
+ #endif /* __cplusplus */
48
+
49
+ /* ========================================================================= */
50
+ /** \name Functions for CUDA Resource Naming
51
+ */
52
+ /** \addtogroup RESOURCE_NAMING
53
+ * \section RESOURCE_NAMING_CUDA CUDA Resource Naming
54
+ *
55
+ * This section covers the API functions that allow to annotate CUDA resources
56
+ * with user-provided names.
57
+ *
58
+ * @{
59
+ */
60
+
61
+ /* ------------------------------------------------------------------------- */
62
+ /* \cond SHOW_HIDDEN
63
+ * \brief Used to build a non-colliding value for resource types separated class
64
+ * \version \NVTX_VERSION_2
65
+ */
66
+ #define NVTX_RESOURCE_CLASS_CUDA 4
67
+ /** \endcond */
68
+
69
+ /* ------------------------------------------------------------------------- */
70
+ /** \brief Resource types for CUDA
71
+ */
72
+ typedef enum nvtxResourceCUDAType_t
73
+ {
74
+ NVTX_RESOURCE_TYPE_CUDA_DEVICE = NVTX_RESOURCE_MAKE_TYPE(CUDA, 1), /* CUdevice */
75
+ NVTX_RESOURCE_TYPE_CUDA_CONTEXT = NVTX_RESOURCE_MAKE_TYPE(CUDA, 2), /* CUcontext */
76
+ NVTX_RESOURCE_TYPE_CUDA_STREAM = NVTX_RESOURCE_MAKE_TYPE(CUDA, 3), /* CUstream */
77
+ NVTX_RESOURCE_TYPE_CUDA_EVENT = NVTX_RESOURCE_MAKE_TYPE(CUDA, 4), /* CUevent */
78
+ } nvtxResourceCUDAType_t;
79
+
80
+
81
+ /* ------------------------------------------------------------------------- */
82
+ /** \brief Annotates a CUDA device.
83
+ *
84
+ * Allows the user to associate a CUDA device with a user-provided name.
85
+ *
86
+ * \param device - The handle of the CUDA device to name.
87
+ * \param name - The name of the CUDA device.
88
+ *
89
+ * \version \NVTX_VERSION_1
90
+ * @{ */
91
+ NVTX_DECLSPEC void NVTX_API nvtxNameCuDeviceA(CUdevice device, const char* name);
92
+ NVTX_DECLSPEC void NVTX_API nvtxNameCuDeviceW(CUdevice device, const wchar_t* name);
93
+ /** @} */
94
+
95
+ /* ------------------------------------------------------------------------- */
96
+ /** \brief Annotates a CUDA context.
97
+ *
98
+ * Allows the user to associate a CUDA context with a user-provided name.
99
+ *
100
+ * \param context - The handle of the CUDA context to name.
101
+ * \param name - The name of the CUDA context.
102
+ *
103
+ * \par Example:
104
+ * \code
105
+ * CUresult status = cuCtxCreate( &cuContext, 0, cuDevice );
106
+ * if ( CUDA_SUCCESS != status )
107
+ * goto Error;
108
+ * nvtxNameCuContext(cuContext, "CTX_NAME");
109
+ * \endcode
110
+ *
111
+ * \version \NVTX_VERSION_1
112
+ * @{ */
113
+ NVTX_DECLSPEC void NVTX_API nvtxNameCuContextA(CUcontext context, const char* name);
114
+ NVTX_DECLSPEC void NVTX_API nvtxNameCuContextW(CUcontext context, const wchar_t* name);
115
+ /** @} */
116
+
117
+ /* ------------------------------------------------------------------------- */
118
+ /** \brief Annotates a CUDA stream.
119
+ *
120
+ * Allows the user to associate a CUDA stream with a user-provided name.
121
+ *
122
+ * \param stream - The handle of the CUDA stream to name.
123
+ * \param name - The name of the CUDA stream.
124
+ *
125
+ * \version \NVTX_VERSION_1
126
+ * @{ */
127
+ NVTX_DECLSPEC void NVTX_API nvtxNameCuStreamA(CUstream stream, const char* name);
128
+ NVTX_DECLSPEC void NVTX_API nvtxNameCuStreamW(CUstream stream, const wchar_t* name);
129
+ /** @} */
130
+
131
+ /* ------------------------------------------------------------------------- */
132
+ /** \brief Annotates a CUDA event.
133
+ *
134
+ * Allows the user to associate a CUDA event with a user-provided name.
135
+ *
136
+ * \param event - The handle of the CUDA event to name.
137
+ * \param name - The name of the CUDA event.
138
+ *
139
+ * \version \NVTX_VERSION_1
140
+ * @{ */
141
+ NVTX_DECLSPEC void NVTX_API nvtxNameCuEventA(CUevent event, const char* name);
142
+ NVTX_DECLSPEC void NVTX_API nvtxNameCuEventW(CUevent event, const wchar_t* name);
143
+ /** @} */
144
+
145
+ /** @} */ /* END RESOURCE_NAMING */
146
+
147
+ /* ========================================================================= */
148
+ #ifdef UNICODE
149
+ #define nvtxNameCuDevice nvtxNameCuDeviceW
150
+ #define nvtxNameCuContext nvtxNameCuContextW
151
+ #define nvtxNameCuStream nvtxNameCuStreamW
152
+ #define nvtxNameCuEvent nvtxNameCuEventW
153
+ #else
154
+ #define nvtxNameCuDevice nvtxNameCuDeviceA
155
+ #define nvtxNameCuContext nvtxNameCuContextA
156
+ #define nvtxNameCuStream nvtxNameCuStreamA
157
+ #define nvtxNameCuEvent nvtxNameCuEventA
158
+ #endif
159
+
160
+ #ifdef __cplusplus
161
+ }
162
+ #endif /* __cplusplus */
163
+
164
+ #ifndef NVTX_NO_IMPL
165
+ #define NVTX_IMPL_GUARD_CUDA /* Ensure other headers cannot included directly */
166
+ #include "nvtxDetail/nvtxImplCuda_v3.h"
167
+ #undef NVTX_IMPL_GUARD_CUDA
168
+ #endif /*NVTX_NO_IMPL*/
169
+
170
+ #endif /* NVTOOLSEXT_CUDA_V3 */
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvtx3/nvToolsExtSync.h ADDED
@@ -0,0 +1,411 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2009-2016 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO USER:
5
+ *
6
+ * This source code is subject to NVIDIA ownership rights under U.S. and
7
+ * international Copyright laws.
8
+ *
9
+ * This software and the information contained herein is PROPRIETARY and
10
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and conditions
11
+ * of a form of NVIDIA software license agreement.
12
+ *
13
+ * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
14
+ * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
15
+ * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
16
+ * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
17
+ * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
18
+ * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
19
+ * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
20
+ * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
21
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
22
+ * OR PERFORMANCE OF THIS SOURCE CODE.
23
+ *
24
+ * U.S. Government End Users. This source code is a "commercial item" as
25
+ * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
26
+ * "commercial computer software" and "commercial computer software
27
+ * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
28
+ * and is provided to the U.S. Government only as a commercial end item.
29
+ * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
30
+ * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
31
+ * source code with only those rights set forth herein.
32
+ *
33
+ * Any use of this source code in individual and commercial software must
34
+ * include, in the user documentation and internal comments to the code,
35
+ * the above Disclaimer and U.S. Government End Users Notice.
36
+ */
37
+
38
+ #include "nvToolsExt.h"
39
+
40
+ #ifndef NVTOOLSEXT_SYNC_V3
41
+ #define NVTOOLSEXT_SYNC_V3
42
+
43
+ #ifdef __cplusplus
44
+ extern "C" {
45
+ #endif /* __cplusplus */
46
+
47
+ /* \cond SHOW_HIDDEN
48
+ * \version \NVTX_VERSION_2
49
+ */
50
+ #define NVTX_SYNCUSER_ATTRIB_STRUCT_SIZE ( (uint16_t)( sizeof(nvtxSyncUserAttributes_v0) ) )
51
+ /** \endcond */
52
+
53
+
54
+ /**
55
+ * \page PAGE_SYNCHRONIZATION Synchronization
56
+ *
57
+ * This section covers a subset of the API that allow users to track additional
58
+ * synchronization details of their application. Naming OS synchronization primitives
59
+ * may allow users to better understand the data collected by traced synchronization
60
+ * APIs. Additionally, a user defined synchronization object can allow the users to
61
+ * to tell the tools when the user is building their own synchronization system
62
+ * that do not rely on the OS to provide behaviors and instead use techniques like
63
+ * atomic operations and spinlocks.
64
+ *
65
+ * See module \ref SYNCHRONIZATION for details.
66
+ *
67
+ * \par Example:
68
+ * \code
69
+ * class MyMutex
70
+ * {
71
+ * volatile long bLocked;
72
+ * nvtxSyncUser_t hSync;
73
+ * public:
74
+ * MyMutex(const char* name, nvtxDomainHandle_t d){
75
+ * bLocked = 0;
76
+ *
77
+ * nvtxSyncUserAttributes_t attribs = { 0 };
78
+ * attribs.version = NVTX_VERSION;
79
+ * attribs.size = NVTX_SYNCUSER_ATTRIB_STRUCT_SIZE;
80
+ * attribs.messageType = NVTX_MESSAGE_TYPE_ASCII;
81
+ * attribs.message.ascii = name;
82
+ * hSync = nvtxDomainSyncUserCreate(d, &attribs);
83
+ * }
84
+ *
85
+ * ~MyMutex() {
86
+ * nvtxDomainSyncUserDestroy(hSync);
87
+ * }
88
+ *
89
+ * bool Lock() {
90
+ * nvtxDomainSyncUserAcquireStart(hSync);
91
+ * bool acquired = __sync_bool_compare_and_swap(&bLocked, 0, 1);//atomic compiler intrinsic
92
+
93
+ * if (acquired) {
94
+ * nvtxDomainSyncUserAcquireSuccess(hSync);
95
+ * }
96
+ * else {
97
+ * nvtxDomainSyncUserAcquireFailed(hSync);
98
+ * }
99
+ * return acquired;
100
+ * }
101
+
102
+ * void Unlock() {
103
+ * nvtxDomainSyncUserReleasing(hSync);
104
+ * bLocked = false;
105
+ * }
106
+ * };
107
+ * \endcode
108
+ *
109
+ * \version \NVTX_VERSION_2
110
+ */
111
+
112
+ /* ------------------------------------------------------------------------- */
113
+ /* \cond SHOW_HIDDEN
114
+ * \brief Used to build a non-colliding value for resource types separated class
115
+ * \version \NVTX_VERSION_2
116
+ */
117
+ #define NVTX_RESOURCE_CLASS_SYNC_OS 2 /**< Synchronization objects that are OS specific. */
118
+ #define NVTX_RESOURCE_CLASS_SYNC_PTHREAD 3 /**< Synchronization objects that are from the POSIX Threads API (pthread)*/
119
+ /** \endcond */
120
+
121
+
122
+ /* ------------------------------------------------------------------------- */
123
+ /** \defgroup SYNCHRONIZATION Synchronization
124
+ * See page \ref PAGE_SYNCHRONIZATION.
125
+ * @{
126
+ */
127
+
128
+ /** \brief Resource type values for OSs with POSIX Thread API support
129
+ */
130
+ typedef enum nvtxResourceSyncPosixThreadType_t
131
+ {
132
+ NVTX_RESOURCE_TYPE_SYNC_PTHREAD_MUTEX = NVTX_RESOURCE_MAKE_TYPE(SYNC_PTHREAD, 1), /* pthread_mutex_t */
133
+ NVTX_RESOURCE_TYPE_SYNC_PTHREAD_CONDITION = NVTX_RESOURCE_MAKE_TYPE(SYNC_PTHREAD, 2), /* pthread_cond_t */
134
+ NVTX_RESOURCE_TYPE_SYNC_PTHREAD_RWLOCK = NVTX_RESOURCE_MAKE_TYPE(SYNC_PTHREAD, 3), /* pthread_rwlock_t */
135
+ NVTX_RESOURCE_TYPE_SYNC_PTHREAD_BARRIER = NVTX_RESOURCE_MAKE_TYPE(SYNC_PTHREAD, 4), /* pthread_barrier_t */
136
+ NVTX_RESOURCE_TYPE_SYNC_PTHREAD_SPINLOCK = NVTX_RESOURCE_MAKE_TYPE(SYNC_PTHREAD, 5), /* pthread_spinlock_t */
137
+ NVTX_RESOURCE_TYPE_SYNC_PTHREAD_ONCE = NVTX_RESOURCE_MAKE_TYPE(SYNC_PTHREAD, 6) /* pthread_once_t */
138
+ } nvtxResourceSyncPosixThreadType_t;
139
+
140
+ /** \brief Resource type values for Windows OSs
141
+ */
142
+ typedef enum nvtxResourceSyncWindowsType_t
143
+ {
144
+ NVTX_RESOURCE_TYPE_SYNC_WINDOWS_MUTEX = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 1),
145
+ NVTX_RESOURCE_TYPE_SYNC_WINDOWS_SEMAPHORE = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 2),
146
+ NVTX_RESOURCE_TYPE_SYNC_WINDOWS_EVENT = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 3),
147
+ NVTX_RESOURCE_TYPE_SYNC_WINDOWS_CRITICAL_SECTION = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 4),
148
+ NVTX_RESOURCE_TYPE_SYNC_WINDOWS_SRWLOCK = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 5)
149
+ } nvtxResourceSyncWindowsType_t;
150
+
151
+ /** \brief Resource type values for Linux and Linux derived OSs such as Android
152
+ * \sa
153
+ * ::nvtxResourceSyncPosixThreadType_t
154
+ */
155
+ typedef enum nvtxResourceSyncLinuxType_t
156
+ {
157
+ NVTX_RESOURCE_TYPE_SYNC_LINUX_MUTEX = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 1),
158
+ NVTX_RESOURCE_TYPE_SYNC_LINUX_FUTEX = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 2),
159
+ NVTX_RESOURCE_TYPE_SYNC_LINUX_SEMAPHORE = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 3),
160
+ NVTX_RESOURCE_TYPE_SYNC_LINUX_COMPLETION = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 4),
161
+ NVTX_RESOURCE_TYPE_SYNC_LINUX_SPINLOCK = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 5),
162
+ NVTX_RESOURCE_TYPE_SYNC_LINUX_SEQLOCK = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 6),
163
+ NVTX_RESOURCE_TYPE_SYNC_LINUX_RCU = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 7)
164
+ } nvtxResourceSyncLinuxType_t;
165
+
166
+ /** \brief Resource type values for Android come from Linux.
167
+ * \sa
168
+ * ::nvtxResourceSyncLinuxType_t
169
+ * ::nvtxResourceSyncPosixThreadType_t
170
+ */
171
+ typedef enum nvtxResourceSyncLinuxType_t nvtxResourceSyncAndroidType_t;
172
+
173
+ /** \brief User Defined Synchronization Object Handle .
174
+ * \anchor SYNCUSER_HANDLE_STRUCTURE
175
+ *
176
+ * This structure is opaque to the user and is used as a handle to reference
177
+ * a user defined syncrhonization object. The tools will return a pointer through the API for the application
178
+ * to hold on it's behalf to reference the string in the future.
179
+ *
180
+ */
181
+ typedef struct nvtxSyncUser* nvtxSyncUser_t;
182
+
183
+ /** \brief User Defined Synchronization Object Attributes Structure.
184
+ * \anchor USERDEF_SYNC_ATTRIBUTES_STRUCTURE
185
+ *
186
+ * This structure is used to describe the attributes of a user defined synchronization
187
+ * object. The layout of the structure is defined by a specific version of the tools
188
+ * extension library and can change between different versions of the Tools Extension
189
+ * library.
190
+ *
191
+ * \par Initializing the Attributes
192
+ *
193
+ * The caller should always perform the following three tasks when using
194
+ * attributes:
195
+ * <ul>
196
+ * <li>Zero the structure
197
+ * <li>Set the version field
198
+ * <li>Set the size field
199
+ * </ul>
200
+ *
201
+ * Zeroing the structure sets all the event attributes types and values
202
+ * to the default value.
203
+ *
204
+ * The version and size field are used by the Tools Extension
205
+ * implementation to handle multiple versions of the attributes structure.
206
+ *
207
+ * It is recommended that the caller use one of the following to methods
208
+ * to initialize the event attributes structure:
209
+ *
210
+ * \par Method 1: Initializing nvtxEventAttributes for future compatibility
211
+ * \code
212
+ * nvtxSyncUserAttributes_t attribs = {0};
213
+ * attribs.version = NVTX_VERSION;
214
+ * attribs.size = NVTX_SYNCUSER_ATTRIB_STRUCT_SIZE;
215
+ * \endcode
216
+ *
217
+ * \par Method 2: Initializing nvtxSyncUserAttributes_t for a specific version
218
+ * \code
219
+ * nvtxSyncUserAttributes_t attribs = {0};
220
+ * attribs.version = 1;
221
+ * attribs.size = (uint16_t)(sizeof(nvtxSyncUserAttributes_t));
222
+ * \endcode
223
+ *
224
+ * If the caller uses Method 1 it is critical that the entire binary
225
+ * layout of the structure be configured to 0 so that all fields
226
+ * are initialized to the default value.
227
+ *
228
+ * The caller should either use both NVTX_VERSION and
229
+ * NVTX_SYNCUSER_ATTRIB_STRUCT_SIZE (Method 1) or use explicit values
230
+ * and a versioned type (Method 2). Using a mix of the two methods
231
+ * will likely cause either source level incompatibility or binary
232
+ * incompatibility in the future.
233
+ *
234
+ * \par Settings Attribute Types and Values
235
+ *
236
+ *
237
+ * \par Example:
238
+ * \code
239
+ * // Initialize
240
+ * nvtxSyncUserAttributes_t attribs = {0};
241
+ * attribs.version = NVTX_VERSION;
242
+ * attribs.size = NVTX_SYNCUSER_ATTRIB_STRUCT_SIZE;
243
+ *
244
+ * // Configure the Attributes
245
+ * attribs.messageType = NVTX_MESSAGE_TYPE_ASCII;
246
+ * attribs.message.ascii = "Example";
247
+ * \endcode
248
+ *
249
+ * \sa
250
+ * ::nvtxDomainSyncUserCreate
251
+ */
252
+ typedef struct nvtxSyncUserAttributes_v0
253
+ {
254
+ /**
255
+ * \brief Version flag of the structure.
256
+ *
257
+ * Needs to be set to NVTX_VERSION to indicate the version of NVTX APIs
258
+ * supported in this header file. This can optionally be overridden to
259
+ * another version of the tools extension library.
260
+ */
261
+ uint16_t version;
262
+
263
+ /**
264
+ * \brief Size of the structure.
265
+ *
266
+ * Needs to be set to the size in bytes of the event attribute
267
+ * structure used to specify the event.
268
+ */
269
+ uint16_t size;
270
+
271
+ /** \brief Message type specified in this attribute structure.
272
+ *
273
+ * Defines the message format of the attribute structure's \ref nvtxSyncUserAttributes_v0::message
274
+ * "message" field.
275
+ *
276
+ * Default Value is NVTX_MESSAGE_UNKNOWN
277
+ */
278
+ int32_t messageType; /* nvtxMessageType_t */
279
+
280
+ /** \brief Message assigned to this attribute structure.
281
+ *
282
+ * The text message that is attached to an event.
283
+ */
284
+ nvtxMessageValue_t message;
285
+
286
+ } nvtxSyncUserAttributes_v0;
287
+
288
+ typedef struct nvtxSyncUserAttributes_v0 nvtxSyncUserAttributes_t;
289
+
290
+ /* ------------------------------------------------------------------------- */
291
+ /** \brief Create a user defined synchronization object
292
+ * This is used to track non-OS synchronization working with spinlocks and atomics
293
+ *
294
+ * \param domain - Domain to own the resource
295
+ * \param attribs - A structure to assign multiple attributes to the object.
296
+ *
297
+ * \return A handle that represents the newly created user defined synchronization object.
298
+ *
299
+ * \sa
300
+ * ::nvtxDomainSyncUserCreate
301
+ * ::nvtxDomainSyncUserDestroy
302
+ * ::nvtxDomainSyncUserAcquireStart
303
+ * ::nvtxDomainSyncUserAcquireFailed
304
+ * ::nvtxDomainSyncUserAcquireSuccess
305
+ * ::nvtxDomainSyncUserReleasing
306
+ *
307
+ * \version \NVTX_VERSION_2
308
+ */
309
+ NVTX_DECLSPEC nvtxSyncUser_t NVTX_API nvtxDomainSyncUserCreate(nvtxDomainHandle_t domain, const nvtxSyncUserAttributes_t* attribs);
310
+
311
+ /* ------------------------------------------------------------------------- */
312
+ /** \brief Destroy a user defined synchronization object
313
+ * This is used to track non-OS synchronization working with spinlocks and atomics
314
+ *
315
+ * \param handle - A handle to the object to operate on.
316
+ *
317
+ * \sa
318
+ * ::nvtxDomainSyncUserCreate
319
+ * ::nvtxDomainSyncUserDestroy
320
+ * ::nvtxDomainSyncUserAcquireStart
321
+ * ::nvtxDomainSyncUserAcquireFailed
322
+ * ::nvtxDomainSyncUserAcquireSuccess
323
+ * ::nvtxDomainSyncUserReleasing
324
+ *
325
+ * \version \NVTX_VERSION_2
326
+ */
327
+ NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserDestroy(nvtxSyncUser_t handle);
328
+
329
+ /* ------------------------------------------------------------------------- */
330
+ /** \brief Signal to tools that an attempt to acquire a user defined synchronization object
331
+ *
332
+ * \param handle - A handle to the object to operate on.
333
+ *
334
+ * \sa
335
+ * ::nvtxDomainSyncUserCreate
336
+ * ::nvtxDomainSyncUserDestroy
337
+ * ::nvtxDomainSyncUserAcquireStart
338
+ * ::nvtxDomainSyncUserAcquireFailed
339
+ * ::nvtxDomainSyncUserAcquireSuccess
340
+ * ::nvtxDomainSyncUserReleasing
341
+ *
342
+ * \version \NVTX_VERSION_2
343
+ */
344
+ NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserAcquireStart(nvtxSyncUser_t handle);
345
+
346
+ /* ------------------------------------------------------------------------- */
347
+ /** \brief Signal to tools of failure in acquiring a user defined synchronization object
348
+ * This should be called after \ref nvtxDomainSyncUserAcquireStart
349
+ *
350
+ * \param handle - A handle to the object to operate on.
351
+ *
352
+ * \sa
353
+ * ::nvtxDomainSyncUserCreate
354
+ * ::nvtxDomainSyncUserDestroy
355
+ * ::nvtxDomainSyncUserAcquireStart
356
+ * ::nvtxDomainSyncUserAcquireFailed
357
+ * ::nvtxDomainSyncUserAcquireSuccess
358
+ * ::nvtxDomainSyncUserReleasing
359
+ *
360
+ * \version \NVTX_VERSION_2
361
+ */NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserAcquireFailed(nvtxSyncUser_t handle);
362
+
363
+ /* ------------------------------------------------------------------------- */
364
+ /** \brief Signal to tools of success in acquiring a user defined synchronization object
365
+ * This should be called after \ref nvtxDomainSyncUserAcquireStart.
366
+ *
367
+ * \param handle - A handle to the object to operate on.
368
+ *
369
+ * \sa
370
+ * ::nvtxDomainSyncUserCreate
371
+ * ::nvtxDomainSyncUserDestroy
372
+ * ::nvtxDomainSyncUserAcquireStart
373
+ * ::nvtxDomainSyncUserAcquireFailed
374
+ * ::nvtxDomainSyncUserAcquireSuccess
375
+ * ::nvtxDomainSyncUserReleasing
376
+ *
377
+ * \version \NVTX_VERSION_2
378
+ */NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserAcquireSuccess(nvtxSyncUser_t handle);
379
+
380
+ /* ------------------------------------------------------------------------- */
381
+ /** \brief Signal to tools of releasing a reservation on user defined synchronization object
382
+ * This should be called after \ref nvtxDomainSyncUserAcquireSuccess.
383
+ *
384
+ * \param handle - A handle to the object to operate on.
385
+ *
386
+ * \sa
387
+ * ::nvtxDomainSyncUserCreate
388
+ * ::nvtxDomainSyncUserDestroy
389
+ * ::nvtxDomainSyncUserAcquireStart
390
+ * ::nvtxDomainSyncUserAcquireFailed
391
+ * ::nvtxDomainSyncUserAcquireSuccess
392
+ * ::nvtxDomainSyncUserReleasing
393
+ *
394
+ * \version \NVTX_VERSION_2
395
+ */
396
+ NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserReleasing(nvtxSyncUser_t handle);
397
+
398
+
399
+ /** @} */ /*END defgroup*/
400
+
401
+ #ifdef __cplusplus
402
+ }
403
+ #endif /* __cplusplus */
404
+
405
+ #ifndef NVTX_NO_IMPL
406
+ #define NVTX_IMPL_GUARD_SYNC /* Ensure other headers cannot included directly */
407
+ #include "nvtxDetail/nvtxImplSync_v3.h"
408
+ #undef NVTX_IMPL_GUARD_SYNC
409
+ #endif /*NVTX_NO_IMPL*/
410
+
411
+ #endif /* NVTOOLSEXT_SYNC_V3 */
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvtx3/nvtxDetail/nvtxInit.h ADDED
@@ -0,0 +1,343 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* This file was procedurally generated! Do not modify this file by hand. */
2
+
3
+ /*
4
+ * Copyright 2009-2016 NVIDIA Corporation. All rights reserved.
5
+ *
6
+ * NOTICE TO USER:
7
+ *
8
+ * This source code is subject to NVIDIA ownership rights under U.S. and
9
+ * international Copyright laws.
10
+ *
11
+ * This software and the information contained herein is PROPRIETARY and
12
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and conditions
13
+ * of a form of NVIDIA software license agreement.
14
+ *
15
+ * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
16
+ * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
17
+ * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
18
+ * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
19
+ * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
20
+ * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
21
+ * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
22
+ * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
23
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
24
+ * OR PERFORMANCE OF THIS SOURCE CODE.
25
+ *
26
+ * U.S. Government End Users. This source code is a "commercial item" as
27
+ * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
28
+ * "commercial computer software" and "commercial computer software
29
+ * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
30
+ * and is provided to the U.S. Government only as a commercial end item.
31
+ * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
32
+ * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
33
+ * source code with only those rights set forth herein.
34
+ *
35
+ * Any use of this source code in individual and commercial software must
36
+ * include, in the user documentation and internal comments to the code,
37
+ * the above Disclaimer and U.S. Government End Users Notice.
38
+ */
39
+
40
+ #ifndef NVTX_IMPL_GUARD
41
+ #error Never include this file directly -- it is automatically included by nvToolsExt.h (except when NVTX_NO_IMPL is defined).
42
+ #endif
43
+
44
+ /* ---- Platform-independent helper definitions and functions ---- */
45
+
46
+ /* Prefer macros over inline functions to reduce symbol resolution at link time */
47
+
48
+ #if defined(_WIN32)
49
+ #define NVTX_PATHCHAR wchar_t
50
+ #define NVTX_STR(x) L##x
51
+ #define NVTX_GETENV _wgetenv
52
+ #define NVTX_BUFSIZE MAX_PATH
53
+ #define NVTX_DLLHANDLE HMODULE
54
+ #define NVTX_DLLOPEN(x) LoadLibraryW(x)
55
+ #define NVTX_DLLFUNC GetProcAddress
56
+ #define NVTX_DLLCLOSE FreeLibrary
57
+ #define NVTX_YIELD() SwitchToThread()
58
+ #define NVTX_MEMBAR() MemoryBarrier()
59
+ #define NVTX_ATOMIC_WRITE_32(address, value) InterlockedExchange((volatile LONG*)address, value)
60
+ #define NVTX_ATOMIC_CAS_32(old, address, exchange, comparand) old = InterlockedCompareExchange((volatile LONG*)address, exchange, comparand)
61
+ #elif defined(__GNUC__)
62
+ #define NVTX_PATHCHAR char
63
+ #define NVTX_STR(x) x
64
+ #define NVTX_GETENV getenv
65
+ #define NVTX_BUFSIZE PATH_MAX
66
+ #define NVTX_DLLHANDLE void*
67
+ #define NVTX_DLLOPEN(x) dlopen(x, RTLD_LAZY)
68
+ #define NVTX_DLLFUNC dlsym
69
+ #define NVTX_DLLCLOSE dlclose
70
+ #define NVTX_YIELD() sched_yield()
71
+ #define NVTX_MEMBAR() __sync_synchronize()
72
+ /* Ensure full memory barrier for atomics, to match Windows functions */
73
+ #define NVTX_ATOMIC_WRITE_32(address, value) __sync_synchronize(); __sync_lock_test_and_set(address, value)
74
+ #define NVTX_ATOMIC_CAS_32(old, address, exchange, comparand) __sync_synchronize(); old = __sync_val_compare_and_swap(address, exchange, comparand)
75
+ #else
76
+ #error The library does not support your configuration!
77
+ #endif
78
+
79
+ /* Define this to 1 for platforms that where pre-injected libraries can be discovered. */
80
+ #if defined(_WIN32)
81
+ /* TODO */
82
+ #define NVTX_SUPPORT_ALREADY_INJECTED_LIBRARY 0
83
+ #else
84
+ #define NVTX_SUPPORT_ALREADY_INJECTED_LIBRARY 0
85
+ #endif
86
+
87
+ /* Define this to 1 for platforms that support environment variables */
88
+ /* TODO: Detect UWP, a.k.a. Windows Store app, and set this to 0. */
89
+ /* Try: #if defined(WINAPI_FAMILY_PARTITION) && WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) */
90
+ #define NVTX_SUPPORT_ENV_VARS 1
91
+
92
+ /* Define this to 1 for platforms that support dynamic/shared libraries */
93
+ #define NVTX_SUPPORT_DYNAMIC_INJECTION_LIBRARY 1
94
+
95
+ /* Injection libraries implementing InitializeInjectionNvtx2 may be statically linked,
96
+ * and this will override any dynamic injection. Useful for platforms where dynamic
97
+ * injection is not available. Since weak symbols not explicitly marked extern are
98
+ * guaranteed to be initialized to zero if no definitions are found by the linker, the
99
+ * dynamic injection process proceeds normally if pfnInitializeInjectionNvtx2 is 0. */
100
+ #if defined(__GNUC__) && !defined(_WIN32) && !defined(__CYGWIN__)
101
+ #define NVTX_SUPPORT_STATIC_INJECTION_LIBRARY 1
102
+ /* To statically inject an NVTX library, define InitializeInjectionNvtx2_fnptr as a normal
103
+ * symbol (not weak) pointing to the implementation of InitializeInjectionNvtx2 (which
104
+ * does not need to be named "InitializeInjectionNvtx2" as is necessary in a dynamic
105
+ * injection library. */
106
+ __attribute__((weak)) NvtxInitializeInjectionNvtxFunc_t InitializeInjectionNvtx2_fnptr;
107
+ #else
108
+ #define NVTX_SUPPORT_STATIC_INJECTION_LIBRARY 0
109
+ #endif
110
+
111
+ /* This function tries to find or load an NVTX injection library and get the
112
+ * address of its InitializeInjection2 function. If such a function pointer
113
+ * is found, it is called, and passed the address of this NVTX instance's
114
+ * nvtxGetExportTable function, so the injection can attach to this instance.
115
+ * If the initialization fails for any reason, any dynamic library loaded will
116
+ * be freed, and all NVTX implementation functions will be set to no-ops. If
117
+ * initialization succeeds, NVTX functions not attached to the tool will be set
118
+ * to no-ops. This is implemented as one function instead of several small
119
+ * functions to minimize the number of weak symbols the linker must resolve.
120
+ * Order of search is:
121
+ * - Pre-injected library exporting InitializeInjectionNvtx2
122
+ * - Loadable library exporting InitializeInjectionNvtx2
123
+ * - Path specified by env var NVTX_INJECTION??_PATH (?? is 32 or 64)
124
+ * - On Android, libNvtxInjection??.so within the package (?? is 32 or 64)
125
+ * - Statically-linked injection library defining InitializeInjectionNvtx2_fnptr
126
+ */
127
+ NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_VERSIONED_IDENTIFIER(nvtxInitializeInjectionLibrary)(void);
128
+ NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_VERSIONED_IDENTIFIER(nvtxInitializeInjectionLibrary)(void)
129
+ {
130
+ const char* const initFuncName = "InitializeInjectionNvtx2";
131
+ NvtxInitializeInjectionNvtxFunc_t init_fnptr = (NvtxInitializeInjectionNvtxFunc_t)0;
132
+ NVTX_DLLHANDLE injectionLibraryHandle = (NVTX_DLLHANDLE)0;
133
+ int entryPointStatus = 0;
134
+
135
+ #if NVTX_SUPPORT_ALREADY_INJECTED_LIBRARY
136
+ /* Use POSIX global symbol chain to query for init function from any module */
137
+ init_fnptr = (NvtxInitializeInjectionNvtxFunc_t)NVTX_DLLFUNC(0, initFuncName);
138
+ #endif
139
+
140
+ #if NVTX_SUPPORT_DYNAMIC_INJECTION_LIBRARY
141
+ /* Try discovering dynamic injection library to load */
142
+ if (!init_fnptr)
143
+ {
144
+ #if NVTX_SUPPORT_ENV_VARS
145
+ /* If env var NVTX_INJECTION64_PATH is set, it should contain the path
146
+ * to a 64-bit dynamic NVTX injection library (and similar for 32-bit). */
147
+ const NVTX_PATHCHAR* const nvtxEnvVarName = (sizeof(void*) == 4)
148
+ ? NVTX_STR("NVTX_INJECTION32_PATH")
149
+ : NVTX_STR("NVTX_INJECTION64_PATH");
150
+ #endif /* NVTX_SUPPORT_ENV_VARS */
151
+ NVTX_PATHCHAR injectionLibraryPathBuf[NVTX_BUFSIZE];
152
+ const NVTX_PATHCHAR* injectionLibraryPath = (const NVTX_PATHCHAR*)0;
153
+
154
+ /* Refer to this variable explicitly in case all references to it are #if'ed out */
155
+ (void)injectionLibraryPathBuf;
156
+
157
+ #if NVTX_SUPPORT_ENV_VARS
158
+ /* Disable the warning for getenv & _wgetenv -- this usage is safe because
159
+ * these functions are not called again before using the returned value. */
160
+ #if defined(_MSC_VER)
161
+ #pragma warning( push )
162
+ #pragma warning( disable : 4996 )
163
+ #endif
164
+ injectionLibraryPath = NVTX_GETENV(nvtxEnvVarName);
165
+ #if defined(_MSC_VER)
166
+ #pragma warning( pop )
167
+ #endif
168
+ #endif
169
+
170
+ #if defined(__ANDROID__)
171
+ if (!injectionLibraryPath)
172
+ {
173
+ const char *bits = (sizeof(void*) == 4) ? "32" : "64";
174
+ char cmdlineBuf[32];
175
+ char pkgName[PATH_MAX];
176
+ int count;
177
+ int pid;
178
+ FILE *fp;
179
+ size_t bytesRead;
180
+ size_t pos;
181
+
182
+ pid = (int)getpid();
183
+ count = snprintf(cmdlineBuf, sizeof(cmdlineBuf), "/proc/%d/cmdline", pid);
184
+ if (count <= 0 || count >= (int)sizeof(cmdlineBuf))
185
+ {
186
+ NVTX_ERR("Path buffer too small for: /proc/%d/cmdline\n", pid);
187
+ return NVTX_ERR_INIT_ACCESS_LIBRARY;
188
+ }
189
+
190
+ fp = fopen(cmdlineBuf, "r");
191
+ if (!fp)
192
+ {
193
+ NVTX_ERR("File couldn't be opened: %s\n", cmdlineBuf);
194
+ return NVTX_ERR_INIT_ACCESS_LIBRARY;
195
+ }
196
+
197
+ bytesRead = fread(pkgName, 1, sizeof(pkgName) - 1, fp);
198
+ fclose(fp);
199
+ if (bytesRead == 0)
200
+ {
201
+ NVTX_ERR("Package name couldn't be read from file: %s\n", cmdlineBuf);
202
+ return NVTX_ERR_INIT_ACCESS_LIBRARY;
203
+ }
204
+
205
+ pkgName[bytesRead] = 0;
206
+
207
+ /* String can contain colon as a process separator. In this case the package name is before the colon. */
208
+ pos = 0;
209
+ while (pos < bytesRead && pkgName[pos] != ':' && pkgName[pos] != '\0')
210
+ {
211
+ ++pos;
212
+ }
213
+ pkgName[pos] = 0;
214
+
215
+ count = snprintf(injectionLibraryPathBuf, NVTX_BUFSIZE, "/data/data/%s/files/libNvtxInjection%s.so", pkgName, bits);
216
+ if (count <= 0 || count >= NVTX_BUFSIZE)
217
+ {
218
+ NVTX_ERR("Path buffer too small for: /data/data/%s/files/libNvtxInjection%s.so\n", pkgName, bits);
219
+ return NVTX_ERR_INIT_ACCESS_LIBRARY;
220
+ }
221
+
222
+ /* On Android, verify path is accessible due to aggressive file access restrictions. */
223
+ /* For dlopen, if the filename contains a leading slash, then it is interpreted as a */
224
+ /* relative or absolute pathname; otherwise it will follow the rules in ld.so. */
225
+ if (injectionLibraryPathBuf[0] == '/')
226
+ {
227
+ #if (__ANDROID_API__ < 21)
228
+ int access_err = access(injectionLibraryPathBuf, F_OK | R_OK);
229
+ #else
230
+ int access_err = faccessat(AT_FDCWD, injectionLibraryPathBuf, F_OK | R_OK, 0);
231
+ #endif
232
+ if (access_err != 0)
233
+ {
234
+ NVTX_ERR("Injection library path wasn't accessible [code=%s] [path=%s]\n", strerror(errno), injectionLibraryPathBuf);
235
+ return NVTX_ERR_INIT_ACCESS_LIBRARY;
236
+ }
237
+ }
238
+ injectionLibraryPath = injectionLibraryPathBuf;
239
+ }
240
+ #endif
241
+
242
+ /* At this point, injectionLibraryPath is specified if a dynamic
243
+ * injection library was specified by a tool. */
244
+ if (injectionLibraryPath)
245
+ {
246
+ /* Load the injection library */
247
+ injectionLibraryHandle = NVTX_DLLOPEN(injectionLibraryPath);
248
+ if (!injectionLibraryHandle)
249
+ {
250
+ NVTX_ERR("Failed to load injection library\n");
251
+ return NVTX_ERR_INIT_LOAD_LIBRARY;
252
+ }
253
+ else
254
+ {
255
+ /* Attempt to get the injection library's entry-point */
256
+ init_fnptr = (NvtxInitializeInjectionNvtxFunc_t)NVTX_DLLFUNC(injectionLibraryHandle, initFuncName);
257
+ if (!init_fnptr)
258
+ {
259
+ NVTX_DLLCLOSE(injectionLibraryHandle);
260
+ NVTX_ERR("Failed to get address of function InitializeInjectionNvtx2 from injection library\n");
261
+ return NVTX_ERR_INIT_MISSING_LIBRARY_ENTRY_POINT;
262
+ }
263
+ }
264
+ }
265
+ }
266
+ #endif
267
+
268
+ #if NVTX_SUPPORT_STATIC_INJECTION_LIBRARY
269
+ if (!init_fnptr)
270
+ {
271
+ /* Check weakly-defined function pointer. A statically-linked injection can define this as
272
+ * a normal symbol and it will take precedence over a dynamic injection. */
273
+ if (InitializeInjectionNvtx2_fnptr)
274
+ {
275
+ init_fnptr = InitializeInjectionNvtx2_fnptr;
276
+ }
277
+ }
278
+ #endif
279
+
280
+ /* At this point, if init_fnptr is not set, then no tool has specified
281
+ * an NVTX injection library -- return non-success result so all NVTX
282
+ * API functions will be set to no-ops. */
283
+ if (!init_fnptr)
284
+ {
285
+ return NVTX_ERR_NO_INJECTION_LIBRARY_AVAILABLE;
286
+ }
287
+
288
+ /* Invoke injection library's initialization function. If it returns
289
+ * 0 (failure) and a dynamic injection was loaded, unload it. */
290
+ entryPointStatus = init_fnptr(NVTX_VERSIONED_IDENTIFIER(nvtxGetExportTable));
291
+ if (entryPointStatus == 0)
292
+ {
293
+ NVTX_ERR("Failed to initialize injection library -- initialization function returned 0\n");
294
+ if (injectionLibraryHandle)
295
+ {
296
+ NVTX_DLLCLOSE(injectionLibraryHandle);
297
+ }
298
+ return NVTX_ERR_INIT_FAILED_LIBRARY_ENTRY_POINT;
299
+ }
300
+
301
+ return NVTX_SUCCESS;
302
+ }
303
+
304
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(void)
305
+ {
306
+ unsigned int old;
307
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).initState == NVTX_INIT_STATE_COMPLETE)
308
+ {
309
+ return;
310
+ }
311
+
312
+ NVTX_ATOMIC_CAS_32(
313
+ old,
314
+ &NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).initState,
315
+ NVTX_INIT_STATE_STARTED,
316
+ NVTX_INIT_STATE_FRESH);
317
+ if (old == NVTX_INIT_STATE_FRESH)
318
+ {
319
+ int result;
320
+ int forceAllToNoops;
321
+
322
+ /* Load & initialize injection library -- it will assign the function pointers */
323
+ result = NVTX_VERSIONED_IDENTIFIER(nvtxInitializeInjectionLibrary)();
324
+
325
+ /* Set all pointers not assigned by the injection to null */
326
+ forceAllToNoops = result != NVTX_SUCCESS; /* Set all to null if injection init failed */
327
+ NVTX_VERSIONED_IDENTIFIER(nvtxSetInitFunctionsToNoops)(forceAllToNoops);
328
+
329
+ /* Signal that initialization has finished, so now the assigned function pointers will be used */
330
+ NVTX_ATOMIC_WRITE_32(
331
+ &NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).initState,
332
+ NVTX_INIT_STATE_COMPLETE);
333
+ }
334
+ else /* Spin-wait until initialization has finished */
335
+ {
336
+ NVTX_MEMBAR();
337
+ while (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).initState != NVTX_INIT_STATE_COMPLETE)
338
+ {
339
+ NVTX_YIELD();
340
+ NVTX_MEMBAR();
341
+ }
342
+ }
343
+ }
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvtx3/nvtxDetail/nvtxInitDecls.h ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #ifndef NVTX_IMPL_GUARD
2
+ #error Never include this file directly -- it is automatically included by nvToolsExt.h (except when NVTX_NO_IMPL is defined).
3
+ #endif
4
+
5
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxMarkEx_impl_init)(const nvtxEventAttributes_t* eventAttrib);
6
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxMarkA_impl_init)(const char* message);
7
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxMarkW_impl_init)(const wchar_t* message);
8
+ NVTX_LINKONCE_FWDDECL_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartEx_impl_init)(const nvtxEventAttributes_t* eventAttrib);
9
+ NVTX_LINKONCE_FWDDECL_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartA_impl_init)(const char* message);
10
+ NVTX_LINKONCE_FWDDECL_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartW_impl_init)(const wchar_t* message);
11
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeEnd_impl_init)(nvtxRangeId_t id);
12
+ NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePushEx_impl_init)(const nvtxEventAttributes_t* eventAttrib);
13
+ NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePushA_impl_init)(const char* message);
14
+ NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePushW_impl_init)(const wchar_t* message);
15
+ NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePop_impl_init)(void);
16
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryA_impl_init)(uint32_t category, const char* name);
17
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryW_impl_init)(uint32_t category, const wchar_t* name);
18
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadA_impl_init)(uint32_t threadId, const char* name);
19
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadW_impl_init)(uint32_t threadId, const wchar_t* name);
20
+
21
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceA_impl_init)(nvtx_CUdevice device, const char* name);
22
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceW_impl_init)(nvtx_CUdevice device, const wchar_t* name);
23
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextA_impl_init)(nvtx_CUcontext context, const char* name);
24
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextW_impl_init)(nvtx_CUcontext context, const wchar_t* name);
25
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamA_impl_init)(nvtx_CUstream stream, const char* name);
26
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamW_impl_init)(nvtx_CUstream stream, const wchar_t* name);
27
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventA_impl_init)(nvtx_CUevent event, const char* name);
28
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventW_impl_init)(nvtx_CUevent event, const wchar_t* name);
29
+
30
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceA_impl_init)(nvtx_cl_device_id device, const char* name);
31
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceW_impl_init)(nvtx_cl_device_id device, const wchar_t* name);
32
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextA_impl_init)(nvtx_cl_context context, const char* name);
33
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextW_impl_init)(nvtx_cl_context context, const wchar_t* name);
34
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueA_impl_init)(nvtx_cl_command_queue command_queue, const char* name);
35
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueW_impl_init)(nvtx_cl_command_queue command_queue, const wchar_t* name);
36
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectA_impl_init)(nvtx_cl_mem memobj, const char* name);
37
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectW_impl_init)(nvtx_cl_mem memobj, const wchar_t* name);
38
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerA_impl_init)(nvtx_cl_sampler sampler, const char* name);
39
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerW_impl_init)(nvtx_cl_sampler sampler, const wchar_t* name);
40
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramA_impl_init)(nvtx_cl_program program, const char* name);
41
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramW_impl_init)(nvtx_cl_program program, const wchar_t* name);
42
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventA_impl_init)(nvtx_cl_event evnt, const char* name);
43
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventW_impl_init)(nvtx_cl_event evnt, const wchar_t* name);
44
+
45
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceA_impl_init)(int device, const char* name);
46
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceW_impl_init)(int device, const wchar_t* name);
47
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamA_impl_init)(nvtx_cudaStream_t stream, const char* name);
48
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamW_impl_init)(nvtx_cudaStream_t stream, const wchar_t* name);
49
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventA_impl_init)(nvtx_cudaEvent_t event, const char* name);
50
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventW_impl_init)(nvtx_cudaEvent_t event, const wchar_t* name);
51
+
52
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainMarkEx_impl_init)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
53
+ NVTX_LINKONCE_FWDDECL_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeStartEx_impl_init)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
54
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeEnd_impl_init)(nvtxDomainHandle_t domain, nvtxRangeId_t id);
55
+ NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePushEx_impl_init)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
56
+ NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePop_impl_init)(nvtxDomainHandle_t domain);
57
+ NVTX_LINKONCE_FWDDECL_FUNCTION nvtxResourceHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceCreate_impl_init)(nvtxDomainHandle_t domain, nvtxResourceAttributes_t* attribs);
58
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceDestroy_impl_init)(nvtxResourceHandle_t resource);
59
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryA_impl_init)(nvtxDomainHandle_t domain, uint32_t category, const char* name);
60
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryW_impl_init)(nvtxDomainHandle_t domain, uint32_t category, const wchar_t* name);
61
+ NVTX_LINKONCE_FWDDECL_FUNCTION nvtxStringHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringA_impl_init)(nvtxDomainHandle_t domain, const char* string);
62
+ NVTX_LINKONCE_FWDDECL_FUNCTION nvtxStringHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringW_impl_init)(nvtxDomainHandle_t domain, const wchar_t* string);
63
+ NVTX_LINKONCE_FWDDECL_FUNCTION nvtxDomainHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateA_impl_init)(const char* message);
64
+ NVTX_LINKONCE_FWDDECL_FUNCTION nvtxDomainHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateW_impl_init)(const wchar_t* message);
65
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainDestroy_impl_init)(nvtxDomainHandle_t domain);
66
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxInitialize_impl_init)(const void* reserved);
67
+
68
+ NVTX_LINKONCE_FWDDECL_FUNCTION nvtxSyncUser_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserCreate_impl_init)(nvtxDomainHandle_t domain, const nvtxSyncUserAttributes_t* attribs);
69
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserDestroy_impl_init)(nvtxSyncUser_t handle);
70
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireStart_impl_init)(nvtxSyncUser_t handle);
71
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireFailed_impl_init)(nvtxSyncUser_t handle);
72
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireSuccess_impl_init)(nvtxSyncUser_t handle);
73
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserReleasing_impl_init)(nvtxSyncUser_t handle);
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvtx3/nvtxDetail/nvtxLinkOnce.h ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #ifndef __NVTX_LINKONCE_H__
2
+ #define __NVTX_LINKONCE_H__
3
+
4
+ /* This header defines macros to permit making definitions of global variables
5
+ * and functions in C/C++ header files which may be included multiple times in
6
+ * a translation unit or linkage unit. It allows authoring header-only libraries
7
+ * which can be used by multiple other header-only libraries (either as the same
8
+ * copy or multiple copies), and does not require any build changes, such as
9
+ * adding another .c file, linking a static library, or deploying a dynamic
10
+ * library. Globals defined with these macros have the property that they have
11
+ * the same address, pointing to a single instance, for the entire linkage unit.
12
+ * It is expected but not guaranteed that each linkage unit will have a separate
13
+ * instance.
14
+ *
15
+ * In some situations it is desirable to declare a variable without initializing
16
+ * it, refer to it in code or other variables' initializers, and then initialize
17
+ * it later. Similarly, functions can be prototyped, have their address taken,
18
+ * and then have their body defined later. In such cases, use the FWDDECL macros
19
+ * when forward-declaring LINKONCE global variables without initializers and
20
+ * function prototypes, and then use the DEFINE macros when later defining them.
21
+ * Although in many cases the FWDDECL macro is equivalent to the DEFINE macro,
22
+ * following this pattern makes code maximally portable.
23
+ */
24
+
25
+ #if defined(__MINGW32__) /* MinGW */
26
+ #define NVTX_LINKONCE_WEAK __attribute__((section(".gnu.linkonce.0.")))
27
+ #if defined(__cplusplus)
28
+ #define NVTX_LINKONCE_DEFINE_GLOBAL __declspec(selectany)
29
+ #define NVTX_LINKONCE_DEFINE_FUNCTION extern "C" inline NVTX_LINKONCE_WEAK
30
+ #else
31
+ #define NVTX_LINKONCE_DEFINE_GLOBAL __declspec(selectany)
32
+ #define NVTX_LINKONCE_DEFINE_FUNCTION NVTX_LINKONCE_WEAK
33
+ #endif
34
+ #elif defined(_MSC_VER) /* MSVC */
35
+ #if defined(__cplusplus)
36
+ #define NVTX_LINKONCE_DEFINE_GLOBAL extern "C" __declspec(selectany)
37
+ #define NVTX_LINKONCE_DEFINE_FUNCTION extern "C" inline
38
+ #else
39
+ #define NVTX_LINKONCE_DEFINE_GLOBAL __declspec(selectany)
40
+ #define NVTX_LINKONCE_DEFINE_FUNCTION __inline
41
+ #endif
42
+ #elif defined(__CYGWIN__) && defined(__clang__) /* Clang on Cygwin */
43
+ #define NVTX_LINKONCE_WEAK __attribute__((section(".gnu.linkonce.0.")))
44
+ #if defined(__cplusplus)
45
+ #define NVTX_LINKONCE_DEFINE_GLOBAL NVTX_LINKONCE_WEAK
46
+ #define NVTX_LINKONCE_DEFINE_FUNCTION extern "C" NVTX_LINKONCE_WEAK
47
+ #else
48
+ #define NVTX_LINKONCE_DEFINE_GLOBAL NVTX_LINKONCE_WEAK
49
+ #define NVTX_LINKONCE_DEFINE_FUNCTION NVTX_LINKONCE_WEAK
50
+ #endif
51
+ #elif defined(__CYGWIN__) /* Assume GCC or compatible */
52
+ #define NVTX_LINKONCE_WEAK __attribute__((weak))
53
+ #if defined(__cplusplus)
54
+ #define NVTX_LINKONCE_DEFINE_GLOBAL __declspec(selectany)
55
+ #define NVTX_LINKONCE_DEFINE_FUNCTION extern "C" inline
56
+ #else
57
+ #define NVTX_LINKONCE_DEFINE_GLOBAL NVTX_LINKONCE_WEAK
58
+ #define NVTX_LINKONCE_DEFINE_FUNCTION NVTX_LINKONCE_WEAK
59
+ #endif
60
+ #else /* All others: Assume GCC, clang, or compatible */
61
+ #define NVTX_LINKONCE_WEAK __attribute__((weak))
62
+ #define NVTX_LINKONCE_HIDDEN __attribute__((visibility("hidden")))
63
+ #if defined(__cplusplus)
64
+ #define NVTX_LINKONCE_DEFINE_GLOBAL NVTX_LINKONCE_HIDDEN NVTX_LINKONCE_WEAK
65
+ #define NVTX_LINKONCE_DEFINE_FUNCTION extern "C" NVTX_LINKONCE_HIDDEN inline
66
+ #else
67
+ #define NVTX_LINKONCE_DEFINE_GLOBAL NVTX_LINKONCE_HIDDEN NVTX_LINKONCE_WEAK
68
+ #define NVTX_LINKONCE_DEFINE_FUNCTION NVTX_LINKONCE_HIDDEN NVTX_LINKONCE_WEAK
69
+ #endif
70
+ #endif
71
+
72
+ #define NVTX_LINKONCE_FWDDECL_GLOBAL NVTX_LINKONCE_DEFINE_GLOBAL extern
73
+ #define NVTX_LINKONCE_FWDDECL_FUNCTION NVTX_LINKONCE_DEFINE_FUNCTION
74
+
75
+ #endif /* __NVTX_LINKONCE_H__ */
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/pygments/cmdline.py ADDED
@@ -0,0 +1,668 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ pygments.cmdline
3
+ ~~~~~~~~~~~~~~~~
4
+
5
+ Command line interface.
6
+
7
+ :copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
8
+ :license: BSD, see LICENSE for details.
9
+ """
10
+
11
+ import os
12
+ import sys
13
+ import shutil
14
+ import argparse
15
+ from textwrap import dedent
16
+
17
+ from pip._vendor.pygments import __version__, highlight
18
+ from pip._vendor.pygments.util import ClassNotFound, OptionError, docstring_headline, \
19
+ guess_decode, guess_decode_from_terminal, terminal_encoding, \
20
+ UnclosingTextIOWrapper
21
+ from pip._vendor.pygments.lexers import get_all_lexers, get_lexer_by_name, guess_lexer, \
22
+ load_lexer_from_file, get_lexer_for_filename, find_lexer_class_for_filename
23
+ from pip._vendor.pygments.lexers.special import TextLexer
24
+ from pip._vendor.pygments.formatters.latex import LatexEmbeddedLexer, LatexFormatter
25
+ from pip._vendor.pygments.formatters import get_all_formatters, get_formatter_by_name, \
26
+ load_formatter_from_file, get_formatter_for_filename, find_formatter_class
27
+ from pip._vendor.pygments.formatters.terminal import TerminalFormatter
28
+ from pip._vendor.pygments.formatters.terminal256 import Terminal256Formatter, TerminalTrueColorFormatter
29
+ from pip._vendor.pygments.filters import get_all_filters, find_filter_class
30
+ from pip._vendor.pygments.styles import get_all_styles, get_style_by_name
31
+
32
+
33
+ def _parse_options(o_strs):
34
+ opts = {}
35
+ if not o_strs:
36
+ return opts
37
+ for o_str in o_strs:
38
+ if not o_str.strip():
39
+ continue
40
+ o_args = o_str.split(',')
41
+ for o_arg in o_args:
42
+ o_arg = o_arg.strip()
43
+ try:
44
+ o_key, o_val = o_arg.split('=', 1)
45
+ o_key = o_key.strip()
46
+ o_val = o_val.strip()
47
+ except ValueError:
48
+ opts[o_arg] = True
49
+ else:
50
+ opts[o_key] = o_val
51
+ return opts
52
+
53
+
54
+ def _parse_filters(f_strs):
55
+ filters = []
56
+ if not f_strs:
57
+ return filters
58
+ for f_str in f_strs:
59
+ if ':' in f_str:
60
+ fname, fopts = f_str.split(':', 1)
61
+ filters.append((fname, _parse_options([fopts])))
62
+ else:
63
+ filters.append((f_str, {}))
64
+ return filters
65
+
66
+
67
+ def _print_help(what, name):
68
+ try:
69
+ if what == 'lexer':
70
+ cls = get_lexer_by_name(name)
71
+ print(f"Help on the {cls.name} lexer:")
72
+ print(dedent(cls.__doc__))
73
+ elif what == 'formatter':
74
+ cls = find_formatter_class(name)
75
+ print(f"Help on the {cls.name} formatter:")
76
+ print(dedent(cls.__doc__))
77
+ elif what == 'filter':
78
+ cls = find_filter_class(name)
79
+ print(f"Help on the {name} filter:")
80
+ print(dedent(cls.__doc__))
81
+ return 0
82
+ except (AttributeError, ValueError):
83
+ print(f"{what} not found!", file=sys.stderr)
84
+ return 1
85
+
86
+
87
+ def _print_list(what):
88
+ if what == 'lexer':
89
+ print()
90
+ print("Lexers:")
91
+ print("~~~~~~~")
92
+
93
+ info = []
94
+ for fullname, names, exts, _ in get_all_lexers():
95
+ tup = (', '.join(names)+':', fullname,
96
+ exts and '(filenames ' + ', '.join(exts) + ')' or '')
97
+ info.append(tup)
98
+ info.sort()
99
+ for i in info:
100
+ print(('* {}\n {} {}').format(*i))
101
+
102
+ elif what == 'formatter':
103
+ print()
104
+ print("Formatters:")
105
+ print("~~~~~~~~~~~")
106
+
107
+ info = []
108
+ for cls in get_all_formatters():
109
+ doc = docstring_headline(cls)
110
+ tup = (', '.join(cls.aliases) + ':', doc, cls.filenames and
111
+ '(filenames ' + ', '.join(cls.filenames) + ')' or '')
112
+ info.append(tup)
113
+ info.sort()
114
+ for i in info:
115
+ print(('* {}\n {} {}').format(*i))
116
+
117
+ elif what == 'filter':
118
+ print()
119
+ print("Filters:")
120
+ print("~~~~~~~~")
121
+
122
+ for name in get_all_filters():
123
+ cls = find_filter_class(name)
124
+ print("* " + name + ':')
125
+ print(f" {docstring_headline(cls)}")
126
+
127
+ elif what == 'style':
128
+ print()
129
+ print("Styles:")
130
+ print("~~~~~~~")
131
+
132
+ for name in get_all_styles():
133
+ cls = get_style_by_name(name)
134
+ print("* " + name + ':')
135
+ print(f" {docstring_headline(cls)}")
136
+
137
+
138
+ def _print_list_as_json(requested_items):
139
+ import json
140
+ result = {}
141
+ if 'lexer' in requested_items:
142
+ info = {}
143
+ for fullname, names, filenames, mimetypes in get_all_lexers():
144
+ info[fullname] = {
145
+ 'aliases': names,
146
+ 'filenames': filenames,
147
+ 'mimetypes': mimetypes
148
+ }
149
+ result['lexers'] = info
150
+
151
+ if 'formatter' in requested_items:
152
+ info = {}
153
+ for cls in get_all_formatters():
154
+ doc = docstring_headline(cls)
155
+ info[cls.name] = {
156
+ 'aliases': cls.aliases,
157
+ 'filenames': cls.filenames,
158
+ 'doc': doc
159
+ }
160
+ result['formatters'] = info
161
+
162
+ if 'filter' in requested_items:
163
+ info = {}
164
+ for name in get_all_filters():
165
+ cls = find_filter_class(name)
166
+ info[name] = {
167
+ 'doc': docstring_headline(cls)
168
+ }
169
+ result['filters'] = info
170
+
171
+ if 'style' in requested_items:
172
+ info = {}
173
+ for name in get_all_styles():
174
+ cls = get_style_by_name(name)
175
+ info[name] = {
176
+ 'doc': docstring_headline(cls)
177
+ }
178
+ result['styles'] = info
179
+
180
+ json.dump(result, sys.stdout)
181
+
182
def main_inner(parser, argns):
    """Carry out the operation requested on the command line.

    *parser* is the configured ``argparse.ArgumentParser`` (used to print
    usage on errors) and *argns* the parsed argument namespace.  Returns a
    process exit code: 0 on success, 1 on runtime errors, 2 on usage errors.
    """
    if argns.help:
        parser.print_help()
        return 0

    if argns.V:
        print(f'Pygments version {__version__}, (c) 2006-2024 by Georg Brandl, Matthäus '
              'Chajdas and contributors.')
        return 0

    def is_only_option(opt):
        # True if *opt* is the only argument that was given on the command line.
        return not any(v for (k, v) in vars(argns).items() if k != opt)

    # handle ``pygmentize -L``
    if argns.L is not None:
        # -L must not be combined with any other option except --json.
        arg_set = set()
        for k, v in vars(argns).items():
            if v:
                arg_set.add(k)

        arg_set.discard('L')
        arg_set.discard('json')

        if arg_set:
            parser.print_help(sys.stderr)
            return 2

        # print version
        if not argns.json:
            main(['', '-V'])
        allowed_types = {'lexer', 'formatter', 'filter', 'style'}
        # Accept both singular and plural forms ("styles" -> "style").
        largs = [arg.rstrip('s') for arg in argns.L]
        if any(arg not in allowed_types for arg in largs):
            parser.print_help(sys.stderr)
            return 0
        if not largs:
            # Bare -L lists everything.
            largs = allowed_types
        if not argns.json:
            for arg in largs:
                _print_list(arg)
        else:
            _print_list_as_json(largs)
        return 0

    # handle ``pygmentize -H``
    if argns.H:
        if not is_only_option('H'):
            parser.print_help(sys.stderr)
            return 2
        what, name = argns.H
        if what not in ('lexer', 'formatter', 'filter'):
            parser.print_help(sys.stderr)
            return 2
        return _print_help(what, name)

    # parse -O options
    parsed_opts = _parse_options(argns.O or [])

    # parse -P options (single key=value pairs; values may contain commas)
    for p_opt in argns.P or []:
        try:
            name, value = p_opt.split('=', 1)
        except ValueError:
            parsed_opts[p_opt] = True
        else:
            parsed_opts[name] = value

    # encodings: 'encoding' is a shorthand for both directions
    inencoding = parsed_opts.get('inencoding', parsed_opts.get('encoding'))
    outencoding = parsed_opts.get('outencoding', parsed_opts.get('encoding'))

    # handle ``pygmentize -N`` (guess lexer from filename only)
    if argns.N:
        lexer = find_lexer_class_for_filename(argns.N)
        if lexer is None:
            lexer = TextLexer

        print(lexer.aliases[0])
        return 0

    # handle ``pygmentize -C`` (guess lexer from stdin content only)
    if argns.C:
        inp = sys.stdin.buffer.read()
        try:
            lexer = guess_lexer(inp, inencoding=inencoding)
        except ClassNotFound:
            lexer = TextLexer

        print(lexer.aliases[0])
        return 0

    # handle ``pygmentize -S`` (print style definitions, requires -f)
    S_opt = argns.S
    a_opt = argns.a
    if S_opt is not None:
        f_opt = argns.f
        if not f_opt:
            parser.print_help(sys.stderr)
            return 2
        if argns.l or argns.INPUTFILE:
            parser.print_help(sys.stderr)
            return 2

        try:
            parsed_opts['style'] = S_opt
            fmter = get_formatter_by_name(f_opt, **parsed_opts)
        except ClassNotFound as err:
            print(err, file=sys.stderr)
            return 1

        print(fmter.get_style_defs(a_opt or ''))
        return 0

    # if no -S is given, -a is not allowed
    if argns.a is not None:
        parser.print_help(sys.stderr)
        return 2

    # parse -F options
    F_opts = _parse_filters(argns.F or [])

    # -x: allow custom (eXternal) lexers and formatters
    allow_custom_lexer_formatter = bool(argns.x)

    # select lexer
    lexer = None

    # given by name?
    lexername = argns.l
    if lexername:
        # custom lexer, located relative to user's cwd
        if allow_custom_lexer_formatter and '.py' in lexername:
            try:
                filename = None
                name = None
                if ':' in lexername:
                    filename, name = lexername.rsplit(':', 1)

                    if '.py' in name:
                        # This can happen on Windows: If the lexername is
                        # C:\lexer.py -- return to normal load path in that case
                        name = None

                if filename and name:
                    lexer = load_lexer_from_file(filename, name,
                                                 **parsed_opts)
                else:
                    lexer = load_lexer_from_file(lexername, **parsed_opts)
            except ClassNotFound as err:
                print('Error:', err, file=sys.stderr)
                return 1
        else:
            try:
                lexer = get_lexer_by_name(lexername, **parsed_opts)
            except (OptionError, ClassNotFound) as err:
                print('Error:', err, file=sys.stderr)
                return 1

    # read input code
    code = None

    if argns.INPUTFILE:
        if argns.s:
            print('Error: -s option not usable when input file specified',
                  file=sys.stderr)
            return 2

        infn = argns.INPUTFILE
        try:
            with open(infn, 'rb') as infp:
                code = infp.read()
        except Exception as err:
            print('Error: cannot read infile:', err, file=sys.stderr)
            return 1
        if not inencoding:
            code, inencoding = guess_decode(code)

        # do we have to guess the lexer?
        if not lexer:
            try:
                lexer = get_lexer_for_filename(infn, code, **parsed_opts)
            except ClassNotFound as err:
                if argns.g:
                    try:
                        lexer = guess_lexer(code, **parsed_opts)
                    except ClassNotFound:
                        lexer = TextLexer(**parsed_opts)
                else:
                    print('Error:', err, file=sys.stderr)
                    return 1
            except OptionError as err:
                print('Error:', err, file=sys.stderr)
                return 1

    elif not argns.s:  # treat stdin as full file (-s support is later)
        # read code from terminal, always in binary mode since we want to
        # decode ourselves and be tolerant with it
        code = sys.stdin.buffer.read()  # use .buffer to get a binary stream
        if not inencoding:
            code, inencoding = guess_decode_from_terminal(code, sys.stdin)
            # else the lexer will do the decoding
        if not lexer:
            try:
                lexer = guess_lexer(code, **parsed_opts)
            except ClassNotFound:
                lexer = TextLexer(**parsed_opts)

    else:  # -s option needs a lexer with -l
        if not lexer:
            print('Error: when using -s a lexer has to be selected with -l',
                  file=sys.stderr)
            return 2

    # process filters
    for fname, fopts in F_opts:
        try:
            lexer.add_filter(fname, **fopts)
        except ClassNotFound as err:
            print('Error:', err, file=sys.stderr)
            return 1

    # select formatter
    outfn = argns.o
    fmter = argns.f
    if fmter:
        # custom formatter, located relative to user's cwd
        if allow_custom_lexer_formatter and '.py' in fmter:
            try:
                filename = None
                name = None
                if ':' in fmter:
                    # Same logic as above for custom lexer
                    filename, name = fmter.rsplit(':', 1)

                    if '.py' in name:
                        name = None

                if filename and name:
                    fmter = load_formatter_from_file(filename, name,
                                                     **parsed_opts)
                else:
                    fmter = load_formatter_from_file(fmter, **parsed_opts)
            except ClassNotFound as err:
                print('Error:', err, file=sys.stderr)
                return 1
        else:
            try:
                fmter = get_formatter_by_name(fmter, **parsed_opts)
            except (OptionError, ClassNotFound) as err:
                print('Error:', err, file=sys.stderr)
                return 1

    if outfn:
        if not fmter:
            try:
                fmter = get_formatter_for_filename(outfn, **parsed_opts)
            except (OptionError, ClassNotFound) as err:
                print('Error:', err, file=sys.stderr)
                return 1
        try:
            outfile = open(outfn, 'wb')
        except Exception as err:
            print('Error: cannot open outfile:', err, file=sys.stderr)
            return 1
    else:
        if not fmter:
            # No formatter given: pick a terminal formatter matching the
            # terminal's color capabilities.
            if os.environ.get('COLORTERM', '') in ('truecolor', '24bit'):
                fmter = TerminalTrueColorFormatter(**parsed_opts)
            elif '256' in os.environ.get('TERM', ''):
                fmter = Terminal256Formatter(**parsed_opts)
            else:
                fmter = TerminalFormatter(**parsed_opts)
        outfile = sys.stdout.buffer

    # determine output encoding if not explicitly selected
    if not outencoding:
        if outfn:
            # output file? use lexer encoding for now (can still be None)
            fmter.encoding = inencoding
        else:
            # else use terminal encoding
            fmter.encoding = terminal_encoding(sys.stdout)

    # provide coloring under Windows, if possible
    if not outfn and sys.platform in ('win32', 'cygwin') and \
            fmter.name in ('Terminal', 'Terminal256'):  # pragma: no cover
        # unfortunately colorama doesn't support binary streams on Py3
        outfile = UnclosingTextIOWrapper(outfile, encoding=fmter.encoding)
        fmter.encoding = None
        try:
            import colorama.initialise
        except ImportError:
            pass
        else:
            outfile = colorama.initialise.wrap_stream(
                outfile, convert=None, strip=None, autoreset=False, wrap=True)

    # When using the LaTeX formatter and the option `escapeinside` is
    # specified, we need a special lexer which collects escaped text
    # before running the chosen language lexer.
    escapeinside = parsed_opts.get('escapeinside', '')
    if len(escapeinside) == 2 and isinstance(fmter, LatexFormatter):
        left = escapeinside[0]
        right = escapeinside[1]
        lexer = LatexEmbeddedLexer(left, right, lexer)

    # ... and do it!
    if not argns.s:
        # process whole input as per normal...
        try:
            highlight(code, lexer, fmter, outfile)
        finally:
            if outfn:
                outfile.close()
        return 0
    else:
        # line by line processing of stdin (eg: for 'tail -f')...
        try:
            while 1:
                line = sys.stdin.buffer.readline()
                if not line:
                    break
                if not inencoding:
                    line = guess_decode_from_terminal(line, sys.stdin)[0]
                highlight(line, lexer, fmter, outfile)
                if hasattr(outfile, 'flush'):
                    outfile.flush()
            return 0
        except KeyboardInterrupt:  # pragma: no cover
            return 0
        finally:
            if outfn:
                outfile.close()
515
+
516
+
517
class HelpFormatter(argparse.HelpFormatter):
    """An argparse help formatter that adapts its width to the terminal.

    When no explicit *width* is given, the current terminal width minus a
    two-column margin is used instead of argparse's built-in default.
    """

    def __init__(self, prog, indent_increment=2, max_help_position=16, width=None):
        if width is None:
            try:
                width = shutil.get_terminal_size().columns - 2
            except Exception:
                # Fall back to argparse's default width handling.
                pass
        super().__init__(prog, indent_increment, max_help_position, width)
526
+
527
+
528
def main(args=sys.argv):
    """
    Main command line entry point.

    Builds the ``pygmentize`` argument parser, dispatches to
    :func:`main_inner`, and converts unhandled exceptions into a short
    error report (or a full traceback when ``-v`` was given).  Returns a
    process exit code.
    """
    desc = "Highlight an input file and write the result to an output file."
    parser = argparse.ArgumentParser(description=desc, add_help=False,
                                     formatter_class=HelpFormatter)

    operation = parser.add_argument_group('Main operation')
    # -l and -g are mutually exclusive ways of choosing the lexer.
    lexersel = operation.add_mutually_exclusive_group()
    lexersel.add_argument(
        '-l', metavar='LEXER',
        help='Specify the lexer to use. (Query names with -L.) If not '
             'given and -g is not present, the lexer is guessed from the filename.')
    lexersel.add_argument(
        '-g', action='store_true',
        help='Guess the lexer from the file contents, or pass through '
             'as plain text if nothing can be guessed.')
    operation.add_argument(
        '-F', metavar='FILTER[:options]', action='append',
        help='Add a filter to the token stream. (Query names with -L.) '
             'Filter options are given after a colon if necessary.')
    operation.add_argument(
        '-f', metavar='FORMATTER',
        help='Specify the formatter to use. (Query names with -L.) '
             'If not given, the formatter is guessed from the output filename, '
             'and defaults to the terminal formatter if the output is to the '
             'terminal or an unknown file extension.')
    operation.add_argument(
        '-O', metavar='OPTION=value[,OPTION=value,...]', action='append',
        help='Give options to the lexer and formatter as a comma-separated '
             'list of key-value pairs. '
             'Example: `-O bg=light,python=cool`.')
    operation.add_argument(
        '-P', metavar='OPTION=value', action='append',
        help='Give a single option to the lexer and formatter - with this '
             'you can pass options whose value contains commas and equal signs. '
             'Example: `-P "heading=Pygments, the Python highlighter"`.')
    operation.add_argument(
        '-o', metavar='OUTPUTFILE',
        help='Where to write the output. Defaults to standard output.')

    operation.add_argument(
        'INPUTFILE', nargs='?',
        help='Where to read the input. Defaults to standard input.')

    flags = parser.add_argument_group('Operation flags')
    flags.add_argument(
        '-v', action='store_true',
        help='Print a detailed traceback on unhandled exceptions, which '
             'is useful for debugging and bug reports.')
    flags.add_argument(
        '-s', action='store_true',
        help='Process lines one at a time until EOF, rather than waiting to '
             'process the entire file. This only works for stdin, only for lexers '
             'with no line-spanning constructs, and is intended for streaming '
             'input such as you get from `tail -f`. '
             'Example usage: `tail -f sql.log | pygmentize -s -l sql`.')
    flags.add_argument(
        '-x', action='store_true',
        help='Allow custom lexers and formatters to be loaded from a .py file '
             'relative to the current working directory. For example, '
             '`-l ./customlexer.py -x`. By default, this option expects a file '
             'with a class named CustomLexer or CustomFormatter; you can also '
             'specify your own class name with a colon (`-l ./lexer.py:MyLexer`). '
             'Users should be very careful not to use this option with untrusted '
             'files, because it will import and run them.')
    flags.add_argument('--json', help='Output as JSON. This can '
                       'be only used in conjunction with -L.',
                       default=False,
                       action='store_true')

    special_modes_group = parser.add_argument_group(
        'Special modes - do not do any highlighting')
    special_modes = special_modes_group.add_mutually_exclusive_group()
    special_modes.add_argument(
        '-S', metavar='STYLE -f formatter',
        help='Print style definitions for STYLE for a formatter '
             'given with -f. The argument given by -a is formatter '
             'dependent.')
    special_modes.add_argument(
        '-L', nargs='*', metavar='WHAT',
        help='List lexers, formatters, styles or filters -- '
             'give additional arguments for the thing(s) you want to list '
             '(e.g. "styles"), or omit them to list everything.')
    special_modes.add_argument(
        '-N', metavar='FILENAME',
        help='Guess and print out a lexer name based solely on the given '
             'filename. Does not take input or highlight anything. If no specific '
             'lexer can be determined, "text" is printed.')
    special_modes.add_argument(
        '-C', action='store_true',
        help='Like -N, but print out a lexer name based solely on '
             'a given content from standard input.')
    special_modes.add_argument(
        '-H', action='store', nargs=2, metavar=('NAME', 'TYPE'),
        help='Print detailed help for the object <name> of type <type>, '
             'where <type> is one of "lexer", "formatter" or "filter".')
    special_modes.add_argument(
        '-V', action='store_true',
        help='Print the package version.')
    special_modes.add_argument(
        '-h', '--help', action='store_true',
        help='Print this help.')
    special_modes_group.add_argument(
        '-a', metavar='ARG',
        help='Formatter-specific additional argument for the -S (print '
             'style sheet) mode.')

    argns = parser.parse_args(args[1:])

    try:
        return main_inner(parser, argns)
    except BrokenPipeError:
        # someone closed our stdout, e.g. by quitting a pager.
        return 0
    except Exception:
        if argns.v:
            # Verbose mode: print a banner and re-raise so the user gets
            # the full traceback for a bug report.
            print(file=sys.stderr)
            print('*' * 65, file=sys.stderr)
            print('An unhandled exception occurred while highlighting.',
                  file=sys.stderr)
            print('Please report the whole traceback to the issue tracker at',
                  file=sys.stderr)
            print('<https://github.com/pygments/pygments/issues>.',
                  file=sys.stderr)
            print('*' * 65, file=sys.stderr)
            print(file=sys.stderr)
            raise
        # Non-verbose mode: show only the final exception line, plus the
        # file/position info from the innermost traceback frame.
        import traceback
        info = traceback.format_exception(*sys.exc_info())
        msg = info[-1].strip()
        if len(info) >= 3:
            # extract relevant file and position info
            msg += '\n (f{})'.format(info[-2].split('\n')[0].strip()[1:])
        print(file=sys.stderr)
        print('*** Error while highlighting:', file=sys.stderr)
        print(msg, file=sys.stderr)
        print('*** If this is a bug you want to report, please rerun with -v.',
              file=sys.stderr)
        return 1
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/pygments/filters/__init__.py ADDED
@@ -0,0 +1,940 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ pygments.filters
3
+ ~~~~~~~~~~~~~~~~
4
+
5
+ Module containing filter lookup functions and default
6
+ filters.
7
+
8
+ :copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
9
+ :license: BSD, see LICENSE for details.
10
+ """
11
+
12
+ import re
13
+
14
+ from pip._vendor.pygments.token import String, Comment, Keyword, Name, Error, Whitespace, \
15
+ string_to_tokentype
16
+ from pip._vendor.pygments.filter import Filter
17
+ from pip._vendor.pygments.util import get_list_opt, get_int_opt, get_bool_opt, \
18
+ get_choice_opt, ClassNotFound, OptionError
19
+ from pip._vendor.pygments.plugin import find_plugin_filters
20
+
21
+
22
def find_filter_class(filtername):
    """Lookup a filter by name. Return None if not found."""
    try:
        # Builtin filters take precedence over plugin-provided ones.
        return FILTERS[filtername]
    except KeyError:
        pass
    return next((cls for name, cls in find_plugin_filters()
                 if name == filtername), None)
30
+
31
+
32
def get_filter_by_name(filtername, **options):
    """Return an instantiated filter.

    Options are passed to the filter initializer if wanted.
    Raise a ClassNotFound if not found.
    """
    cls = find_filter_class(filtername)
    if not cls:
        raise ClassNotFound(f'filter {filtername!r} not found')
    return cls(**options)
43
+
44
+
45
def get_all_filters():
    """Return a generator of all filter names."""
    # Builtin filter names first, then any registered plugin filters.
    yield from FILTERS
    yield from (name for name, _ in find_plugin_filters())
50
+
51
+
52
+ def _replace_special(ttype, value, regex, specialttype,
53
+ replacefunc=lambda x: x):
54
+ last = 0
55
+ for match in regex.finditer(value):
56
+ start, end = match.start(), match.end()
57
+ if start != last:
58
+ yield ttype, value[last:start]
59
+ yield specialttype, replacefunc(value[start:end])
60
+ last = end
61
+ if last != len(value):
62
+ yield ttype, value[last:]
63
+
64
+
65
class CodeTagFilter(Filter):
    """Highlight special code tags in comments and docstrings.

    Options accepted:

    `codetags` : list of strings
        A list of strings that are flagged as code tags. The default is to
        highlight ``XXX``, ``TODO``, ``FIXME``, ``BUG`` and ``NOTE``.

    .. versionchanged:: 2.13
       Now recognizes ``FIXME`` by default.
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        tags = get_list_opt(options, 'codetags',
                            ['XXX', 'TODO', 'FIXME', 'BUG', 'NOTE'])
        # One alternation matching any non-empty tag on word boundaries;
        # empty tags are dropped so they cannot match everywhere.
        self.tag_re = re.compile(r'\b({})\b'.format('|'.join([
            re.escape(tag) for tag in tags if tag
        ])))

    def filter(self, lexer, stream):
        regex = self.tag_re
        for ttype, value in stream:
            # Only rewrite tags inside docstrings and comments, but leave
            # preprocessor comments untouched.
            if ttype in String.Doc or \
               ttype in Comment and \
               ttype not in Comment.Preproc:
                # Matched tags are re-emitted as Comment.Special tokens.
                yield from _replace_special(ttype, value, regex, Comment.Special)
            else:
                yield ttype, value
95
+
96
+
97
+ class SymbolFilter(Filter):
98
+ """Convert mathematical symbols such as \\<longrightarrow> in Isabelle
99
+ or \\longrightarrow in LaTeX into Unicode characters.
100
+
101
+ This is mostly useful for HTML or console output when you want to
102
+ approximate the source rendering you'd see in an IDE.
103
+
104
+ Options accepted:
105
+
106
+ `lang` : string
107
+ The symbol language. Must be one of ``'isabelle'`` or
108
+ ``'latex'``. The default is ``'isabelle'``.
109
+ """
110
+
111
+ latex_symbols = {
112
+ '\\alpha' : '\U000003b1',
113
+ '\\beta' : '\U000003b2',
114
+ '\\gamma' : '\U000003b3',
115
+ '\\delta' : '\U000003b4',
116
+ '\\varepsilon' : '\U000003b5',
117
+ '\\zeta' : '\U000003b6',
118
+ '\\eta' : '\U000003b7',
119
+ '\\vartheta' : '\U000003b8',
120
+ '\\iota' : '\U000003b9',
121
+ '\\kappa' : '\U000003ba',
122
+ '\\lambda' : '\U000003bb',
123
+ '\\mu' : '\U000003bc',
124
+ '\\nu' : '\U000003bd',
125
+ '\\xi' : '\U000003be',
126
+ '\\pi' : '\U000003c0',
127
+ '\\varrho' : '\U000003c1',
128
+ '\\sigma' : '\U000003c3',
129
+ '\\tau' : '\U000003c4',
130
+ '\\upsilon' : '\U000003c5',
131
+ '\\varphi' : '\U000003c6',
132
+ '\\chi' : '\U000003c7',
133
+ '\\psi' : '\U000003c8',
134
+ '\\omega' : '\U000003c9',
135
+ '\\Gamma' : '\U00000393',
136
+ '\\Delta' : '\U00000394',
137
+ '\\Theta' : '\U00000398',
138
+ '\\Lambda' : '\U0000039b',
139
+ '\\Xi' : '\U0000039e',
140
+ '\\Pi' : '\U000003a0',
141
+ '\\Sigma' : '\U000003a3',
142
+ '\\Upsilon' : '\U000003a5',
143
+ '\\Phi' : '\U000003a6',
144
+ '\\Psi' : '\U000003a8',
145
+ '\\Omega' : '\U000003a9',
146
+ '\\leftarrow' : '\U00002190',
147
+ '\\longleftarrow' : '\U000027f5',
148
+ '\\rightarrow' : '\U00002192',
149
+ '\\longrightarrow' : '\U000027f6',
150
+ '\\Leftarrow' : '\U000021d0',
151
+ '\\Longleftarrow' : '\U000027f8',
152
+ '\\Rightarrow' : '\U000021d2',
153
+ '\\Longrightarrow' : '\U000027f9',
154
+ '\\leftrightarrow' : '\U00002194',
155
+ '\\longleftrightarrow' : '\U000027f7',
156
+ '\\Leftrightarrow' : '\U000021d4',
157
+ '\\Longleftrightarrow' : '\U000027fa',
158
+ '\\mapsto' : '\U000021a6',
159
+ '\\longmapsto' : '\U000027fc',
160
+ '\\relbar' : '\U00002500',
161
+ '\\Relbar' : '\U00002550',
162
+ '\\hookleftarrow' : '\U000021a9',
163
+ '\\hookrightarrow' : '\U000021aa',
164
+ '\\leftharpoondown' : '\U000021bd',
165
+ '\\rightharpoondown' : '\U000021c1',
166
+ '\\leftharpoonup' : '\U000021bc',
167
+ '\\rightharpoonup' : '\U000021c0',
168
+ '\\rightleftharpoons' : '\U000021cc',
169
+ '\\leadsto' : '\U0000219d',
170
+ '\\downharpoonleft' : '\U000021c3',
171
+ '\\downharpoonright' : '\U000021c2',
172
+ '\\upharpoonleft' : '\U000021bf',
173
+ '\\upharpoonright' : '\U000021be',
174
+ '\\restriction' : '\U000021be',
175
+ '\\uparrow' : '\U00002191',
176
+ '\\Uparrow' : '\U000021d1',
177
+ '\\downarrow' : '\U00002193',
178
+ '\\Downarrow' : '\U000021d3',
179
+ '\\updownarrow' : '\U00002195',
180
+ '\\Updownarrow' : '\U000021d5',
181
+ '\\langle' : '\U000027e8',
182
+ '\\rangle' : '\U000027e9',
183
+ '\\lceil' : '\U00002308',
184
+ '\\rceil' : '\U00002309',
185
+ '\\lfloor' : '\U0000230a',
186
+ '\\rfloor' : '\U0000230b',
187
+ '\\flqq' : '\U000000ab',
188
+ '\\frqq' : '\U000000bb',
189
+ '\\bot' : '\U000022a5',
190
+ '\\top' : '\U000022a4',
191
+ '\\wedge' : '\U00002227',
192
+ '\\bigwedge' : '\U000022c0',
193
+ '\\vee' : '\U00002228',
194
+ '\\bigvee' : '\U000022c1',
195
+ '\\forall' : '\U00002200',
196
+ '\\exists' : '\U00002203',
197
+ '\\nexists' : '\U00002204',
198
+ '\\neg' : '\U000000ac',
199
+ '\\Box' : '\U000025a1',
200
+ '\\Diamond' : '\U000025c7',
201
+ '\\vdash' : '\U000022a2',
202
+ '\\models' : '\U000022a8',
203
+ '\\dashv' : '\U000022a3',
204
+ '\\surd' : '\U0000221a',
205
+ '\\le' : '\U00002264',
206
+ '\\ge' : '\U00002265',
207
+ '\\ll' : '\U0000226a',
208
+ '\\gg' : '\U0000226b',
209
+ '\\lesssim' : '\U00002272',
210
+ '\\gtrsim' : '\U00002273',
211
+ '\\lessapprox' : '\U00002a85',
212
+ '\\gtrapprox' : '\U00002a86',
213
+ '\\in' : '\U00002208',
214
+ '\\notin' : '\U00002209',
215
+ '\\subset' : '\U00002282',
216
+ '\\supset' : '\U00002283',
217
+ '\\subseteq' : '\U00002286',
218
+ '\\supseteq' : '\U00002287',
219
+ '\\sqsubset' : '\U0000228f',
220
+ '\\sqsupset' : '\U00002290',
221
+ '\\sqsubseteq' : '\U00002291',
222
+ '\\sqsupseteq' : '\U00002292',
223
+ '\\cap' : '\U00002229',
224
+ '\\bigcap' : '\U000022c2',
225
+ '\\cup' : '\U0000222a',
226
+ '\\bigcup' : '\U000022c3',
227
+ '\\sqcup' : '\U00002294',
228
+ '\\bigsqcup' : '\U00002a06',
229
+ '\\sqcap' : '\U00002293',
230
+ '\\Bigsqcap' : '\U00002a05',
231
+ '\\setminus' : '\U00002216',
232
+ '\\propto' : '\U0000221d',
233
+ '\\uplus' : '\U0000228e',
234
+ '\\bigplus' : '\U00002a04',
235
+ '\\sim' : '\U0000223c',
236
+ '\\doteq' : '\U00002250',
237
+ '\\simeq' : '\U00002243',
238
+ '\\approx' : '\U00002248',
239
+ '\\asymp' : '\U0000224d',
240
+ '\\cong' : '\U00002245',
241
+ '\\equiv' : '\U00002261',
242
+ '\\Join' : '\U000022c8',
243
+ '\\bowtie' : '\U00002a1d',
244
+ '\\prec' : '\U0000227a',
245
+ '\\succ' : '\U0000227b',
246
+ '\\preceq' : '\U0000227c',
247
+ '\\succeq' : '\U0000227d',
248
+ '\\parallel' : '\U00002225',
249
+ '\\mid' : '\U000000a6',
250
+ '\\pm' : '\U000000b1',
251
+ '\\mp' : '\U00002213',
252
+ '\\times' : '\U000000d7',
253
+ '\\div' : '\U000000f7',
254
+ '\\cdot' : '\U000022c5',
255
+ '\\star' : '\U000022c6',
256
+ '\\circ' : '\U00002218',
257
+ '\\dagger' : '\U00002020',
258
+ '\\ddagger' : '\U00002021',
259
+ '\\lhd' : '\U000022b2',
260
+ '\\rhd' : '\U000022b3',
261
+ '\\unlhd' : '\U000022b4',
262
+ '\\unrhd' : '\U000022b5',
263
+ '\\triangleleft' : '\U000025c3',
264
+ '\\triangleright' : '\U000025b9',
265
+ '\\triangle' : '\U000025b3',
266
+ '\\triangleq' : '\U0000225c',
267
+ '\\oplus' : '\U00002295',
268
+ '\\bigoplus' : '\U00002a01',
269
+ '\\otimes' : '\U00002297',
270
+ '\\bigotimes' : '\U00002a02',
271
+ '\\odot' : '\U00002299',
272
+ '\\bigodot' : '\U00002a00',
273
+ '\\ominus' : '\U00002296',
274
+ '\\oslash' : '\U00002298',
275
+ '\\dots' : '\U00002026',
276
+ '\\cdots' : '\U000022ef',
277
+ '\\sum' : '\U00002211',
278
+ '\\prod' : '\U0000220f',
279
+ '\\coprod' : '\U00002210',
280
+ '\\infty' : '\U0000221e',
281
+ '\\int' : '\U0000222b',
282
+ '\\oint' : '\U0000222e',
283
+ '\\clubsuit' : '\U00002663',
284
+ '\\diamondsuit' : '\U00002662',
285
+ '\\heartsuit' : '\U00002661',
286
+ '\\spadesuit' : '\U00002660',
287
+ '\\aleph' : '\U00002135',
288
+ '\\emptyset' : '\U00002205',
289
+ '\\nabla' : '\U00002207',
290
+ '\\partial' : '\U00002202',
291
+ '\\flat' : '\U0000266d',
292
+ '\\natural' : '\U0000266e',
293
+ '\\sharp' : '\U0000266f',
294
+ '\\angle' : '\U00002220',
295
+ '\\copyright' : '\U000000a9',
296
+ '\\textregistered' : '\U000000ae',
297
+ '\\textonequarter' : '\U000000bc',
298
+ '\\textonehalf' : '\U000000bd',
299
+ '\\textthreequarters' : '\U000000be',
300
+ '\\textordfeminine' : '\U000000aa',
301
+ '\\textordmasculine' : '\U000000ba',
302
+ '\\euro' : '\U000020ac',
303
+ '\\pounds' : '\U000000a3',
304
+ '\\yen' : '\U000000a5',
305
+ '\\textcent' : '\U000000a2',
306
+ '\\textcurrency' : '\U000000a4',
307
+ '\\textdegree' : '\U000000b0',
308
+ }
309
+
310
+ isabelle_symbols = {
311
+ '\\<zero>' : '\U0001d7ec',
312
+ '\\<one>' : '\U0001d7ed',
313
+ '\\<two>' : '\U0001d7ee',
314
+ '\\<three>' : '\U0001d7ef',
315
+ '\\<four>' : '\U0001d7f0',
316
+ '\\<five>' : '\U0001d7f1',
317
+ '\\<six>' : '\U0001d7f2',
318
+ '\\<seven>' : '\U0001d7f3',
319
+ '\\<eight>' : '\U0001d7f4',
320
+ '\\<nine>' : '\U0001d7f5',
321
+ '\\<A>' : '\U0001d49c',
322
+ '\\<B>' : '\U0000212c',
323
+ '\\<C>' : '\U0001d49e',
324
+ '\\<D>' : '\U0001d49f',
325
+ '\\<E>' : '\U00002130',
326
+ '\\<F>' : '\U00002131',
327
+ '\\<G>' : '\U0001d4a2',
328
+ '\\<H>' : '\U0000210b',
329
+ '\\<I>' : '\U00002110',
330
+ '\\<J>' : '\U0001d4a5',
331
+ '\\<K>' : '\U0001d4a6',
332
+ '\\<L>' : '\U00002112',
333
+ '\\<M>' : '\U00002133',
334
+ '\\<N>' : '\U0001d4a9',
335
+ '\\<O>' : '\U0001d4aa',
336
+ '\\<P>' : '\U0001d4ab',
337
+ '\\<Q>' : '\U0001d4ac',
338
+ '\\<R>' : '\U0000211b',
339
+ '\\<S>' : '\U0001d4ae',
340
+ '\\<T>' : '\U0001d4af',
341
+ '\\<U>' : '\U0001d4b0',
342
+ '\\<V>' : '\U0001d4b1',
343
+ '\\<W>' : '\U0001d4b2',
344
+ '\\<X>' : '\U0001d4b3',
345
+ '\\<Y>' : '\U0001d4b4',
346
+ '\\<Z>' : '\U0001d4b5',
347
+ '\\<a>' : '\U0001d5ba',
348
+ '\\<b>' : '\U0001d5bb',
349
+ '\\<c>' : '\U0001d5bc',
350
+ '\\<d>' : '\U0001d5bd',
351
+ '\\<e>' : '\U0001d5be',
352
+ '\\<f>' : '\U0001d5bf',
353
+ '\\<g>' : '\U0001d5c0',
354
+ '\\<h>' : '\U0001d5c1',
355
+ '\\<i>' : '\U0001d5c2',
356
+ '\\<j>' : '\U0001d5c3',
357
+ '\\<k>' : '\U0001d5c4',
358
+ '\\<l>' : '\U0001d5c5',
359
+ '\\<m>' : '\U0001d5c6',
360
+ '\\<n>' : '\U0001d5c7',
361
+ '\\<o>' : '\U0001d5c8',
362
+ '\\<p>' : '\U0001d5c9',
363
+ '\\<q>' : '\U0001d5ca',
364
+ '\\<r>' : '\U0001d5cb',
365
+ '\\<s>' : '\U0001d5cc',
366
+ '\\<t>' : '\U0001d5cd',
367
+ '\\<u>' : '\U0001d5ce',
368
+ '\\<v>' : '\U0001d5cf',
369
+ '\\<w>' : '\U0001d5d0',
370
+ '\\<x>' : '\U0001d5d1',
371
+ '\\<y>' : '\U0001d5d2',
372
+ '\\<z>' : '\U0001d5d3',
373
+ '\\<AA>' : '\U0001d504',
374
+ '\\<BB>' : '\U0001d505',
375
+ '\\<CC>' : '\U0000212d',
376
+ '\\<DD>' : '\U0001d507',
377
+ '\\<EE>' : '\U0001d508',
378
+ '\\<FF>' : '\U0001d509',
379
+ '\\<GG>' : '\U0001d50a',
380
+ '\\<HH>' : '\U0000210c',
381
+ '\\<II>' : '\U00002111',
382
+ '\\<JJ>' : '\U0001d50d',
383
+ '\\<KK>' : '\U0001d50e',
384
+ '\\<LL>' : '\U0001d50f',
385
+ '\\<MM>' : '\U0001d510',
386
+ '\\<NN>' : '\U0001d511',
387
+ '\\<OO>' : '\U0001d512',
388
+ '\\<PP>' : '\U0001d513',
389
+ '\\<QQ>' : '\U0001d514',
390
+ '\\<RR>' : '\U0000211c',
391
+ '\\<SS>' : '\U0001d516',
392
+ '\\<TT>' : '\U0001d517',
393
+ '\\<UU>' : '\U0001d518',
394
+ '\\<VV>' : '\U0001d519',
395
+ '\\<WW>' : '\U0001d51a',
396
+ '\\<XX>' : '\U0001d51b',
397
+ '\\<YY>' : '\U0001d51c',
398
+ '\\<ZZ>' : '\U00002128',
399
+ '\\<aa>' : '\U0001d51e',
400
+ '\\<bb>' : '\U0001d51f',
401
+ '\\<cc>' : '\U0001d520',
402
+ '\\<dd>' : '\U0001d521',
403
+ '\\<ee>' : '\U0001d522',
404
+ '\\<ff>' : '\U0001d523',
405
+ '\\<gg>' : '\U0001d524',
406
+ '\\<hh>' : '\U0001d525',
407
+ '\\<ii>' : '\U0001d526',
408
+ '\\<jj>' : '\U0001d527',
409
+ '\\<kk>' : '\U0001d528',
410
+ '\\<ll>' : '\U0001d529',
411
+ '\\<mm>' : '\U0001d52a',
412
+ '\\<nn>' : '\U0001d52b',
413
+ '\\<oo>' : '\U0001d52c',
414
+ '\\<pp>' : '\U0001d52d',
415
+ '\\<qq>' : '\U0001d52e',
416
+ '\\<rr>' : '\U0001d52f',
417
+ '\\<ss>' : '\U0001d530',
418
+ '\\<tt>' : '\U0001d531',
419
+ '\\<uu>' : '\U0001d532',
420
+ '\\<vv>' : '\U0001d533',
421
+ '\\<ww>' : '\U0001d534',
422
+ '\\<xx>' : '\U0001d535',
423
+ '\\<yy>' : '\U0001d536',
424
+ '\\<zz>' : '\U0001d537',
425
+ '\\<alpha>' : '\U000003b1',
426
+ '\\<beta>' : '\U000003b2',
427
+ '\\<gamma>' : '\U000003b3',
428
+ '\\<delta>' : '\U000003b4',
429
+ '\\<epsilon>' : '\U000003b5',
430
+ '\\<zeta>' : '\U000003b6',
431
+ '\\<eta>' : '\U000003b7',
432
+ '\\<theta>' : '\U000003b8',
433
+ '\\<iota>' : '\U000003b9',
434
+ '\\<kappa>' : '\U000003ba',
435
+ '\\<lambda>' : '\U000003bb',
436
+ '\\<mu>' : '\U000003bc',
437
+ '\\<nu>' : '\U000003bd',
438
+ '\\<xi>' : '\U000003be',
439
+ '\\<pi>' : '\U000003c0',
440
+ '\\<rho>' : '\U000003c1',
441
+ '\\<sigma>' : '\U000003c3',
442
+ '\\<tau>' : '\U000003c4',
443
+ '\\<upsilon>' : '\U000003c5',
444
+ '\\<phi>' : '\U000003c6',
445
+ '\\<chi>' : '\U000003c7',
446
+ '\\<psi>' : '\U000003c8',
447
+ '\\<omega>' : '\U000003c9',
448
+ '\\<Gamma>' : '\U00000393',
449
+ '\\<Delta>' : '\U00000394',
450
+ '\\<Theta>' : '\U00000398',
451
+ '\\<Lambda>' : '\U0000039b',
452
+ '\\<Xi>' : '\U0000039e',
453
+ '\\<Pi>' : '\U000003a0',
454
+ '\\<Sigma>' : '\U000003a3',
455
+ '\\<Upsilon>' : '\U000003a5',
456
+ '\\<Phi>' : '\U000003a6',
457
+ '\\<Psi>' : '\U000003a8',
458
+ '\\<Omega>' : '\U000003a9',
459
+ '\\<bool>' : '\U0001d539',
460
+ '\\<complex>' : '\U00002102',
461
+ '\\<nat>' : '\U00002115',
462
+ '\\<rat>' : '\U0000211a',
463
+ '\\<real>' : '\U0000211d',
464
+ '\\<int>' : '\U00002124',
465
+ '\\<leftarrow>' : '\U00002190',
466
+ '\\<longleftarrow>' : '\U000027f5',
467
+ '\\<rightarrow>' : '\U00002192',
468
+ '\\<longrightarrow>' : '\U000027f6',
469
+ '\\<Leftarrow>' : '\U000021d0',
470
+ '\\<Longleftarrow>' : '\U000027f8',
471
+ '\\<Rightarrow>' : '\U000021d2',
472
+ '\\<Longrightarrow>' : '\U000027f9',
473
+ '\\<leftrightarrow>' : '\U00002194',
474
+ '\\<longleftrightarrow>' : '\U000027f7',
475
+ '\\<Leftrightarrow>' : '\U000021d4',
476
+ '\\<Longleftrightarrow>' : '\U000027fa',
477
+ '\\<mapsto>' : '\U000021a6',
478
+ '\\<longmapsto>' : '\U000027fc',
479
+ '\\<midarrow>' : '\U00002500',
480
+ '\\<Midarrow>' : '\U00002550',
481
+ '\\<hookleftarrow>' : '\U000021a9',
482
+ '\\<hookrightarrow>' : '\U000021aa',
483
+ '\\<leftharpoondown>' : '\U000021bd',
484
+ '\\<rightharpoondown>' : '\U000021c1',
485
+ '\\<leftharpoonup>' : '\U000021bc',
486
+ '\\<rightharpoonup>' : '\U000021c0',
487
+ '\\<rightleftharpoons>' : '\U000021cc',
488
+ '\\<leadsto>' : '\U0000219d',
489
+ '\\<downharpoonleft>' : '\U000021c3',
490
+ '\\<downharpoonright>' : '\U000021c2',
491
+ '\\<upharpoonleft>' : '\U000021bf',
492
+ '\\<upharpoonright>' : '\U000021be',
493
+ '\\<restriction>' : '\U000021be',
494
+ '\\<Colon>' : '\U00002237',
495
+ '\\<up>' : '\U00002191',
496
+ '\\<Up>' : '\U000021d1',
497
+ '\\<down>' : '\U00002193',
498
+ '\\<Down>' : '\U000021d3',
499
+ '\\<updown>' : '\U00002195',
500
+ '\\<Updown>' : '\U000021d5',
501
+ '\\<langle>' : '\U000027e8',
502
+ '\\<rangle>' : '\U000027e9',
503
+ '\\<lceil>' : '\U00002308',
504
+ '\\<rceil>' : '\U00002309',
505
+ '\\<lfloor>' : '\U0000230a',
506
+ '\\<rfloor>' : '\U0000230b',
507
+ '\\<lparr>' : '\U00002987',
508
+ '\\<rparr>' : '\U00002988',
509
+ '\\<lbrakk>' : '\U000027e6',
510
+ '\\<rbrakk>' : '\U000027e7',
511
+ '\\<lbrace>' : '\U00002983',
512
+ '\\<rbrace>' : '\U00002984',
513
+ '\\<guillemotleft>' : '\U000000ab',
514
+ '\\<guillemotright>' : '\U000000bb',
515
+ '\\<bottom>' : '\U000022a5',
516
+ '\\<top>' : '\U000022a4',
517
+ '\\<and>' : '\U00002227',
518
+ '\\<And>' : '\U000022c0',
519
+ '\\<or>' : '\U00002228',
520
+ '\\<Or>' : '\U000022c1',
521
+ '\\<forall>' : '\U00002200',
522
+ '\\<exists>' : '\U00002203',
523
+ '\\<nexists>' : '\U00002204',
524
+ '\\<not>' : '\U000000ac',
525
+ '\\<box>' : '\U000025a1',
526
+ '\\<diamond>' : '\U000025c7',
527
+ '\\<turnstile>' : '\U000022a2',
528
+ '\\<Turnstile>' : '\U000022a8',
529
+ '\\<tturnstile>' : '\U000022a9',
530
+ '\\<TTurnstile>' : '\U000022ab',
531
+ '\\<stileturn>' : '\U000022a3',
532
+ '\\<surd>' : '\U0000221a',
533
+ '\\<le>' : '\U00002264',
534
+ '\\<ge>' : '\U00002265',
535
+ '\\<lless>' : '\U0000226a',
536
+ '\\<ggreater>' : '\U0000226b',
537
+ '\\<lesssim>' : '\U00002272',
538
+ '\\<greatersim>' : '\U00002273',
539
+ '\\<lessapprox>' : '\U00002a85',
540
+ '\\<greaterapprox>' : '\U00002a86',
541
+ '\\<in>' : '\U00002208',
542
+ '\\<notin>' : '\U00002209',
543
+ '\\<subset>' : '\U00002282',
544
+ '\\<supset>' : '\U00002283',
545
+ '\\<subseteq>' : '\U00002286',
546
+ '\\<supseteq>' : '\U00002287',
547
+ '\\<sqsubset>' : '\U0000228f',
548
+ '\\<sqsupset>' : '\U00002290',
549
+ '\\<sqsubseteq>' : '\U00002291',
550
+ '\\<sqsupseteq>' : '\U00002292',
551
+ '\\<inter>' : '\U00002229',
552
+ '\\<Inter>' : '\U000022c2',
553
+ '\\<union>' : '\U0000222a',
554
+ '\\<Union>' : '\U000022c3',
555
+ '\\<squnion>' : '\U00002294',
556
+ '\\<Squnion>' : '\U00002a06',
557
+ '\\<sqinter>' : '\U00002293',
558
+ '\\<Sqinter>' : '\U00002a05',
559
+ '\\<setminus>' : '\U00002216',
560
+ '\\<propto>' : '\U0000221d',
561
+ '\\<uplus>' : '\U0000228e',
562
+ '\\<Uplus>' : '\U00002a04',
563
+ '\\<noteq>' : '\U00002260',
564
+ '\\<sim>' : '\U0000223c',
565
+ '\\<doteq>' : '\U00002250',
566
+ '\\<simeq>' : '\U00002243',
567
+ '\\<approx>' : '\U00002248',
568
+ '\\<asymp>' : '\U0000224d',
569
+ '\\<cong>' : '\U00002245',
570
+ '\\<smile>' : '\U00002323',
571
+ '\\<equiv>' : '\U00002261',
572
+ '\\<frown>' : '\U00002322',
573
+ '\\<Join>' : '\U000022c8',
574
+ '\\<bowtie>' : '\U00002a1d',
575
+ '\\<prec>' : '\U0000227a',
576
+ '\\<succ>' : '\U0000227b',
577
+ '\\<preceq>' : '\U0000227c',
578
+ '\\<succeq>' : '\U0000227d',
579
+ '\\<parallel>' : '\U00002225',
580
+ '\\<bar>' : '\U000000a6',
581
+ '\\<plusminus>' : '\U000000b1',
582
+ '\\<minusplus>' : '\U00002213',
583
+ '\\<times>' : '\U000000d7',
584
+ '\\<div>' : '\U000000f7',
585
+ '\\<cdot>' : '\U000022c5',
586
+ '\\<star>' : '\U000022c6',
587
+ '\\<bullet>' : '\U00002219',
588
+ '\\<circ>' : '\U00002218',
589
+ '\\<dagger>' : '\U00002020',
590
+ '\\<ddagger>' : '\U00002021',
591
+ '\\<lhd>' : '\U000022b2',
592
+ '\\<rhd>' : '\U000022b3',
593
+ '\\<unlhd>' : '\U000022b4',
594
+ '\\<unrhd>' : '\U000022b5',
595
+ '\\<triangleleft>' : '\U000025c3',
596
+ '\\<triangleright>' : '\U000025b9',
597
+ '\\<triangle>' : '\U000025b3',
598
+ '\\<triangleq>' : '\U0000225c',
599
+ '\\<oplus>' : '\U00002295',
600
+ '\\<Oplus>' : '\U00002a01',
601
+ '\\<otimes>' : '\U00002297',
602
+ '\\<Otimes>' : '\U00002a02',
603
+ '\\<odot>' : '\U00002299',
604
+ '\\<Odot>' : '\U00002a00',
605
+ '\\<ominus>' : '\U00002296',
606
+ '\\<oslash>' : '\U00002298',
607
+ '\\<dots>' : '\U00002026',
608
+ '\\<cdots>' : '\U000022ef',
609
+ '\\<Sum>' : '\U00002211',
610
+ '\\<Prod>' : '\U0000220f',
611
+ '\\<Coprod>' : '\U00002210',
612
+ '\\<infinity>' : '\U0000221e',
613
+ '\\<integral>' : '\U0000222b',
614
+ '\\<ointegral>' : '\U0000222e',
615
+ '\\<clubsuit>' : '\U00002663',
616
+ '\\<diamondsuit>' : '\U00002662',
617
+ '\\<heartsuit>' : '\U00002661',
618
+ '\\<spadesuit>' : '\U00002660',
619
+ '\\<aleph>' : '\U00002135',
620
+ '\\<emptyset>' : '\U00002205',
621
+ '\\<nabla>' : '\U00002207',
622
+ '\\<partial>' : '\U00002202',
623
+ '\\<flat>' : '\U0000266d',
624
+ '\\<natural>' : '\U0000266e',
625
+ '\\<sharp>' : '\U0000266f',
626
+ '\\<angle>' : '\U00002220',
627
+ '\\<copyright>' : '\U000000a9',
628
+ '\\<registered>' : '\U000000ae',
629
+ '\\<hyphen>' : '\U000000ad',
630
+ '\\<inverse>' : '\U000000af',
631
+ '\\<onequarter>' : '\U000000bc',
632
+ '\\<onehalf>' : '\U000000bd',
633
+ '\\<threequarters>' : '\U000000be',
634
+ '\\<ordfeminine>' : '\U000000aa',
635
+ '\\<ordmasculine>' : '\U000000ba',
636
+ '\\<section>' : '\U000000a7',
637
+ '\\<paragraph>' : '\U000000b6',
638
+ '\\<exclamdown>' : '\U000000a1',
639
+ '\\<questiondown>' : '\U000000bf',
640
+ '\\<euro>' : '\U000020ac',
641
+ '\\<pounds>' : '\U000000a3',
642
+ '\\<yen>' : '\U000000a5',
643
+ '\\<cent>' : '\U000000a2',
644
+ '\\<currency>' : '\U000000a4',
645
+ '\\<degree>' : '\U000000b0',
646
+ '\\<amalg>' : '\U00002a3f',
647
+ '\\<mho>' : '\U00002127',
648
+ '\\<lozenge>' : '\U000025ca',
649
+ '\\<wp>' : '\U00002118',
650
+ '\\<wrong>' : '\U00002240',
651
+ '\\<struct>' : '\U000022c4',
652
+ '\\<acute>' : '\U000000b4',
653
+ '\\<index>' : '\U00000131',
654
+ '\\<dieresis>' : '\U000000a8',
655
+ '\\<cedilla>' : '\U000000b8',
656
+ '\\<hungarumlaut>' : '\U000002dd',
657
+ '\\<some>' : '\U000003f5',
658
+ '\\<newline>' : '\U000023ce',
659
+ '\\<open>' : '\U00002039',
660
+ '\\<close>' : '\U0000203a',
661
+ '\\<here>' : '\U00002302',
662
+ '\\<^sub>' : '\U000021e9',
663
+ '\\<^sup>' : '\U000021e7',
664
+ '\\<^bold>' : '\U00002759',
665
+ '\\<^bsub>' : '\U000021d8',
666
+ '\\<^esub>' : '\U000021d9',
667
+ '\\<^bsup>' : '\U000021d7',
668
+ '\\<^esup>' : '\U000021d6',
669
+ }
670
+
671
    # Maps the value of the ``lang`` option to its symbol table.
    lang_map = {'isabelle' : isabelle_symbols, 'latex' : latex_symbols}
672
+
673
+ def __init__(self, **options):
674
+ Filter.__init__(self, **options)
675
+ lang = get_choice_opt(options, 'lang',
676
+ ['isabelle', 'latex'], 'isabelle')
677
+ self.symbols = self.lang_map[lang]
678
+
679
+ def filter(self, lexer, stream):
680
+ for ttype, value in stream:
681
+ if value in self.symbols:
682
+ yield ttype, self.symbols[value]
683
+ else:
684
+ yield ttype, value
685
+
686
+
687
class KeywordCaseFilter(Filter):
    """Convert keywords to lowercase or uppercase or capitalize them, which
    means first letter uppercase, rest lowercase.

    This can be useful e.g. if you highlight Pascal code and want to adapt the
    code to your styleguide.

    Options accepted:

    `case` : string
        The casing to convert keywords to. Must be one of ``'lower'``,
        ``'upper'`` or ``'capitalize'``. The default is ``'lower'``.
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        chosen = get_choice_opt(options, 'case',
                                ['lower', 'upper', 'capitalize'], 'lower')
        # Bind the matching unbound str method (str.lower / str.upper /
        # str.capitalize) once instead of dispatching per token.
        self.convert = getattr(str, chosen)

    def filter(self, lexer, stream):
        convert = self.convert
        for ttype, value in stream:
            yield ttype, (convert(value) if ttype in Keyword else value)
713
+
714
+
715
class NameHighlightFilter(Filter):
    """Highlight a normal Name (and Name.*) token with a different token type.

    Example::

        filter = NameHighlightFilter(
            names=['foo', 'bar', 'baz'],
            tokentype=Name.Function,
        )

    This would highlight the names "foo", "bar" and "baz"
    as functions. `Name.Function` is the default token type.

    Options accepted:

    `names` : list of strings
        A list of names that should be given the different token type.
        There is no default.
    `tokentype` : TokenType or string
        A token type or a string containing a token type name that is
        used for highlighting the strings in `names`. The default is
        `Name.Function`.
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        self.names = set(get_list_opt(options, 'names', []))
        requested = options.get('tokentype')
        # Fall back to Name.Function when no explicit token type was given.
        self.tokentype = (string_to_tokentype(requested)
                          if requested else Name.Function)

    def filter(self, lexer, stream):
        wanted = self.names
        highlight_as = self.tokentype
        for ttype, value in stream:
            if ttype in Name and value in wanted:
                yield highlight_as, value
            else:
                yield ttype, value
754
+
755
+
756
class ErrorToken(Exception):
    """Default exception raised by `RaiseOnErrorTokenFilter` for error tokens."""
    pass
758
+
759
+
760
class RaiseOnErrorTokenFilter(Filter):
    """Raise an exception when the lexer generates an error token.

    Options accepted:

    `excclass` : Exception class
        The exception class to raise.
        The default is `pygments.filters.ErrorToken`.

    .. versionadded:: 0.8
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        self.exception = options.get('excclass', ErrorToken)
        # issubclass() raises TypeError when its first argument is not a
        # class at all; treat that the same as "not an Exception subclass".
        try:
            valid = issubclass(self.exception, Exception)
        except TypeError:
            valid = False
        if not valid:
            raise OptionError('excclass option is not an exception class')

    def filter(self, lexer, stream):
        for ttype, value in stream:
            if ttype is Error:
                raise self.exception(value)
            yield ttype, value
787
+
788
+
789
class VisibleWhitespaceFilter(Filter):
    """Convert tabs, newlines and/or spaces to visible characters.

    Options accepted:

    `spaces` : string or bool
        If this is a one-character string, spaces will be replaced by this string.
        If it is another true value, spaces will be replaced by ``·`` (unicode
        MIDDLE DOT). If it is a false value, spaces will not be replaced. The
        default is ``False``.
    `tabs` : string or bool
        The same as for `spaces`, but the default replacement character is ``»``
        (unicode RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK). The default value
        is ``False``. Note: this will not work if the `tabsize` option for the
        lexer is nonzero, as tabs will already have been expanded then.
    `tabsize` : int
        If tabs are to be replaced by this filter (see the `tabs` option), this
        is the total number of characters that a tab should be expanded to.
        The default is ``8``.
    `newlines` : string or bool
        The same as for `spaces`, but the default replacement character is ``¶``
        (unicode PILCROW SIGN). The default value is ``False``.
    `wstokentype` : bool
        If true, give whitespace the special `Whitespace` token type. This allows
        styling the visible whitespace differently (e.g. greyed out), but it can
        disrupt background colors. The default is ``True``.

    .. versionadded:: 0.8
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        # After this loop, each of self.spaces / self.tabs / self.newlines is
        # either a caller-supplied one-character marker, the default marker
        # (when the option was some other true value), or '' = leave as-is.
        for name, default in [('spaces', '·'),
                              ('tabs', '»'),
                              ('newlines', '¶')]:
            opt = options.get(name, False)
            if isinstance(opt, str) and len(opt) == 1:
                setattr(self, name, opt)
            else:
                setattr(self, name, (opt and default or ''))
        tabsize = get_int_opt(options, 'tabsize', 8)
        if self.tabs:
            # Pad the tab marker with spaces so a replaced tab still spans
            # `tabsize` output columns.
            self.tabs += ' ' * (tabsize - 1)
        if self.newlines:
            # Keep the real newline after the marker so lines still break.
            self.newlines += '\n'
        self.wstt = get_bool_opt(options, 'wstokentype', True)

    def filter(self, lexer, stream):
        if self.wstt:
            # Re-emit whitespace under the Whitespace token type; whitespace
            # kinds that are not being replaced keep their original character.
            spaces = self.spaces or ' '
            tabs = self.tabs or '\t'
            newlines = self.newlines or '\n'
            regex = re.compile(r'\s')

            def replacefunc(wschar):
                if wschar == ' ':
                    return spaces
                elif wschar == '\t':
                    return tabs
                elif wschar == '\n':
                    return newlines
                return wschar

            for ttype, value in stream:
                yield from _replace_special(ttype, value, regex, Whitespace,
                                            replacefunc)
        else:
            spaces, tabs, newlines = self.spaces, self.tabs, self.newlines
            # simpler processing: plain textual substitution, original token
            # types are kept
            for ttype, value in stream:
                if spaces:
                    value = value.replace(' ', spaces)
                if tabs:
                    value = value.replace('\t', tabs)
                if newlines:
                    value = value.replace('\n', newlines)
                yield ttype, value
866
+
867
+
868
class GobbleFilter(Filter):
    """Gobbles source code lines (eats initial characters).

    This filter drops the first ``n`` characters off every line of code. This
    may be useful when the source code fed to the lexer is indented by a fixed
    amount of space that isn't desired in the output.

    Options accepted:

    `n` : int
        The number of characters to gobble.

    .. versionadded:: 1.2
    """
    def __init__(self, **options):
        Filter.__init__(self, **options)
        self.n = get_int_opt(options, 'n', 0)

    def gobble(self, value, left):
        # Drop up to ``left`` characters from ``value``; return the remainder
        # together with how many characters still need dropping afterwards.
        if left < len(value):
            return value[left:], 0
        return '', left - len(value)

    def filter(self, lexer, stream):
        n = self.n
        left = n  # characters still to drop from the current line
        for ttype, value in stream:
            lines = value.split('\n')
            # The first piece continues the current line; every following
            # piece starts a fresh line and loses ``n`` characters.
            lines[0], left = self.gobble(lines[0], left)
            for idx in range(1, len(lines)):
                lines[idx], left = self.gobble(lines[idx], n)
            value = '\n'.join(lines)
            if value:
                yield ttype, value
905
+
906
+
907
class TokenMergeFilter(Filter):
    """Merges consecutive tokens with the same token type in the output
    stream of a lexer.

    .. versionadded:: 1.2
    """
    def __init__(self, **options):
        Filter.__init__(self, **options)

    def filter(self, lexer, stream):
        pending_type = None
        pending_value = None
        for ttype, value in stream:
            if ttype is pending_type:
                # Same type as the run being accumulated: extend it.
                pending_value += value
            else:
                if pending_type is not None:
                    yield pending_type, pending_value
                pending_type, pending_value = ttype, value
        # Flush the final accumulated run, if any.
        if pending_type is not None:
            yield pending_type, pending_value
929
+
930
+
931
# Registry of public filter names (as accepted by ``get_filter_by_name``)
# to their implementing classes.
FILTERS = {
    'codetagify': CodeTagFilter,
    'keywordcase': KeywordCaseFilter,
    'highlight': NameHighlightFilter,
    'raiseonerror': RaiseOnErrorTokenFilter,
    'whitespace': VisibleWhitespaceFilter,
    'gobble': GobbleFilter,
    'tokenmerge': TokenMergeFilter,
    'symbols': SymbolFilter,
}
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/pygments/filters/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (40.2 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/pygments/formatters/rtf.py ADDED
@@ -0,0 +1,349 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ pygments.formatters.rtf
3
+ ~~~~~~~~~~~~~~~~~~~~~~~
4
+
5
+ A formatter that generates RTF files.
6
+
7
+ :copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
8
+ :license: BSD, see LICENSE for details.
9
+ """
10
+
11
+ from collections import OrderedDict
12
+ from pip._vendor.pygments.formatter import Formatter
13
+ from pip._vendor.pygments.style import _ansimap
14
+ from pip._vendor.pygments.util import get_bool_opt, get_int_opt, get_list_opt, surrogatepair
15
+
16
+
17
+ __all__ = ['RtfFormatter']
18
+
19
+
20
class RtfFormatter(Formatter):
    """
    Format tokens as RTF markup. This formatter automatically outputs full RTF
    documents with color information and other useful stuff. Perfect for Copy and
    Paste into Microsoft(R) Word(R) documents.

    Please note that ``encoding`` and ``outencoding`` options are ignored.
    The RTF format is ASCII natively, but handles unicode characters correctly
    thanks to escape sequences.

    .. versionadded:: 0.6

    Additional options accepted:

    `style`
        The style to use, can be a string or a Style subclass (default:
        ``'default'``).

    `fontface`
        The used font family, for example ``Bitstream Vera Sans``. Defaults to
        some generic font which is supposed to have fixed width.

    `fontsize`
        Size of the font used. Size is specified in half points. The
        default is 24 half-points, giving a size 12 font.

        .. versionadded:: 2.0

    `linenos`
        Turn on line numbering (default: ``False``).

        .. versionadded:: 2.18

    `lineno_fontsize`
        Font size for line numbers. Size is specified in half points
        (default: `fontsize`).

        .. versionadded:: 2.18

    `lineno_padding`
        Number of spaces between the (inline) line numbers and the
        source code (default: ``2``).

        .. versionadded:: 2.18

    `linenostart`
        The line number for the first line (default: ``1``).

        .. versionadded:: 2.18

    `linenostep`
        If set to a number n > 1, only every nth line number is printed.

        .. versionadded:: 2.18

    `lineno_color`
        Color for line numbers specified as a hex triplet, e.g. ``'5e5e5e'``.
        Defaults to the style's line number color if it is a hex triplet,
        otherwise ansi bright black.

        .. versionadded:: 2.18

    `hl_lines`
        Specify a list of lines to be highlighted, as line numbers separated by
        spaces, e.g. ``'3 7 8'``. The line numbers are relative to the input
        (i.e. the first line is line 1) unless `hl_linenostart` is set.

        .. versionadded:: 2.18

    `hl_color`
        Color for highlighting the lines specified in `hl_lines`, specified as
        a hex triplet (default: style's `highlight_color`).

        .. versionadded:: 2.18

    `hl_linenostart`
        If set to ``True`` line numbers in `hl_lines` are specified
        relative to `linenostart` (default ``False``).

        .. versionadded:: 2.18
    """
    name = 'RTF'
    aliases = ['rtf']
    filenames = ['*.rtf']

    def __init__(self, **options):
        r"""
        Additional options accepted:

        ``fontface``
            Name of the font used. Could for example be ``'Courier New'``
            to further specify the default which is ``'\fmodern'``. The RTF
            specification claims that ``\fmodern`` are "Fixed-pitch serif
            and sans serif fonts". Hope every RTF implementation thinks
            the same about modern...

        """
        Formatter.__init__(self, **options)
        self.fontface = options.get('fontface') or ''
        self.fontsize = get_int_opt(options, 'fontsize', 0)
        self.linenos = get_bool_opt(options, 'linenos', False)
        self.lineno_fontsize = get_int_opt(options, 'lineno_fontsize',
                                           self.fontsize)
        self.lineno_padding = get_int_opt(options, 'lineno_padding', 2)
        self.linenostart = abs(get_int_opt(options, 'linenostart', 1))
        self.linenostep = abs(get_int_opt(options, 'linenostep', 1))
        self.hl_linenostart = get_bool_opt(options, 'hl_linenostart', False)

        self.hl_color = options.get('hl_color', '')
        if not self.hl_color:
            self.hl_color = self.style.highlight_color

        # Non-integer entries in hl_lines are silently dropped.
        self.hl_lines = []
        for lineno in get_list_opt(options, 'hl_lines', []):
            try:
                lineno = int(lineno)
                if self.hl_linenostart:
                    lineno = lineno - self.linenostart + 1
                self.hl_lines.append(lineno)
            except ValueError:
                pass

        self.lineno_color = options.get('lineno_color', '')
        if not self.lineno_color:
            if self.style.line_number_color == 'inherit':
                # style color is the css value 'inherit'
                # default to ansi bright-black
                self.lineno_color = _ansimap['ansibrightblack']
            else:
                # style color is assumed to be a hex triplet as other
                # colors in pygments/style.py
                self.lineno_color = self.style.line_number_color

        self.color_mapping = self._create_color_mapping()

    def _escape(self, text):
        # Escape the three characters that are special in RTF.
        return text.replace('\\', '\\\\') \
                   .replace('{', '\\{') \
                   .replace('}', '\\}')

    def _escape_text(self, text):
        # empty strings, should give a small performance improvement
        if not text:
            return ''

        # escape text
        text = self._escape(text)

        buf = []
        for c in text:
            cn = ord(c)
            if cn < (2**7):
                # ASCII character
                buf.append(str(c))
            elif (2**7) <= cn < (2**16):
                # single unicode escape sequence
                # (\uc0 in the header means no fallback chars follow \uN)
                buf.append('{\\u%d}' % cn)
            elif (2**16) <= cn:
                # RTF limits unicode to 16 bits.
                # Force surrogate pairs
                buf.append('{\\u%d}{\\u%d}' % surrogatepair(cn))

        return ''.join(buf).replace('\n', '\\par')

    @staticmethod
    def hex_to_rtf_color(hex_color):
        # Convert '#rrggbb' or 'rrggbb' to an RTF color-table entry.
        if hex_color[0] == "#":
            hex_color = hex_color[1:]

        return '\\red%d\\green%d\\blue%d;' % (
            int(hex_color[0:2], 16),
            int(hex_color[2:4], 16),
            int(hex_color[4:6], 16)
        )

    def _split_tokens_on_newlines(self, tokensource):
        """
        Split tokens containing newline characters into multiple token
        each representing a line of the input file. Needed for numbering
        lines of e.g. multiline comments.
        """
        for ttype, value in tokensource:
            if value == '\n':
                yield (ttype, value)
            elif "\n" in value:
                lines = value.split("\n")
                for line in lines[:-1]:
                    yield (ttype, line+"\n")
                if lines[-1]:
                    yield (ttype, lines[-1])
            else:
                yield (ttype, value)

    def _create_color_mapping(self):
        """
        Create a mapping of style hex colors to index/offset in
        the RTF color table.
        """
        # Offset 0 is the implicit default color of the '{\colortbl;' entry.
        color_mapping = OrderedDict()
        offset = 1

        if self.linenos:
            color_mapping[self.lineno_color] = offset
            offset += 1

        if self.hl_lines:
            color_mapping[self.hl_color] = offset
            offset += 1

        for _, style in self.style:
            for color in style['color'], style['bgcolor'], style['border']:
                if color and color not in color_mapping:
                    color_mapping[color] = offset
                    offset += 1

        return color_mapping

    @property
    def _lineno_template(self):
        # The '%s' is deliberately left in the result; it is filled in later
        # with the right-justified line-number string in format_unencoded().
        if self.lineno_fontsize != self.fontsize:
            return '{{\\fs{} \\cf{} %s{}}}'.format(self.lineno_fontsize,
                    self.color_mapping[self.lineno_color],
                    " " * self.lineno_padding)

        return '{{\\cf{} %s{}}}'.format(self.color_mapping[self.lineno_color],
                " " * self.lineno_padding)

    @property
    def _hl_open_str(self):
        # Opening group for a highlighted line; closed with '}' at line end.
        return rf'{{\highlight{self.color_mapping[self.hl_color]} '

    @property
    def _rtf_header(self):
        lines = []
        # rtf 1.8 header
        lines.append('{\\rtf1\\ansi\\uc0\\deff0'
                     '{\\fonttbl{\\f0\\fmodern\\fprq1\\fcharset0%s;}}'
                     % (self.fontface and ' '
                        + self._escape(self.fontface) or ''))

        # color table
        lines.append('{\\colortbl;')
        for color, _ in self.color_mapping.items():
            lines.append(self.hex_to_rtf_color(color))
        lines.append('}')

        # font and fontsize
        lines.append('\\f0\\sa0')
        if self.fontsize:
            lines.append('\\fs%d' % self.fontsize)

        # ensure Libre Office Writer imports and renders consecutive
        # space characters the same width, needed for line numbering.
        # https://bugs.documentfoundation.org/show_bug.cgi?id=144050
        lines.append('\\dntblnsbdb')

        return lines

    def format_unencoded(self, tokensource, outfile):
        for line in self._rtf_header:
            outfile.write(line + "\n")

        tokensource = self._split_tokens_on_newlines(tokensource)

        # first pass of tokens to count lines, needed for line numbering
        if self.linenos:
            line_count = 0
            tokens = [] # for copying the token source generator
            for ttype, value in tokensource:
                tokens.append((ttype, value))
                if value.endswith("\n"):
                    line_count += 1

            # width of line number strings (for padding with spaces)
            linenos_width = len(str(line_count+self.linenostart-1))

            tokensource = tokens

        # highlight stream
        lineno = 1
        start_new_line = True
        for ttype, value in tokensource:
            if start_new_line and lineno in self.hl_lines:
                outfile.write(self._hl_open_str)

            if start_new_line and self.linenos:
                if (lineno-self.linenostart+1)%self.linenostep == 0:
                    current_lineno = lineno + self.linenostart - 1
                    lineno_str = str(current_lineno).rjust(linenos_width)
                else:
                    lineno_str = "".rjust(linenos_width)
                outfile.write(self._lineno_template % lineno_str)

            # Walk up the token hierarchy until a styled ancestor is found.
            while not self.style.styles_token(ttype) and ttype.parent:
                ttype = ttype.parent
            style = self.style.style_for_token(ttype)
            buf = []
            if style['bgcolor']:
                buf.append('\\cb%d' % self.color_mapping[style['bgcolor']])
            if style['color']:
                buf.append('\\cf%d' % self.color_mapping[style['color']])
            if style['bold']:
                buf.append('\\b')
            if style['italic']:
                buf.append('\\i')
            if style['underline']:
                buf.append('\\ul')
            if style['border']:
                buf.append('\\chbrdr\\chcfpat%d' %
                           self.color_mapping[style['border']])
            start = ''.join(buf)
            if start:
                outfile.write(f'{{{start} ')
            outfile.write(self._escape_text(value))
            if start:
                outfile.write('}')
            start_new_line = False

            # complete line of input
            if value.endswith("\n"):
                # close line highlighting
                if lineno in self.hl_lines:
                    outfile.write('}')
                # newline in RTF file after closing }
                outfile.write("\n")

                start_new_line = True
                lineno += 1

        outfile.write('}\n')
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/pygments/lexers/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (16.5 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/pygments/scanner.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ pygments.scanner
3
+ ~~~~~~~~~~~~~~~~
4
+
5
+ This library implements a regex based scanner. Some languages
6
+ like Pascal are easy to parse but have some keywords that
7
+ depend on the context. Because of this it's impossible to lex
8
+ that just by using a regular expression lexer like the
9
+ `RegexLexer`.
10
+
11
+ Have a look at the `DelphiLexer` to get an idea of how to use
12
+ this scanner.
13
+
14
+ :copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
15
+ :license: BSD, see LICENSE for details.
16
+ """
17
+ import re
18
+
19
+
20
+ class EndOfText(RuntimeError):
21
+ """
22
+ Raise if end of text is reached and the user
23
+ tried to call a match function.
24
+ """
25
+
26
+
27
+ class Scanner:
28
+ """
29
+ Simple scanner
30
+
31
+ All method patterns are regular expression strings (not
32
+ compiled expressions!)
33
+ """
34
+
35
+ def __init__(self, text, flags=0):
36
+ """
37
+ :param text: The text which should be scanned
38
+ :param flags: default regular expression flags
39
+ """
40
+ self.data = text
41
+ self.data_length = len(text)
42
+ self.start_pos = 0
43
+ self.pos = 0
44
+ self.flags = flags
45
+ self.last = None
46
+ self.match = None
47
+ self._re_cache = {}
48
+
49
+ def eos(self):
50
+ """`True` if the scanner reached the end of text."""
51
+ return self.pos >= self.data_length
52
+ eos = property(eos, eos.__doc__)
53
+
54
+ def check(self, pattern):
55
+ """
56
+ Apply `pattern` on the current position and return
57
+ the match object. (Doesn't touch pos). Use this for
58
+ lookahead.
59
+ """
60
+ if self.eos:
61
+ raise EndOfText()
62
+ if pattern not in self._re_cache:
63
+ self._re_cache[pattern] = re.compile(pattern, self.flags)
64
+ return self._re_cache[pattern].match(self.data, self.pos)
65
+
66
+ def test(self, pattern):
67
+ """Apply a pattern on the current position and check
68
+ if it patches. Doesn't touch pos.
69
+ """
70
+ return self.check(pattern) is not None
71
+
72
+ def scan(self, pattern):
73
+ """
74
+ Scan the text for the given pattern and update pos/match
75
+ and related fields. The return value is a boolean that
76
+ indicates if the pattern matched. The matched value is
77
+ stored on the instance as ``match``, the last value is
78
+ stored as ``last``. ``start_pos`` is the position of the
79
+ pointer before the pattern was matched, ``pos`` is the
80
+ end position.
81
+ """
82
+ if self.eos:
83
+ raise EndOfText()
84
+ if pattern not in self._re_cache:
85
+ self._re_cache[pattern] = re.compile(pattern, self.flags)
86
+ self.last = self.match
87
+ m = self._re_cache[pattern].match(self.data, self.pos)
88
+ if m is None:
89
+ return False
90
+ self.start_pos = m.start()
91
+ self.pos = m.end()
92
+ self.match = m.group()
93
+ return True
94
+
95
+ def get_char(self):
96
+ """Scan exactly one char."""
97
+ self.scan('.')
98
+
99
+ def __repr__(self):
100
+ return '<%s %d/%d>' % (
101
+ self.__class__.__name__,
102
+ self.pos,
103
+ self.data_length
104
+ )
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/pygments/style.py ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ pygments.style
3
+ ~~~~~~~~~~~~~~
4
+
5
+ Basic style object.
6
+
7
+ :copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
8
+ :license: BSD, see LICENSE for details.
9
+ """
10
+
11
+ from pip._vendor.pygments.token import Token, STANDARD_TYPES
12
+
13
+ # Default mapping of ansixxx to RGB colors.
14
+ _ansimap = {
15
+ # dark
16
+ 'ansiblack': '000000',
17
+ 'ansired': '7f0000',
18
+ 'ansigreen': '007f00',
19
+ 'ansiyellow': '7f7fe0',
20
+ 'ansiblue': '00007f',
21
+ 'ansimagenta': '7f007f',
22
+ 'ansicyan': '007f7f',
23
+ 'ansigray': 'e5e5e5',
24
+ # normal
25
+ 'ansibrightblack': '555555',
26
+ 'ansibrightred': 'ff0000',
27
+ 'ansibrightgreen': '00ff00',
28
+ 'ansibrightyellow': 'ffff00',
29
+ 'ansibrightblue': '0000ff',
30
+ 'ansibrightmagenta': 'ff00ff',
31
+ 'ansibrightcyan': '00ffff',
32
+ 'ansiwhite': 'ffffff',
33
+ }
34
+ # mapping of deprecated #ansixxx colors to new color names
35
+ _deprecated_ansicolors = {
36
+ # dark
37
+ '#ansiblack': 'ansiblack',
38
+ '#ansidarkred': 'ansired',
39
+ '#ansidarkgreen': 'ansigreen',
40
+ '#ansibrown': 'ansiyellow',
41
+ '#ansidarkblue': 'ansiblue',
42
+ '#ansipurple': 'ansimagenta',
43
+ '#ansiteal': 'ansicyan',
44
+ '#ansilightgray': 'ansigray',
45
+ # normal
46
+ '#ansidarkgray': 'ansibrightblack',
47
+ '#ansired': 'ansibrightred',
48
+ '#ansigreen': 'ansibrightgreen',
49
+ '#ansiyellow': 'ansibrightyellow',
50
+ '#ansiblue': 'ansibrightblue',
51
+ '#ansifuchsia': 'ansibrightmagenta',
52
+ '#ansiturquoise': 'ansibrightcyan',
53
+ '#ansiwhite': 'ansiwhite',
54
+ }
55
+ ansicolors = set(_ansimap)
56
+
57
+
58
+ class StyleMeta(type):
59
+
60
+ def __new__(mcs, name, bases, dct):
61
+ obj = type.__new__(mcs, name, bases, dct)
62
+ for token in STANDARD_TYPES:
63
+ if token not in obj.styles:
64
+ obj.styles[token] = ''
65
+
66
+ def colorformat(text):
67
+ if text in ansicolors:
68
+ return text
69
+ if text[0:1] == '#':
70
+ col = text[1:]
71
+ if len(col) == 6:
72
+ return col
73
+ elif len(col) == 3:
74
+ return col[0] * 2 + col[1] * 2 + col[2] * 2
75
+ elif text == '':
76
+ return ''
77
+ elif text.startswith('var') or text.startswith('calc'):
78
+ return text
79
+ assert False, f"wrong color format {text!r}"
80
+
81
+ _styles = obj._styles = {}
82
+
83
+ for ttype in obj.styles:
84
+ for token in ttype.split():
85
+ if token in _styles:
86
+ continue
87
+ ndef = _styles.get(token.parent, None)
88
+ styledefs = obj.styles.get(token, '').split()
89
+ if not ndef or token is None:
90
+ ndef = ['', 0, 0, 0, '', '', 0, 0, 0]
91
+ elif 'noinherit' in styledefs and token is not Token:
92
+ ndef = _styles[Token][:]
93
+ else:
94
+ ndef = ndef[:]
95
+ _styles[token] = ndef
96
+ for styledef in obj.styles.get(token, '').split():
97
+ if styledef == 'noinherit':
98
+ pass
99
+ elif styledef == 'bold':
100
+ ndef[1] = 1
101
+ elif styledef == 'nobold':
102
+ ndef[1] = 0
103
+ elif styledef == 'italic':
104
+ ndef[2] = 1
105
+ elif styledef == 'noitalic':
106
+ ndef[2] = 0
107
+ elif styledef == 'underline':
108
+ ndef[3] = 1
109
+ elif styledef == 'nounderline':
110
+ ndef[3] = 0
111
+ elif styledef[:3] == 'bg:':
112
+ ndef[4] = colorformat(styledef[3:])
113
+ elif styledef[:7] == 'border:':
114
+ ndef[5] = colorformat(styledef[7:])
115
+ elif styledef == 'roman':
116
+ ndef[6] = 1
117
+ elif styledef == 'sans':
118
+ ndef[7] = 1
119
+ elif styledef == 'mono':
120
+ ndef[8] = 1
121
+ else:
122
+ ndef[0] = colorformat(styledef)
123
+
124
+ return obj
125
+
126
+ def style_for_token(cls, token):
127
+ t = cls._styles[token]
128
+ ansicolor = bgansicolor = None
129
+ color = t[0]
130
+ if color in _deprecated_ansicolors:
131
+ color = _deprecated_ansicolors[color]
132
+ if color in ansicolors:
133
+ ansicolor = color
134
+ color = _ansimap[color]
135
+ bgcolor = t[4]
136
+ if bgcolor in _deprecated_ansicolors:
137
+ bgcolor = _deprecated_ansicolors[bgcolor]
138
+ if bgcolor in ansicolors:
139
+ bgansicolor = bgcolor
140
+ bgcolor = _ansimap[bgcolor]
141
+
142
+ return {
143
+ 'color': color or None,
144
+ 'bold': bool(t[1]),
145
+ 'italic': bool(t[2]),
146
+ 'underline': bool(t[3]),
147
+ 'bgcolor': bgcolor or None,
148
+ 'border': t[5] or None,
149
+ 'roman': bool(t[6]) or None,
150
+ 'sans': bool(t[7]) or None,
151
+ 'mono': bool(t[8]) or None,
152
+ 'ansicolor': ansicolor,
153
+ 'bgansicolor': bgansicolor,
154
+ }
155
+
156
+ def list_styles(cls):
157
+ return list(cls)
158
+
159
+ def styles_token(cls, ttype):
160
+ return ttype in cls._styles
161
+
162
+ def __iter__(cls):
163
+ for token in cls._styles:
164
+ yield token, cls.style_for_token(token)
165
+
166
+ def __len__(cls):
167
+ return len(cls._styles)
168
+
169
+
170
+ class Style(metaclass=StyleMeta):
171
+
172
+ #: overall background color (``None`` means transparent)
173
+ background_color = '#ffffff'
174
+
175
+ #: highlight background color
176
+ highlight_color = '#ffffcc'
177
+
178
+ #: line number font color
179
+ line_number_color = 'inherit'
180
+
181
+ #: line number background color
182
+ line_number_background_color = 'transparent'
183
+
184
+ #: special line number font color
185
+ line_number_special_color = '#000000'
186
+
187
+ #: special line number background color
188
+ line_number_special_background_color = '#ffffc0'
189
+
190
+ #: Style definitions for individual token types.
191
+ styles = {}
192
+
193
+ #: user-friendly style name (used when selecting the style, so this
194
+ # should be all-lowercase, no spaces, hyphens)
195
+ name = 'unnamed'
196
+
197
+ aliases = []
198
+
199
+ # Attribute for lexers defined within Pygments. If set
200
+ # to True, the style is not shown in the style gallery
201
+ # on the website. This is intended for language-specific
202
+ # styles.
203
+ web_style_gallery_exclude = False
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/pygments/styles/__init__.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ pygments.styles
3
+ ~~~~~~~~~~~~~~~
4
+
5
+ Contains built-in styles.
6
+
7
+ :copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
8
+ :license: BSD, see LICENSE for details.
9
+ """
10
+
11
+ from pip._vendor.pygments.plugin import find_plugin_styles
12
+ from pip._vendor.pygments.util import ClassNotFound
13
+ from pip._vendor.pygments.styles._mapping import STYLES
14
+
15
+ #: A dictionary of built-in styles, mapping style names to
16
+ #: ``'submodule::classname'`` strings.
17
+ #: This list is deprecated. Use `pygments.styles.STYLES` instead
18
+ STYLE_MAP = {v[1]: v[0].split('.')[-1] + '::' + k for k, v in STYLES.items()}
19
+
20
+ #: Internal reverse mapping to make `get_style_by_name` more efficient
21
+ _STYLE_NAME_TO_MODULE_MAP = {v[1]: (v[0], k) for k, v in STYLES.items()}
22
+
23
+
24
+ def get_style_by_name(name):
25
+ """
26
+ Return a style class by its short name. The names of the builtin styles
27
+ are listed in :data:`pygments.styles.STYLE_MAP`.
28
+
29
+ Will raise :exc:`pygments.util.ClassNotFound` if no style of that name is
30
+ found.
31
+ """
32
+ if name in _STYLE_NAME_TO_MODULE_MAP:
33
+ mod, cls = _STYLE_NAME_TO_MODULE_MAP[name]
34
+ builtin = "yes"
35
+ else:
36
+ for found_name, style in find_plugin_styles():
37
+ if name == found_name:
38
+ return style
39
+ # perhaps it got dropped into our styles package
40
+ builtin = ""
41
+ mod = 'pygments.styles.' + name
42
+ cls = name.title() + "Style"
43
+
44
+ try:
45
+ mod = __import__(mod, None, None, [cls])
46
+ except ImportError:
47
+ raise ClassNotFound(f"Could not find style module {mod!r}" +
48
+ (builtin and ", though it should be builtin")
49
+ + ".")
50
+ try:
51
+ return getattr(mod, cls)
52
+ except AttributeError:
53
+ raise ClassNotFound(f"Could not find style class {cls!r} in style module.")
54
+
55
+
56
+ def get_all_styles():
57
+ """Return a generator for all styles by name, both builtin and plugin."""
58
+ for v in STYLES.values():
59
+ yield v[1]
60
+ for name, _ in find_plugin_styles():
61
+ yield name
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/pygments/styles/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (3.13 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/pygments/styles/__pycache__/_mapping.cpython-311.pyc ADDED
Binary file (3.69 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/pygments/token.py ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ pygments.token
3
+ ~~~~~~~~~~~~~~
4
+
5
+ Basic token types and the standard tokens.
6
+
7
+ :copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
8
+ :license: BSD, see LICENSE for details.
9
+ """
10
+
11
+
12
+ class _TokenType(tuple):
13
+ parent = None
14
+
15
+ def split(self):
16
+ buf = []
17
+ node = self
18
+ while node is not None:
19
+ buf.append(node)
20
+ node = node.parent
21
+ buf.reverse()
22
+ return buf
23
+
24
+ def __init__(self, *args):
25
+ # no need to call super.__init__
26
+ self.subtypes = set()
27
+
28
+ def __contains__(self, val):
29
+ return self is val or (
30
+ type(val) is self.__class__ and
31
+ val[:len(self)] == self
32
+ )
33
+
34
+ def __getattr__(self, val):
35
+ if not val or not val[0].isupper():
36
+ return tuple.__getattribute__(self, val)
37
+ new = _TokenType(self + (val,))
38
+ setattr(self, val, new)
39
+ self.subtypes.add(new)
40
+ new.parent = self
41
+ return new
42
+
43
+ def __repr__(self):
44
+ return 'Token' + (self and '.' or '') + '.'.join(self)
45
+
46
+ def __copy__(self):
47
+ # These instances are supposed to be singletons
48
+ return self
49
+
50
+ def __deepcopy__(self, memo):
51
+ # These instances are supposed to be singletons
52
+ return self
53
+
54
+
55
+ Token = _TokenType()
56
+
57
+ # Special token types
58
+ Text = Token.Text
59
+ Whitespace = Text.Whitespace
60
+ Escape = Token.Escape
61
+ Error = Token.Error
62
+ # Text that doesn't belong to this lexer (e.g. HTML in PHP)
63
+ Other = Token.Other
64
+
65
+ # Common token types for source code
66
+ Keyword = Token.Keyword
67
+ Name = Token.Name
68
+ Literal = Token.Literal
69
+ String = Literal.String
70
+ Number = Literal.Number
71
+ Punctuation = Token.Punctuation
72
+ Operator = Token.Operator
73
+ Comment = Token.Comment
74
+
75
+ # Generic types for non-source code
76
+ Generic = Token.Generic
77
+
78
+ # String and some others are not direct children of Token.
79
+ # alias them:
80
+ Token.Token = Token
81
+ Token.String = String
82
+ Token.Number = Number
83
+
84
+
85
+ def is_token_subtype(ttype, other):
86
+ """
87
+ Return True if ``ttype`` is a subtype of ``other``.
88
+
89
+ exists for backwards compatibility. use ``ttype in other`` now.
90
+ """
91
+ return ttype in other
92
+
93
+
94
+ def string_to_tokentype(s):
95
+ """
96
+ Convert a string into a token type::
97
+
98
+ >>> string_to_token('String.Double')
99
+ Token.Literal.String.Double
100
+ >>> string_to_token('Token.Literal.Number')
101
+ Token.Literal.Number
102
+ >>> string_to_token('')
103
+ Token
104
+
105
+ Tokens that are already tokens are returned unchanged:
106
+
107
+ >>> string_to_token(String)
108
+ Token.Literal.String
109
+ """
110
+ if isinstance(s, _TokenType):
111
+ return s
112
+ if not s:
113
+ return Token
114
+ node = Token
115
+ for item in s.split('.'):
116
+ node = getattr(node, item)
117
+ return node
118
+
119
+
120
+ # Map standard token types to short names, used in CSS class naming.
121
+ # If you add a new item, please be sure to run this file to perform
122
+ # a consistency check for duplicate values.
123
+ STANDARD_TYPES = {
124
+ Token: '',
125
+
126
+ Text: '',
127
+ Whitespace: 'w',
128
+ Escape: 'esc',
129
+ Error: 'err',
130
+ Other: 'x',
131
+
132
+ Keyword: 'k',
133
+ Keyword.Constant: 'kc',
134
+ Keyword.Declaration: 'kd',
135
+ Keyword.Namespace: 'kn',
136
+ Keyword.Pseudo: 'kp',
137
+ Keyword.Reserved: 'kr',
138
+ Keyword.Type: 'kt',
139
+
140
+ Name: 'n',
141
+ Name.Attribute: 'na',
142
+ Name.Builtin: 'nb',
143
+ Name.Builtin.Pseudo: 'bp',
144
+ Name.Class: 'nc',
145
+ Name.Constant: 'no',
146
+ Name.Decorator: 'nd',
147
+ Name.Entity: 'ni',
148
+ Name.Exception: 'ne',
149
+ Name.Function: 'nf',
150
+ Name.Function.Magic: 'fm',
151
+ Name.Property: 'py',
152
+ Name.Label: 'nl',
153
+ Name.Namespace: 'nn',
154
+ Name.Other: 'nx',
155
+ Name.Tag: 'nt',
156
+ Name.Variable: 'nv',
157
+ Name.Variable.Class: 'vc',
158
+ Name.Variable.Global: 'vg',
159
+ Name.Variable.Instance: 'vi',
160
+ Name.Variable.Magic: 'vm',
161
+
162
+ Literal: 'l',
163
+ Literal.Date: 'ld',
164
+
165
+ String: 's',
166
+ String.Affix: 'sa',
167
+ String.Backtick: 'sb',
168
+ String.Char: 'sc',
169
+ String.Delimiter: 'dl',
170
+ String.Doc: 'sd',
171
+ String.Double: 's2',
172
+ String.Escape: 'se',
173
+ String.Heredoc: 'sh',
174
+ String.Interpol: 'si',
175
+ String.Other: 'sx',
176
+ String.Regex: 'sr',
177
+ String.Single: 's1',
178
+ String.Symbol: 'ss',
179
+
180
+ Number: 'm',
181
+ Number.Bin: 'mb',
182
+ Number.Float: 'mf',
183
+ Number.Hex: 'mh',
184
+ Number.Integer: 'mi',
185
+ Number.Integer.Long: 'il',
186
+ Number.Oct: 'mo',
187
+
188
+ Operator: 'o',
189
+ Operator.Word: 'ow',
190
+
191
+ Punctuation: 'p',
192
+ Punctuation.Marker: 'pm',
193
+
194
+ Comment: 'c',
195
+ Comment.Hashbang: 'ch',
196
+ Comment.Multiline: 'cm',
197
+ Comment.Preproc: 'cp',
198
+ Comment.PreprocFile: 'cpf',
199
+ Comment.Single: 'c1',
200
+ Comment.Special: 'cs',
201
+
202
+ Generic: 'g',
203
+ Generic.Deleted: 'gd',
204
+ Generic.Emph: 'ge',
205
+ Generic.Error: 'gr',
206
+ Generic.Heading: 'gh',
207
+ Generic.Inserted: 'gi',
208
+ Generic.Output: 'go',
209
+ Generic.Prompt: 'gp',
210
+ Generic.Strong: 'gs',
211
+ Generic.Subheading: 'gu',
212
+ Generic.EmphStrong: 'ges',
213
+ Generic.Traceback: 'gt',
214
+ }
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/qat/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .modules import * # noqa: F403
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/qat/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (258 Bytes). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/quantized/modules/__init__.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .linear_relu import LinearReLU, LinearLeakyReLU, LinearTanh
2
+ from .conv_relu import ConvReLU1d, ConvReLU2d, ConvReLU3d
3
+ from .bn_relu import BNReLU2d, BNReLU3d
4
+ from .conv_add import ConvAdd2d, ConvAddReLU2d
5
+
6
+ __all__ = [
7
+ 'LinearReLU',
8
+ 'ConvReLU1d',
9
+ 'ConvReLU2d',
10
+ 'ConvReLU3d',
11
+ 'BNReLU2d',
12
+ 'BNReLU3d',
13
+ 'LinearLeakyReLU',
14
+ 'LinearTanh',
15
+ 'ConvAdd2d',
16
+ 'ConvAddReLU2d',
17
+ ]