koichi12 commited on
Commit
dbf954e
·
verified ·
1 Parent(s): 2e7ec00

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_src/__pycache__/__init__.cpython-311.pyc +0 -0
  2. tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/dim/__pycache__/magic_trace.cpython-311.pyc +0 -0
  3. tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/dim/delayed_mul_tensor.py +77 -0
  4. tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/dim/magic_trace.py +42 -0
  5. tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/dim/wrap_type.py +71 -0
  6. tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/einops/__init__.py +3 -0
  7. tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/einops/__pycache__/__init__.cpython-311.pyc +0 -0
  8. tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/einops/__pycache__/_parsing.cpython-311.pyc +0 -0
  9. tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/einops/__pycache__/rearrange.cpython-311.pyc +0 -0
  10. tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/einops/rearrange.py +207 -0
  11. tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/calculus/__pycache__/extrapolation.cpython-311.pyc +0 -0
  12. tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/calculus/__pycache__/quadrature.cpython-311.pyc +0 -0
  13. tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/calculus/approximation.py +246 -0
  14. tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/matrices/__init__.py +2 -0
  15. tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/matrices/__pycache__/calculus.cpython-311.pyc +0 -0
  16. tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/matrices/calculus.py +531 -0
  17. tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/matrices/eigen.py +877 -0
  18. tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/matrices/linalg.py +790 -0
  19. tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/matrices/matrices.py +1005 -0
  20. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cublas/__init__.py +0 -0
  21. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cublas/__pycache__/__init__.cpython-311.pyc +0 -0
  22. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cublas/include/__pycache__/__init__.cpython-311.pyc +0 -0
  23. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cublas/include/cublasLt.h +1853 -0
  24. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cublas/include/cublas_api.h +0 -0
  25. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cublas/include/cublas_v2.h +273 -0
  26. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cublas/lib/__init__.py +0 -0
  27. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/__init__.py +0 -0
  28. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_pcsampling.h +923 -0
  29. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_runtime/__pycache__/__init__.cpython-311.pyc +0 -0
  30. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cudnn/include/cudnn_cnn_infer_v8.h +571 -0
  31. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cudnn/include/cudnn_version.h +70 -0
  32. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cufft/__pycache__/__init__.cpython-311.pyc +0 -0
  33. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cufft/include/__init__.py +0 -0
  34. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cufft/include/cudalibxt.h +97 -0
  35. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cufft/include/cufftXt.h +269 -0
  36. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cufft/lib/__init__.py +0 -0
  37. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nccl/__init__.py +0 -0
  38. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nccl/include/__init__.py +0 -0
  39. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia_nvtx_cu11-11.8.86.dist-info/METADATA +35 -0
  40. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia_nvtx_cu11-11.8.86.dist-info/WHEEL +5 -0
  41. tuning-competition-baseline/.venv/lib/python3.11/site-packages/packaging/__pycache__/_elffile.cpython-311.pyc +0 -0
  42. tuning-competition-baseline/.venv/lib/python3.11/site-packages/packaging/_parser.py +354 -0
  43. tuning-competition-baseline/.venv/lib/python3.11/site-packages/packaging/markers.py +331 -0
  44. tuning-competition-baseline/.venv/lib/python3.11/site-packages/packaging/metadata.py +863 -0
  45. tuning-competition-baseline/.venv/lib/python3.11/site-packages/packaging/py.typed +0 -0
  46. tuning-competition-baseline/.venv/lib/python3.11/site-packages/packaging/specifiers.py +1020 -0
  47. tuning-competition-baseline/.venv/lib/python3.11/site-packages/packaging/tags.py +617 -0
  48. tuning-competition-baseline/.venv/lib/python3.11/site-packages/packaging/version.py +582 -0
  49. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/cachecontrol/__init__.py +28 -0
  50. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/cachecontrol/__pycache__/__init__.cpython-311.pyc +0 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_src/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (215 Bytes). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/dim/__pycache__/magic_trace.cpython-311.pyc ADDED
Binary file (2.48 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/dim/delayed_mul_tensor.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+ import torch
7
+
8
+ from . import _Tensor, Tensor
9
+ from .reference import _dims, _enable_layers, llist, ltuple
10
+
11
+
12
class DelayedMulTensor(_Tensor):
    """Lazy elementwise product of two first-class-dims tensors.

    The two operands are stored unmultiplied; the merged level list, the
    batched product and its positional form are each computed on first access
    and cached.  ``sum`` fuses the multiply with the reduction into a single
    ``torch.einsum`` call, so on that path the full product is never
    materialized.
    """

    def __init__(self, lhs, rhs):
        self._lhs, self._rhs = lhs, rhs
        # Lazily-filled caches; populated by the properties below.
        self._data = None
        self._levels_data = None
        self._has_device = lhs._has_device or rhs._has_device
        self._batchtensor_data = None
        self._tensor_data = None

    @property
    def _levels(self):
        # Union of the operands' levels: lhs levels in order, then any levels
        # that appear only on the rhs.
        if self._levels_data is None:
            levels = llist(self._lhs._levels)
            for l in self._rhs._levels:
                if l not in levels:
                    levels.append(l)
            self._levels_data = ltuple(levels)
        return self._levels_data

    @property
    def _batchtensor(self):
        # Fallback path that actually materializes the elementwise product.
        if self._batchtensor_data is None:
            with _enable_layers(self._levels):
                print("bt multiply fallback")
                self._batchtensor_data = self._lhs._batchtensor * self._rhs._batchtensor
        return self._batchtensor_data

    @property
    def _tensor(self):
        # Positional view of the (materialized) product.
        if self._tensor_data is None:
            self._tensor_data = Tensor.from_batched(
                self._batchtensor, self._has_device
            )._tensor
        return self._tensor_data

    @property
    def ndim(self):
        return self._batchtensor.ndim

    @property
    def dims(self):
        return ltuple(super().dims)

    def sum(self, dim):
        """Reduce over ``dim``, fusing the pending multiply into one einsum."""
        dims = _dims(dim, 0, False, False)
        n = ord("a")
        all_levels = self._levels

        def to_char(d):
            # One letter per level for the einsum subscript string; shared
            # levels get the same letter on both operands.
            return chr(n + all_levels.index(d))

        plhs, levelslhs = self._lhs._tensor, self._lhs._levels
        prhs, levelsrhs = self._rhs._tensor, self._rhs._levels
        # NOTE(review): new_dims is computed but never used below.
        new_dims = tuple(d for d in self.dims if d not in dims)
        new_levels = [l for l in self._levels if l not in dims]
        # Output subscripts list only the surviving levels, so einsum sums
        # the reduced ones out of the product.
        fmt = "".join(
            [
                *(to_char(d) for d in levelslhs),
                ",",
                *(to_char(d) for d in levelsrhs),
                "->",
                *(to_char(d) for d in new_levels),
            ]
        )
        result_data = torch.einsum(fmt, (plhs, prhs))
        return Tensor.from_positional(result_data, new_levels, True)
tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/dim/magic_trace.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+ import os
7
+ import signal
8
+ import subprocess
9
+ from contextlib import contextmanager
10
+
11
+
12
@contextmanager
def magic_trace(output="trace.fxt", magic_trace_cache="/tmp/magic-trace"):
    """Profile the current process with Jane Street's magic-trace.

    Downloads the magic-trace binary to ``magic_trace_cache`` if it is not
    already present, attaches it to this process, yields to the caller, and
    on exit stops the tracer (SIGINT) so it writes the trace to ``output``.

    Args:
        output (str): path where magic-trace writes the .fxt trace file
        magic_trace_cache (str): path of the cached magic-trace binary

    Raises:
        subprocess.CalledProcessError: if downloading or chmod-ing the
            binary fails.
        ValueError: if magic-trace fails to attach or exits abnormally.
    """
    pid = os.getpid()
    if not os.path.exists(magic_trace_cache):
        print(f"Downloading magic_trace to: {magic_trace_cache}")
        # check=True: previously a failed download was silently ignored and
        # the Popen below failed (or attached a truncated binary) confusingly.
        subprocess.run(
            [
                "wget",
                "-O",
                magic_trace_cache,
                "-q",
                "https://github.com/janestreet/magic-trace/releases/download/v1.0.2/magic-trace",
            ],
            check=True,
        )
        subprocess.run(["chmod", "+x", magic_trace_cache], check=True)
    args = [magic_trace_cache, "attach", "-pid", str(pid), "-o", output]
    p = subprocess.Popen(args, stderr=subprocess.PIPE, encoding="utf-8")
    while True:
        x = p.stderr.readline()
        print(x)
        if "Attached" in x:
            break
        if not x and p.poll() is not None:
            # magic-trace died before attaching; without this check the
            # readline() loop would spin forever on EOF.
            raise ValueError(f"magic_trace failed to attach, exit code: {p.returncode}")
    try:
        yield
    finally:
        p.send_signal(signal.SIGINT)
        r = p.wait()
        print(p.stderr.read())
        p.stderr.close()
        if r != 0:
            raise ValueError(f"magic_trace exited abnormally: {r}")
tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/dim/wrap_type.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ from types import (
8
+ BuiltinMethodType,
9
+ FunctionType,
10
+ GetSetDescriptorType,
11
+ MethodDescriptorType,
12
+ WrapperDescriptorType,
13
+ )
14
+
15
+ from functorch._C import dim as _C
16
+
17
# C implementation of method wrapping; wrap_type() falls back to
# _py_wrap_method when called with use_c=False.
_wrap_method = _C._wrap_method

# Descriptor types that wrap_type() treats as callable methods when copying a
# pattern class's attributes onto a target type.
FUNC_TYPES = (
    FunctionType,
    MethodDescriptorType,
    BuiltinMethodType,
    WrapperDescriptorType,
)
# Descriptor types that wrap_type() re-exposes as properties.
PROPERTY_TYPES = (GetSetDescriptorType, property)
26
+
27
+
28
+ def _py_wrap_method(orig, __torch_function__):
29
+ def impl(*args, **kwargs):
30
+ return __torch_function__(orig, None, args, kwargs)
31
+
32
+ return impl
33
+
34
+
35
def wrap_type(use_c, to_patch, pattern, __torch_function__):
    """Patch ``to_patch`` so that methods and properties collected from
    ``pattern``'s MRO dispatch through ``__torch_function__``.

    Args:
        use_c: if truthy, wrap with the C implementation (_C._wrap_method),
            otherwise with the pure-Python _py_wrap_method.
        to_patch: the class receiving the wrapped attributes.
        pattern: the class whose MRO supplies the attributes to wrap.
        __torch_function__: handler invoked as (orig, None, args, kwargs).
    """
    if use_c:
        wrap_method = _wrap_method
    else:
        wrap_method = _py_wrap_method

    # Collect attributes base-first so more-derived definitions win.
    all = {}
    for t in reversed(pattern.mro()[:-1]):  # skip object
        all.update(t.__dict__)

    def wrap_attr(orig):
        # A property is wrapped via its getter descriptor.
        return property(wrap_method(orig.__get__, __torch_function__))

    for name, obj in all.items():
        # Structural dunders that must not be redirected.
        if name in (
            "__dict__",
            "__new__",
            "__init__",
            "__repr__",
            "__weakref__",
            "__doc__",
            "__module__",
            "__dir__",
        ):
            continue

        # skip things that have been overloaded
        # things that come from object like `__eq__` still need to be patched, however.
        if hasattr(to_patch, name) and getattr(to_patch, name) is not getattr(
            object, name, None
        ):
            continue

        if isinstance(obj, FUNC_TYPES):
            setattr(to_patch, name, wrap_method(obj, __torch_function__))
        elif isinstance(obj, PROPERTY_TYPES):
            setattr(to_patch, name, wrap_attr(obj))
tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/einops/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
# einops-style API built on first-class dimensions; only `rearrange` is part
# of the public interface.
from .rearrange import rearrange

__all__ = ["rearrange"]
tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/einops/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (288 Bytes). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/einops/__pycache__/_parsing.cpython-311.pyc ADDED
Binary file (14.2 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/einops/__pycache__/rearrange.cpython-311.pyc ADDED
Binary file (10.8 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/einops/rearrange.py ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import functools
4
+ from typing import Callable, Dict, List, Sequence, Tuple, Union
5
+
6
+ import torch
7
+
8
+ from functorch._C import dim as _C
9
+ from ._parsing import (
10
+ _ellipsis,
11
+ AnonymousAxis,
12
+ comma_separate,
13
+ parse_pattern,
14
+ validate_rearrange_expressions,
15
+ )
16
+
17
+ __all__ = ["rearrange"]
18
+
19
+ dims = _C.dims
20
+
21
+
22
@functools.lru_cache(256)
def _create_rearrange_callable(
    tensor_ndim: int, pattern: str, **axes_lengths: int
) -> Callable[[torch.Tensor], torch.Tensor]:
    r"""Translate an `einops`-style pattern into a callable that performs the rearrange using first-class dimensions.

    Since an equivalent result is computed for tensors with the same number of dimensions, with the same pattern and
    specified axes lengths, this function can be memoized.

    Args:
        tensor_ndim (int): the number of dimensions in the tensor to rearrange
        pattern (str): the `einops`-style rearrangement pattern
        axes_lengths (int): any additional length specifications for dimensions

    Returns:
        Callable[[torch.Tensor], torch.Tensor]: a callable that performs the rearrangement
    """
    left, right = parse_pattern(pattern, axes_lengths)
    validate_rearrange_expressions(left, right, axes_lengths)

    # Unitary anonymous axes ("1") parse to empty (falsy) sub-lists.
    n_anon_dims = sum(not dim for dim in left.composition)
    if left.has_ellipsis:
        # The ellipsis absorbs every tensor dim the pattern does not name.
        n_ellipsis_dims = tensor_ndim - (len(left.composition) - 1)
        n_named_dims = len(left.identifiers) - 1

        if (pattern_ndim := n_anon_dims + n_named_dims) > tensor_ndim:
            raise ValueError(
                f"Number of dimensions in pattern ({pattern_ndim}) must be less than or equal to the number of "
                f"dimensions in the tensor ({tensor_ndim})"
            )
    else:
        n_ellipsis_dims = 0
        n_named_dims = len(left.identifiers)

        if (pattern_ndim := len(left.composition)) != tensor_ndim:
            raise ValueError(
                f"Number of dimensions in pattern ({pattern_ndim}) must be equal to the number of dimensions in "
                f"the tensor ({tensor_ndim})"
            )
    n_dims = n_named_dims + n_ellipsis_dims + n_anon_dims

    if n_dims == 0:
        # an identity rearrangement on a 0-dimension tensor
        return lambda tensor: tensor

    # One synthetic first-class dim name ("d0", "d1", ...) per tensor dim.
    first_class_dims: Tuple[str, ...] = tuple(f"d{i}" for i in range(n_dims))
    identifier_dim_map: Dict[Union[str, AnonymousAxis], Tuple[str, ...]] = {}
    anon_axes: List[AnonymousAxis] = []

    # map the left-hand side identifiers to strings representing first class dims
    dims_i = 0
    for dimension in left.composition:
        if isinstance(dimension, list):
            for identifier in dimension:
                # non-unitary anon axes are not allowed in rearrange & unitary anon axes are represented as empty lists
                assert isinstance(identifier, str)
                identifier_dim_map[identifier] = (first_class_dims[dims_i],)
                dims_i += 1
            if not dimension:
                # unitary anonymous axis
                anon_axis = AnonymousAxis("1")
                identifier_dim_map[anon_axis] = (first_class_dims[dims_i],)
                anon_axes.append(anon_axis)
                dimension.append(anon_axis)
                dims_i += 1
        elif dimension == _ellipsis:
            identifier = _ellipsis
            identifier_dim_map[identifier] = tuple(
                first_class_dims[dims_i + j] for j in range(n_ellipsis_dims)
            )
            dims_i += n_ellipsis_dims
        else:
            raise ValueError(f"Unexpected dimension: {dimension}")

    def composition_to_dims(
        composition: Sequence[Union[List[Union[str, AnonymousAxis]], str]]
    ) -> List[Union[str, Tuple[str, ...]]]:
        """Convert a `ParsedExpression.composition` into a `Tensor.__getitem__` index of strings representing first
        class dims."""
        dim_composition: List[Union[str, Tuple[str, ...]]] = []
        for dimension in composition:
            if isinstance(dimension, list):
                dim_composition.append(
                    tuple(
                        dim
                        for identifier in dimension
                        for dim in identifier_dim_map[identifier]
                    )
                )
            elif dimension == _ellipsis:
                dim_composition.extend(identifier_dim_map[_ellipsis])
            else:
                raise ValueError(f"Unexpected dimension: {dimension}")
        return dim_composition

    left_dims = composition_to_dims(left.composition)
    right_dims = composition_to_dims(right.composition)
    anon_dims = tuple(identifier_dim_map[axis][0] for axis in anon_axes)
    specified_lengths = tuple(
        (identifier_dim_map[axis][0], length) for axis, length in axes_lengths.items()
    )

    # Generate and exec a small function source.  NOTE(review): the generated
    # text contains only the synthetic names d0..dN and the integer lengths
    # from axes_lengths, so no user-controlled strings reach exec().
    custom_rearrange_callable_name = "do_rearrange"
    custom_rearrange_callable_code = (
        (
            f"def {custom_rearrange_callable_name}(tensor):\n"
            f"    {comma_separate(first_class_dims)} = dims({n_dims})\n"
        )
        + (
            "".join(
                f"    {dim}.size = {length}\n" for (dim, length) in specified_lengths
            )
            if specified_lengths
            else ""
        )
        + f"    tensor = tensor[{comma_separate(left_dims)}].order({comma_separate(right_dims)})\n"
        + (
            f"    return tensor.sum({comma_separate([anon_dims])}, keepdim=False)\n"
            if anon_dims
            else "    return tensor\n"
        )
    )

    exec(custom_rearrange_callable_code)
    return locals()[custom_rearrange_callable_name]
147
+
148
+
149
def rearrange(
    tensor: Union[torch.Tensor, List[torch.Tensor], Tuple[torch.Tensor, ...]],
    pattern: str,
    **axes_lengths: int,
) -> torch.Tensor:
    r"""A native implementation of `einops.rearrange`, a reader-friendly smart element reordering for multidimensional
    tensors. This operation includes functionality of transpose (axes permutation), reshape (view), squeeze, unsqueeze,
    stack, concatenate and other operations.

    See: https://einops.rocks/api/rearrange/

    Args:
        tensor (Tensor or sequence of Tensor): the tensor(s) to rearrange
        pattern (str): the rearrangement pattern
        axes_lengths (int): any additional length specifications for dimensions

    Returns:
        Tensor: the rearranged tensor

    Examples:
        >>> # suppose we have a set of 32 images in "h w c" format (height-width-channel)
        >>> images = torch.randn((32, 30, 40, 3))

        >>> # stack along first (batch) axis, output is a single array
        >>> rearrange(images, 'b h w c -> b h w c').shape
        torch.Size([32, 30, 40, 3])

        >>> # concatenate images along height (vertical axis), 960 = 32 * 30
        >>> rearrange(images, 'b h w c -> (b h) w c').shape
        torch.Size([960, 40, 3])

        >>> # concatenated images along horizontal axis, 1280 = 32 * 40
        >>> rearrange(images, 'b h w c -> h (b w) c').shape
        torch.Size([30, 1280, 3])

        >>> # reordered axes to "b c h w" format for deep learning
        >>> rearrange(images, 'b h w c -> b c h w').shape
        torch.Size([32, 3, 30, 40])

        >>> # flattened each image into a vector, 3600 = 30 * 40 * 3
        >>> rearrange(images, 'b h w c -> b (c h w)').shape
        torch.Size([32, 3600])

        >>> # split each image into 4 smaller (top-left, top-right, bottom-left, bottom-right), 128 = 32 * 2 * 2
        >>> rearrange(images, 'b (h1 h) (w1 w) c -> (b h1 w1) h w c', h1=2, w1=2).shape
        torch.Size([128, 15, 20, 3])

        >>> # space-to-depth operation
        >>> rearrange(images, 'b (h h1) (w w1) c -> b h w (c h1 w1)', h1=2, w1=2).shape
        torch.Size([32, 15, 20, 12])
    """
    if not isinstance(tensor, torch.Tensor):
        # A sequence of tensors is stacked along a new leading dim first.
        tensor = torch.stack(tensor)

    # Memoized on (ndim, pattern, axes_lengths); see _create_rearrange_callable.
    rearrange_callable = _create_rearrange_callable(
        tensor.ndim, pattern, **axes_lengths
    )

    return rearrange_callable(tensor)
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/calculus/__pycache__/extrapolation.cpython-311.pyc ADDED
Binary file (89.6 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/calculus/__pycache__/quadrature.cpython-311.pyc ADDED
Binary file (50.9 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/calculus/approximation.py ADDED
@@ -0,0 +1,246 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from ..libmp.backend import xrange
2
+ from .calculus import defun
3
+
4
+ #----------------------------------------------------------------------------#
5
+ # Approximation methods #
6
+ #----------------------------------------------------------------------------#
7
+
8
+ # The Chebyshev approximation formula is given at:
9
+ # http://mathworld.wolfram.com/ChebyshevApproximationFormula.html
10
+
11
+ # The only major changes in the following code is that we return the
12
+ # expanded polynomial coefficients instead of Chebyshev coefficients,
13
+ # and that we automatically transform [a,b] -> [-1,1] and back
14
+ # for convenience.
15
+
16
+ # Coefficient in Chebyshev approximation
17
def chebcoeff(ctx,f,a,b,j,N):
    """Return the j-th coefficient of the N-point Chebyshev expansion of f
    on [a, b], sampling f at the Chebyshev nodes mapped onto [a, b]."""
    half = ctx.mpf(0.5)
    acc = ctx.mpf(0)
    for k in range(1, N+1):
        # k-th Chebyshev node on [-1, 1], then mapped affinely into [a, b].
        node = ctx.cospi((k-half)/N)
        acc += f(node*(b-a)*half + (b+a)*half) * ctx.cospi(j*(k-half)/N)
    return 2*acc/N
24
+
25
+ # Generate Chebyshev polynomials T_n(ax+b) in expanded form
26
def chebT(ctx, a=1, b=0):
    """Yield expanded coefficient lists (lowest degree first) of the
    Chebyshev polynomials T_n(a*x + b), for n = 0, 1, 2, ...

    The ``ctx`` argument is accepted for interface uniformity but unused.
    """
    prev = [1]          # T_0 = 1
    yield prev
    cur = [b, a]        # T_1(a*x + b) = a*x + b
    while True:
        yield cur
        # Recurrence: T[n+1](ax+b) = 2*(ax+b)*T[n](ax+b) - T[n-1](ax+b)
        # 2*a*x * cur: multiply by x shifts coefficients up one degree.
        nxt = [0] + [2*a*coef for coef in cur]
        for idx, coef in enumerate(cur):
            nxt[idx] += 2*b*coef
        for idx, coef in enumerate(prev):
            nxt[idx] -= coef
        cur, prev = nxt, cur
37
+
38
@defun
def chebyfit(ctx, f, interval, N, error=False):
    r"""
    Computes a polynomial of degree `N-1` that approximates the
    given function `f` on the interval `[a, b]`. With ``error=True``,
    :func:`~mpmath.chebyfit` also returns an accurate estimate of the
    maximum absolute error; that is, the maximum value of
    `|f(x) - P(x)|` for `x \in [a, b]`.

    :func:`~mpmath.chebyfit` uses the Chebyshev approximation formula,
    which gives a nearly optimal solution: that is, the maximum
    error of the approximating polynomial is very close to
    the smallest possible for any polynomial of the same degree.

    Chebyshev approximation is very useful if one needs repeated
    evaluation of an expensive function, such as function defined
    implicitly by an integral or a differential equation. (For
    example, it could be used to turn a slow mpmath function
    into a fast machine-precision version of the same.)

    **Examples**

    Here we use :func:`~mpmath.chebyfit` to generate a low-degree approximation
    of `f(x) = \cos(x)`, valid on the interval `[1, 2]`::

        >>> from mpmath import *
        >>> mp.dps = 15; mp.pretty = True
        >>> poly, err = chebyfit(cos, [1, 2], 5, error=True)
        >>> nprint(poly)
        [0.00291682, 0.146166, -0.732491, 0.174141, 0.949553]
        >>> nprint(err, 12)
        1.61351758081e-5

    The polynomial can be evaluated using ``polyval``::

        >>> nprint(polyval(poly, 1.6), 12)
        -0.0291858904138
        >>> nprint(cos(1.6), 12)
        -0.0291995223013

    Sampling the true error at 1000 points shows that the error
    estimate generated by ``chebyfit`` is remarkably good::

        >>> error = lambda x: abs(cos(x) - polyval(poly, x))
        >>> nprint(max([error(1+n/1000.) for n in range(1000)]), 12)
        1.61349954245e-5

    **Choice of degree**

    The degree `N` can be set arbitrarily high, to obtain an
    arbitrarily good approximation. As a rule of thumb, an
    `N`-term Chebyshev approximation is good to `N/(b-a)` decimal
    places on a unit interval (although this depends on how
    well-behaved `f` is). The cost grows accordingly: ``chebyfit``
    evaluates the function `(N^2)/2` times to compute the
    coefficients and an additional `N` times to estimate the error.

    **Possible issues**

    One should be careful to use a sufficiently high working
    precision both when calling ``chebyfit`` and when evaluating
    the resulting polynomial, as the polynomial is sometimes
    ill-conditioned. It is for example difficult to reach
    15-digit accuracy when evaluating the polynomial using
    machine precision floats, no matter the theoretical
    accuracy of the polynomial. (The option to return the
    coefficients in Chebyshev form should be made available
    in the future.)

    It is important to note the Chebyshev approximation works
    poorly if `f` is not smooth. A function containing singularities,
    rapid oscillation, etc can be approximated more effectively by
    multiplying it by a weight function that cancels out the
    nonsmooth features, or by dividing the interval into several
    segments.
    """
    a, b = ctx._as_points(interval)
    orig = ctx.prec
    try:
        # Work at elevated precision; the change of basis below can be
        # ill-conditioned.
        ctx.prec = orig + int(N**0.5) + 20
        c = [chebcoeff(ctx,f,a,b,k,N) for k in range(N)]
        d = [ctx.zero] * N
        # Series is -c_0/2 + sum c_k T_k; the k=0 loop term adds c_0 back,
        # leaving the conventional c_0/2 constant term.
        d[0] = -c[0]/2
        h = ctx.mpf(0.5)
        # Chebyshev polynomials with the affine map from [a, b] to [-1, 1]
        # already substituted in, so d ends up in plain power-basis form.
        T = chebT(ctx, ctx.mpf(2)/(b-a), ctx.mpf(-1)*(b+a)/(b-a))
        for (k, Tk) in zip(range(N), T):
            for i in range(len(Tk)):
                d[i] += c[k]*Tk[i]
        # polyval expects the highest-degree coefficient first.
        d = d[::-1]
        # Estimate maximum error
        err = ctx.zero
        for k in range(N):
            x = ctx.cos(ctx.pi*k/N) * (b-a)*h + (b+a)*h
            err = max(err, abs(f(x) - ctx.polyval(d, x)))
    finally:
        # Restore caller precision even if f() raises.
        ctx.prec = orig
    if error:
        return d, +err
    else:
        return d
138
+
139
@defun
def fourier(ctx, f, interval, N):
    r"""
    Computes the Fourier series of degree `N` of the given function
    on the interval `[a, b]`. More precisely, :func:`~mpmath.fourier` returns
    two lists `(c, s)` of coefficients (the cosine series and sine
    series, respectively), such that

    .. math ::

        f(x) \sim \sum_{k=0}^N
            c_k \cos(k m x) + s_k \sin(k m x)

    where `m = 2 \pi / (b-a)`.

    Note that many texts define the first coefficient as `2 c_0` instead
    of `c_0`. The easiest way to evaluate the computed series correctly
    is to pass it to :func:`~mpmath.fourierval`.

    **Examples**

    The function `f(x) = x` has a simple Fourier series on the standard
    interval `[-\pi, \pi]`. The cosine coefficients are all zero (because
    the function has odd symmetry), and the sine coefficients are
    rational numbers::

        >>> from mpmath import *
        >>> mp.dps = 15; mp.pretty = True
        >>> c, s = fourier(lambda x: x, [-pi, pi], 5)
        >>> nprint(c)
        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
        >>> nprint(s)
        [0.0, 2.0, -1.0, 0.666667, -0.5, 0.4]

    This computes a Fourier series of a nonsymmetric function on
    a nonstandard interval::

        >>> I = [-1, 1.5]
        >>> f = lambda x: x**2 - 4*x + 1
        >>> cs = fourier(f, I, 4)
        >>> nprint(cs[0])
        [0.583333, 1.12479, -1.27552, 0.904708, -0.441296]
        >>> nprint(cs[1])
        [0.0, -2.6255, 0.580905, 0.219974, -0.540057]

    It is instructive to plot a function along with its truncated
    Fourier series::

        >>> plot([f, lambda x: fourierval(cs, I, x)], I) #doctest: +SKIP

    Fourier series generally converge slowly (and may not converge
    pointwise). For example, if `f(x) = \cosh(x)`, a 10-term Fourier
    series gives an `L^2` error corresponding to 2-digit accuracy::

        >>> I = [-1, 1]
        >>> cs = fourier(cosh, I, 9)
        >>> g = lambda x: (cosh(x) - fourierval(cs, I, x))**2
        >>> nprint(sqrt(quad(g, I)))
        0.00467963

    :func:`~mpmath.fourier` uses numerical quadrature. For nonsmooth functions,
    the accuracy (and speed) can be improved by including all singular
    points in the interval specification::

        >>> nprint(fourier(abs, [-1, 1], 0), 10)
        ([0.5000441648], [0.0])
        >>> nprint(fourier(abs, [-1, 0, 1], 0), 10)
        ([0.5], [0.0])

    """
    interval = ctx._as_points(interval)
    a = interval[0]
    b = interval[-1]
    L = b-a
    cos_series = []
    sin_series = []
    cutoff = ctx.eps*10
    for n in xrange(N+1):
        m = 2*n*ctx.pi/L
        # Each coefficient is a Gauss-Legendre quadrature over the interval
        # (interior points of `interval` become quadrature subintervals).
        an = 2*ctx.quadgl(lambda t: f(t)*ctx.cos(m*t), interval)/L
        bn = 2*ctx.quadgl(lambda t: f(t)*ctx.sin(m*t), interval)/L
        if n == 0:
            an /= 2
        # Snap tiny coefficients (quadrature noise) to exact zero.
        if abs(an) < cutoff: an = ctx.zero
        if abs(bn) < cutoff: bn = ctx.zero
        cos_series.append(an)
        sin_series.append(bn)
    return cos_series, sin_series
227
+
228
@defun
def fourierval(ctx, series, interval, x):
    """
    Evaluates a Fourier series (in the format computed
    by :func:`~mpmath.fourier` for the given interval) at the point `x`.

    The series should be a pair `(c, s)` where `c` is the
    cosine series and `s` is the sine series. The two lists
    need not have the same length.
    """
    cs, ss = series
    ab = ctx._as_points(interval)
    # Fundamental angular frequency 2*pi/(b - a); only the interval's
    # endpoints matter here.  (Removed two unused locals that re-read the
    # raw `interval` argument instead of the normalized `ab`.)
    m = 2*ctx.pi/(ab[-1]-ab[0])
    s = ctx.zero
    # Skip exact-zero coefficients to avoid pointless trig evaluations.
    s += ctx.fsum(cs[n]*ctx.cos(m*n*x) for n in xrange(len(cs)) if cs[n])
    s += ctx.fsum(ss[n]*ctx.sin(m*n*x) for n in xrange(len(ss)) if ss[n])
    return s
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/matrices/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ from . import eigen # to set methods
2
+ from . import eigen_symmetric # to set methods
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/matrices/__pycache__/calculus.cpython-311.pyc ADDED
Binary file (22.9 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/matrices/calculus.py ADDED
@@ -0,0 +1,531 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from ..libmp.backend import xrange
2
+
3
+ # TODO: should use diagonalization-based algorithms
4
+
5
+ class MatrixCalculusMethods(object):
6
+
7
+ def _exp_pade(ctx, a):
8
+ """
9
+ Exponential of a matrix using Pade approximants.
10
+
11
+ See G. H. Golub, C. F. van Loan 'Matrix Computations',
12
+ third Ed., page 572
13
+
14
+ TODO:
15
+ - find a good estimate for q
16
+ - reduce the number of matrix multiplications to improve
17
+ performance
18
+ """
19
+ def eps_pade(p):
20
+ return ctx.mpf(2)**(3-2*p) * \
21
+ ctx.factorial(p)**2/(ctx.factorial(2*p)**2 * (2*p + 1))
22
+ q = 4
23
+ extraq = 8
24
+ while 1:
25
+ if eps_pade(q) < ctx.eps:
26
+ break
27
+ q += 1
28
+ q += extraq
29
+ j = int(max(1, ctx.mag(ctx.mnorm(a,'inf'))))
30
+ extra = q
31
+ prec = ctx.prec
32
+ ctx.dps += extra + 3
33
+ try:
34
+ a = a/2**j
35
+ na = a.rows
36
+ den = ctx.eye(na)
37
+ num = ctx.eye(na)
38
+ x = ctx.eye(na)
39
+ c = ctx.mpf(1)
40
+ for k in range(1, q+1):
41
+ c *= ctx.mpf(q - k + 1)/((2*q - k + 1) * k)
42
+ x = a*x
43
+ cx = c*x
44
+ num += cx
45
+ den += (-1)**k * cx
46
+ f = ctx.lu_solve_mat(den, num)
47
+ for k in range(j):
48
+ f = f*f
49
+ finally:
50
+ ctx.prec = prec
51
+ return f*1
52
+
53
+ def expm(ctx, A, method='taylor'):
54
+ r"""
55
+ Computes the matrix exponential of a square matrix `A`, which is defined
56
+ by the power series
57
+
58
+ .. math ::
59
+
60
+ \exp(A) = I + A + \frac{A^2}{2!} + \frac{A^3}{3!} + \ldots
61
+
62
+ With method='taylor', the matrix exponential is computed
63
+ using the Taylor series. With method='pade', Pade approximants
64
+ are used instead.
65
+
66
+ **Examples**
67
+
68
+ Basic examples::
69
+
70
+ >>> from mpmath import *
71
+ >>> mp.dps = 15; mp.pretty = True
72
+ >>> expm(zeros(3))
73
+ [1.0 0.0 0.0]
74
+ [0.0 1.0 0.0]
75
+ [0.0 0.0 1.0]
76
+ >>> expm(eye(3))
77
+ [2.71828182845905 0.0 0.0]
78
+ [ 0.0 2.71828182845905 0.0]
79
+ [ 0.0 0.0 2.71828182845905]
80
+ >>> expm([[1,1,0],[1,0,1],[0,1,0]])
81
+ [ 3.86814500615414 2.26812870852145 0.841130841230196]
82
+ [ 2.26812870852145 2.44114713886289 1.42699786729125]
83
+ [0.841130841230196 1.42699786729125 1.6000162976327]
84
+ >>> expm([[1,1,0],[1,0,1],[0,1,0]], method='pade')
85
+ [ 3.86814500615414 2.26812870852145 0.841130841230196]
86
+ [ 2.26812870852145 2.44114713886289 1.42699786729125]
87
+ [0.841130841230196 1.42699786729125 1.6000162976327]
88
+ >>> expm([[1+j, 0], [1+j,1]])
89
+ [(1.46869393991589 + 2.28735528717884j) 0.0]
90
+ [ (1.03776739863568 + 3.536943175722j) (2.71828182845905 + 0.0j)]
91
+
92
+ Matrices with large entries are allowed::
93
+
94
+ >>> expm(matrix([[1,2],[2,3]])**25)
95
+ [5.65024064048415e+2050488462815550 9.14228140091932e+2050488462815550]
96
+ [9.14228140091932e+2050488462815550 1.47925220414035e+2050488462815551]
97
+
98
+ The identity `\exp(A+B) = \exp(A) \exp(B)` does not hold for
99
+ noncommuting matrices::
100
+
101
+ >>> A = hilbert(3)
102
+ >>> B = A + eye(3)
103
+ >>> chop(mnorm(A*B - B*A))
104
+ 0.0
105
+ >>> chop(mnorm(expm(A+B) - expm(A)*expm(B)))
106
+ 0.0
107
+ >>> B = A + ones(3)
108
+ >>> mnorm(A*B - B*A)
109
+ 1.8
110
+ >>> mnorm(expm(A+B) - expm(A)*expm(B))
111
+ 42.0927851137247
112
+
113
+ """
114
+ if method == 'pade':
115
+ prec = ctx.prec
116
+ try:
117
+ A = ctx.matrix(A)
118
+ ctx.prec += 2*A.rows
119
+ res = ctx._exp_pade(A)
120
+ finally:
121
+ ctx.prec = prec
122
+ return res
123
+ A = ctx.matrix(A)
124
+ prec = ctx.prec
125
+ j = int(max(1, ctx.mag(ctx.mnorm(A,'inf'))))
126
+ j += int(0.5*prec**0.5)
127
+ try:
128
+ ctx.prec += 10 + 2*j
129
+ tol = +ctx.eps
130
+ A = A/2**j
131
+ T = A
132
+ Y = A**0 + A
133
+ k = 2
134
+ while 1:
135
+ T *= A * (1/ctx.mpf(k))
136
+ if ctx.mnorm(T, 'inf') < tol:
137
+ break
138
+ Y += T
139
+ k += 1
140
+ for k in xrange(j):
141
+ Y = Y*Y
142
+ finally:
143
+ ctx.prec = prec
144
+ Y *= 1
145
+ return Y
146
+
147
+ def cosm(ctx, A):
148
+ r"""
149
+ Gives the cosine of a square matrix `A`, defined in analogy
150
+ with the matrix exponential.
151
+
152
+ Examples::
153
+
154
+ >>> from mpmath import *
155
+ >>> mp.dps = 15; mp.pretty = True
156
+ >>> X = eye(3)
157
+ >>> cosm(X)
158
+ [0.54030230586814 0.0 0.0]
159
+ [ 0.0 0.54030230586814 0.0]
160
+ [ 0.0 0.0 0.54030230586814]
161
+ >>> X = hilbert(3)
162
+ >>> cosm(X)
163
+ [ 0.424403834569555 -0.316643413047167 -0.221474945949293]
164
+ [-0.316643413047167 0.820646708837824 -0.127183694770039]
165
+ [-0.221474945949293 -0.127183694770039 0.909236687217541]
166
+ >>> X = matrix([[1+j,-2],[0,-j]])
167
+ >>> cosm(X)
168
+ [(0.833730025131149 - 0.988897705762865j) (1.07485840848393 - 0.17192140544213j)]
169
+ [ 0.0 (1.54308063481524 + 0.0j)]
170
+ """
171
+ B = 0.5 * (ctx.expm(A*ctx.j) + ctx.expm(A*(-ctx.j)))
172
+ if not sum(A.apply(ctx.im).apply(abs)):
173
+ B = B.apply(ctx.re)
174
+ return B
175
+
176
+ def sinm(ctx, A):
177
+ r"""
178
+ Gives the sine of a square matrix `A`, defined in analogy
179
+ with the matrix exponential.
180
+
181
+ Examples::
182
+
183
+ >>> from mpmath import *
184
+ >>> mp.dps = 15; mp.pretty = True
185
+ >>> X = eye(3)
186
+ >>> sinm(X)
187
+ [0.841470984807897 0.0 0.0]
188
+ [ 0.0 0.841470984807897 0.0]
189
+ [ 0.0 0.0 0.841470984807897]
190
+ >>> X = hilbert(3)
191
+ >>> sinm(X)
192
+ [0.711608512150994 0.339783913247439 0.220742837314741]
193
+ [0.339783913247439 0.244113865695532 0.187231271174372]
194
+ [0.220742837314741 0.187231271174372 0.155816730769635]
195
+ >>> X = matrix([[1+j,-2],[0,-j]])
196
+ >>> sinm(X)
197
+ [(1.29845758141598 + 0.634963914784736j) (-1.96751511930922 + 0.314700021761367j)]
198
+ [ 0.0 (0.0 - 1.1752011936438j)]
199
+ """
200
+ B = (-0.5j) * (ctx.expm(A*ctx.j) - ctx.expm(A*(-ctx.j)))
201
+ if not sum(A.apply(ctx.im).apply(abs)):
202
+ B = B.apply(ctx.re)
203
+ return B
204
+
205
+ def _sqrtm_rot(ctx, A, _may_rotate):
206
+ # If the iteration fails to converge, cheat by performing
207
+ # a rotation by a complex number
208
+ u = ctx.j**0.3
209
+ return ctx.sqrtm(u*A, _may_rotate) / ctx.sqrt(u)
210
+
211
+ def sqrtm(ctx, A, _may_rotate=2):
212
+ r"""
213
+ Computes a square root of the square matrix `A`, i.e. returns
214
+ a matrix `B = A^{1/2}` such that `B^2 = A`. The square root
215
+ of a matrix, if it exists, is not unique.
216
+
217
+ **Examples**
218
+
219
+ Square roots of some simple matrices::
220
+
221
+ >>> from mpmath import *
222
+ >>> mp.dps = 15; mp.pretty = True
223
+ >>> sqrtm([[1,0], [0,1]])
224
+ [1.0 0.0]
225
+ [0.0 1.0]
226
+ >>> sqrtm([[0,0], [0,0]])
227
+ [0.0 0.0]
228
+ [0.0 0.0]
229
+ >>> sqrtm([[2,0],[0,1]])
230
+ [1.4142135623731 0.0]
231
+ [ 0.0 1.0]
232
+ >>> sqrtm([[1,1],[1,0]])
233
+ [ (0.920442065259926 - 0.21728689675164j) (0.568864481005783 + 0.351577584254143j)]
234
+ [(0.568864481005783 + 0.351577584254143j) (0.351577584254143 - 0.568864481005783j)]
235
+ >>> sqrtm([[1,0],[0,1]])
236
+ [1.0 0.0]
237
+ [0.0 1.0]
238
+ >>> sqrtm([[-1,0],[0,1]])
239
+ [(0.0 - 1.0j) 0.0]
240
+ [ 0.0 (1.0 + 0.0j)]
241
+ >>> sqrtm([[j,0],[0,j]])
242
+ [(0.707106781186547 + 0.707106781186547j) 0.0]
243
+ [ 0.0 (0.707106781186547 + 0.707106781186547j)]
244
+
245
+ A square root of a rotation matrix, giving the corresponding
246
+ half-angle rotation matrix::
247
+
248
+ >>> t1 = 0.75
249
+ >>> t2 = t1 * 0.5
250
+ >>> A1 = matrix([[cos(t1), -sin(t1)], [sin(t1), cos(t1)]])
251
+ >>> A2 = matrix([[cos(t2), -sin(t2)], [sin(t2), cos(t2)]])
252
+ >>> sqrtm(A1)
253
+ [0.930507621912314 -0.366272529086048]
254
+ [0.366272529086048 0.930507621912314]
255
+ >>> A2
256
+ [0.930507621912314 -0.366272529086048]
257
+ [0.366272529086048 0.930507621912314]
258
+
259
+ The identity `(A^2)^{1/2} = A` does not necessarily hold::
260
+
261
+ >>> A = matrix([[4,1,4],[7,8,9],[10,2,11]])
262
+ >>> sqrtm(A**2)
263
+ [ 4.0 1.0 4.0]
264
+ [ 7.0 8.0 9.0]
265
+ [10.0 2.0 11.0]
266
+ >>> sqrtm(A)**2
267
+ [ 4.0 1.0 4.0]
268
+ [ 7.0 8.0 9.0]
269
+ [10.0 2.0 11.0]
270
+ >>> A = matrix([[-4,1,4],[7,-8,9],[10,2,11]])
271
+ >>> sqrtm(A**2)
272
+ [ 7.43715112194995 -0.324127569985474 1.8481718827526]
273
+ [-0.251549715716942 9.32699765900402 2.48221180985147]
274
+ [ 4.11609388833616 0.775751877098258 13.017955697342]
275
+ >>> chop(sqrtm(A)**2)
276
+ [-4.0 1.0 4.0]
277
+ [ 7.0 -8.0 9.0]
278
+ [10.0 2.0 11.0]
279
+
280
+ For some matrices, a square root does not exist::
281
+
282
+ >>> sqrtm([[0,1], [0,0]])
283
+ Traceback (most recent call last):
284
+ ...
285
+ ZeroDivisionError: matrix is numerically singular
286
+
287
+ Two examples from the documentation for Matlab's ``sqrtm``::
288
+
289
+ >>> mp.dps = 15; mp.pretty = True
290
+ >>> sqrtm([[7,10],[15,22]])
291
+ [1.56669890360128 1.74077655955698]
292
+ [2.61116483933547 4.17786374293675]
293
+ >>>
294
+ >>> X = matrix(\
295
+ ... [[5,-4,1,0,0],
296
+ ... [-4,6,-4,1,0],
297
+ ... [1,-4,6,-4,1],
298
+ ... [0,1,-4,6,-4],
299
+ ... [0,0,1,-4,5]])
300
+ >>> Y = matrix(\
301
+ ... [[2,-1,-0,-0,-0],
302
+ ... [-1,2,-1,0,-0],
303
+ ... [0,-1,2,-1,0],
304
+ ... [-0,0,-1,2,-1],
305
+ ... [-0,-0,-0,-1,2]])
306
+ >>> mnorm(sqrtm(X) - Y)
307
+ 4.53155328326114e-19
308
+
309
+ """
310
+ A = ctx.matrix(A)
311
+ # Trivial
312
+ if A*0 == A:
313
+ return A
314
+ prec = ctx.prec
315
+ if _may_rotate:
316
+ d = ctx.det(A)
317
+ if abs(ctx.im(d)) < 16*ctx.eps and ctx.re(d) < 0:
318
+ return ctx._sqrtm_rot(A, _may_rotate-1)
319
+ try:
320
+ ctx.prec += 10
321
+ tol = ctx.eps * 128
322
+ Y = A
323
+ Z = I = A**0
324
+ k = 0
325
+ # Denman-Beavers iteration
326
+ while 1:
327
+ Yprev = Y
328
+ try:
329
+ Y, Z = 0.5*(Y+ctx.inverse(Z)), 0.5*(Z+ctx.inverse(Y))
330
+ except ZeroDivisionError:
331
+ if _may_rotate:
332
+ Y = ctx._sqrtm_rot(A, _may_rotate-1)
333
+ break
334
+ else:
335
+ raise
336
+ mag1 = ctx.mnorm(Y-Yprev, 'inf')
337
+ mag2 = ctx.mnorm(Y, 'inf')
338
+ if mag1 <= mag2*tol:
339
+ break
340
+ if _may_rotate and k > 6 and not mag1 < mag2 * 0.001:
341
+ return ctx._sqrtm_rot(A, _may_rotate-1)
342
+ k += 1
343
+ if k > ctx.prec:
344
+ raise ctx.NoConvergence
345
+ finally:
346
+ ctx.prec = prec
347
+ Y *= 1
348
+ return Y
349
+
350
+ def logm(ctx, A):
351
+ r"""
352
+ Computes a logarithm of the square matrix `A`, i.e. returns
353
+ a matrix `B = \log(A)` such that `\exp(B) = A`. The logarithm
354
+ of a matrix, if it exists, is not unique.
355
+
356
+ **Examples**
357
+
358
+ Logarithms of some simple matrices::
359
+
360
+ >>> from mpmath import *
361
+ >>> mp.dps = 15; mp.pretty = True
362
+ >>> X = eye(3)
363
+ >>> logm(X)
364
+ [0.0 0.0 0.0]
365
+ [0.0 0.0 0.0]
366
+ [0.0 0.0 0.0]
367
+ >>> logm(2*X)
368
+ [0.693147180559945 0.0 0.0]
369
+ [ 0.0 0.693147180559945 0.0]
370
+ [ 0.0 0.0 0.693147180559945]
371
+ >>> logm(expm(X))
372
+ [1.0 0.0 0.0]
373
+ [0.0 1.0 0.0]
374
+ [0.0 0.0 1.0]
375
+
376
+ A logarithm of a complex matrix::
377
+
378
+ >>> X = matrix([[2+j, 1, 3], [1-j, 1-2*j, 1], [-4, -5, j]])
379
+ >>> B = logm(X)
380
+ >>> nprint(B)
381
+ [ (0.808757 + 0.107759j) (2.20752 + 0.202762j) (1.07376 - 0.773874j)]
382
+ [ (0.905709 - 0.107795j) (0.0287395 - 0.824993j) (0.111619 + 0.514272j)]
383
+ [(-0.930151 + 0.399512j) (-2.06266 - 0.674397j) (0.791552 + 0.519839j)]
384
+ >>> chop(expm(B))
385
+ [(2.0 + 1.0j) 1.0 3.0]
386
+ [(1.0 - 1.0j) (1.0 - 2.0j) 1.0]
387
+ [ -4.0 -5.0 (0.0 + 1.0j)]
388
+
389
+ A matrix `X` close to the identity matrix, for which
390
+ `\log(\exp(X)) = \exp(\log(X)) = X` holds::
391
+
392
+ >>> X = eye(3) + hilbert(3)/4
393
+ >>> X
394
+ [ 1.25 0.125 0.0833333333333333]
395
+ [ 0.125 1.08333333333333 0.0625]
396
+ [0.0833333333333333 0.0625 1.05]
397
+ >>> logm(expm(X))
398
+ [ 1.25 0.125 0.0833333333333333]
399
+ [ 0.125 1.08333333333333 0.0625]
400
+ [0.0833333333333333 0.0625 1.05]
401
+ >>> expm(logm(X))
402
+ [ 1.25 0.125 0.0833333333333333]
403
+ [ 0.125 1.08333333333333 0.0625]
404
+ [0.0833333333333333 0.0625 1.05]
405
+
406
+ A logarithm of a rotation matrix, giving back the angle of
407
+ the rotation::
408
+
409
+ >>> t = 3.7
410
+ >>> A = matrix([[cos(t),sin(t)],[-sin(t),cos(t)]])
411
+ >>> chop(logm(A))
412
+ [ 0.0 -2.58318530717959]
413
+ [2.58318530717959 0.0]
414
+ >>> (2*pi-t)
415
+ 2.58318530717959
416
+
417
+ For some matrices, a logarithm does not exist::
418
+
419
+ >>> logm([[1,0], [0,0]])
420
+ Traceback (most recent call last):
421
+ ...
422
+ ZeroDivisionError: matrix is numerically singular
423
+
424
+ Logarithm of a matrix with large entries::
425
+
426
+ >>> logm(hilbert(3) * 10**20).apply(re)
427
+ [ 45.5597513593433 1.27721006042799 0.317662687717978]
428
+ [ 1.27721006042799 42.5222778973542 2.24003708791604]
429
+ [0.317662687717978 2.24003708791604 42.395212822267]
430
+
431
+ """
432
+ A = ctx.matrix(A)
433
+ prec = ctx.prec
434
+ try:
435
+ ctx.prec += 10
436
+ tol = ctx.eps * 128
437
+ I = A**0
438
+ B = A
439
+ n = 0
440
+ while 1:
441
+ B = ctx.sqrtm(B)
442
+ n += 1
443
+ if ctx.mnorm(B-I, 'inf') < 0.125:
444
+ break
445
+ T = X = B-I
446
+ L = X*0
447
+ k = 1
448
+ while 1:
449
+ if k & 1:
450
+ L += T / k
451
+ else:
452
+ L -= T / k
453
+ T *= X
454
+ if ctx.mnorm(T, 'inf') < tol:
455
+ break
456
+ k += 1
457
+ if k > ctx.prec:
458
+ raise ctx.NoConvergence
459
+ finally:
460
+ ctx.prec = prec
461
+ L *= 2**n
462
+ return L
463
+
464
+ def powm(ctx, A, r):
465
+ r"""
466
+ Computes `A^r = \exp(A \log r)` for a matrix `A` and complex
467
+ number `r`.
468
+
469
+ **Examples**
470
+
471
+ Powers and inverse powers of a matrix::
472
+
473
+ >>> from mpmath import *
474
+ >>> mp.dps = 15; mp.pretty = True
475
+ >>> A = matrix([[4,1,4],[7,8,9],[10,2,11]])
476
+ >>> powm(A, 2)
477
+ [ 63.0 20.0 69.0]
478
+ [174.0 89.0 199.0]
479
+ [164.0 48.0 179.0]
480
+ >>> chop(powm(powm(A, 4), 1/4.))
481
+ [ 4.0 1.0 4.0]
482
+ [ 7.0 8.0 9.0]
483
+ [10.0 2.0 11.0]
484
+ >>> powm(extraprec(20)(powm)(A, -4), -1/4.)
485
+ [ 4.0 1.0 4.0]
486
+ [ 7.0 8.0 9.0]
487
+ [10.0 2.0 11.0]
488
+ >>> chop(powm(powm(A, 1+0.5j), 1/(1+0.5j)))
489
+ [ 4.0 1.0 4.0]
490
+ [ 7.0 8.0 9.0]
491
+ [10.0 2.0 11.0]
492
+ >>> powm(extraprec(5)(powm)(A, -1.5), -1/(1.5))
493
+ [ 4.0 1.0 4.0]
494
+ [ 7.0 8.0 9.0]
495
+ [10.0 2.0 11.0]
496
+
497
+ A Fibonacci-generating matrix::
498
+
499
+ >>> powm([[1,1],[1,0]], 10)
500
+ [89.0 55.0]
501
+ [55.0 34.0]
502
+ >>> fib(10)
503
+ 55.0
504
+ >>> powm([[1,1],[1,0]], 6.5)
505
+ [(16.5166626964253 - 0.0121089837381789j) (10.2078589271083 + 0.0195927472575932j)]
506
+ [(10.2078589271083 + 0.0195927472575932j) (6.30880376931698 - 0.0317017309957721j)]
507
+ >>> (phi**6.5 - (1-phi)**6.5)/sqrt(5)
508
+ (10.2078589271083 - 0.0195927472575932j)
509
+ >>> powm([[1,1],[1,0]], 6.2)
510
+ [ (14.3076953002666 - 0.008222855781077j) (8.81733464837593 + 0.0133048601383712j)]
511
+ [(8.81733464837593 + 0.0133048601383712j) (5.49036065189071 - 0.0215277159194482j)]
512
+ >>> (phi**6.2 - (1-phi)**6.2)/sqrt(5)
513
+ (8.81733464837593 - 0.0133048601383712j)
514
+
515
+ """
516
+ A = ctx.matrix(A)
517
+ r = ctx.convert(r)
518
+ prec = ctx.prec
519
+ try:
520
+ ctx.prec += 10
521
+ if ctx.isint(r):
522
+ v = A ** int(r)
523
+ elif ctx.isint(r*2):
524
+ y = int(r*2)
525
+ v = ctx.sqrtm(A) ** y
526
+ else:
527
+ v = ctx.expm(r*ctx.logm(A))
528
+ finally:
529
+ ctx.prec = prec
530
+ v *= 1
531
+ return v
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/matrices/eigen.py ADDED
@@ -0,0 +1,877 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ ##################################################################################################
5
+ # module for the eigenvalue problem
6
+ # Copyright 2013 Timo Hartmann (thartmann15 at gmail.com)
7
+ #
8
+ # todo:
9
+ # - implement balancing
10
+ # - agressive early deflation
11
+ #
12
+ ##################################################################################################
13
+
14
+ """
15
+ The eigenvalue problem
16
+ ----------------------
17
+
18
+ This file contains routines for the eigenvalue problem.
19
+
20
+ high level routines:
21
+
22
+ hessenberg : reduction of a real or complex square matrix to upper Hessenberg form
23
+ schur : reduction of a real or complex square matrix to upper Schur form
24
+ eig : eigenvalues and eigenvectors of a real or complex square matrix
25
+
26
+ low level routines:
27
+
28
+ hessenberg_reduce_0 : reduction of a real or complex square matrix to upper Hessenberg form
29
+ hessenberg_reduce_1 : auxiliary routine to hessenberg_reduce_0
30
+ qr_step : a single implicitly shifted QR step for an upper Hessenberg matrix
31
+ hessenberg_qr : Schur decomposition of an upper Hessenberg matrix
32
+ eig_tr_r : right eigenvectors of an upper triangular matrix
33
+ eig_tr_l : left eigenvectors of an upper triangular matrix
34
+ """
35
+
36
+ from ..libmp.backend import xrange
37
+
38
+ class Eigen(object):
39
+ pass
40
+
41
+ def defun(f):
42
+ setattr(Eigen, f.__name__, f)
43
+ return f
44
+
45
+ def hessenberg_reduce_0(ctx, A, T):
46
+ """
47
+ This routine computes the (upper) Hessenberg decomposition of a square matrix A.
48
+ Given A, an unitary matrix Q is calculated such that
49
+
50
+ Q' A Q = H and Q' Q = Q Q' = 1
51
+
52
+ where H is an upper Hessenberg matrix, meaning that it only contains zeros
53
+ below the first subdiagonal. Here ' denotes the hermitian transpose (i.e.
54
+ transposition and conjugation).
55
+
56
+ parameters:
57
+ A (input/output) On input, A contains the square matrix A of
58
+ dimension (n,n). On output, A contains a compressed representation
59
+ of Q and H.
60
+ T (output) An array of length n containing the first elements of
61
+ the Householder reflectors.
62
+ """
63
+
64
+ # internally we work with householder reflections from the right.
65
+ # let u be a row vector (i.e. u[i]=A[i,:i]). then
66
+ # Q is build up by reflectors of the type (1-v'v) where v is a suitable
67
+ # modification of u. these reflectors are applyed to A from the right.
68
+ # because we work with reflectors from the right we have to start with
69
+ # the bottom row of A and work then upwards (this corresponds to
70
+ # some kind of RQ decomposition).
71
+ # the first part of the vectors v (i.e. A[i,:(i-1)]) are stored as row vectors
72
+ # in the lower left part of A (excluding the diagonal and subdiagonal).
73
+ # the last entry of v is stored in T.
74
+ # the upper right part of A (including diagonal and subdiagonal) becomes H.
75
+
76
+
77
+ n = A.rows
78
+ if n <= 2: return
79
+
80
+ for i in xrange(n-1, 1, -1):
81
+
82
+ # scale the vector
83
+
84
+ scale = 0
85
+ for k in xrange(0, i):
86
+ scale += abs(ctx.re(A[i,k])) + abs(ctx.im(A[i,k]))
87
+
88
+ scale_inv = 0
89
+ if scale != 0:
90
+ scale_inv = 1 / scale
91
+
92
+ if scale == 0 or ctx.isinf(scale_inv):
93
+ # sadly there are floating point numbers not equal to zero whose reciprocal is infinity
94
+ T[i] = 0
95
+ A[i,i-1] = 0
96
+ continue
97
+
98
+ # calculate parameters for housholder transformation
99
+
100
+ H = 0
101
+ for k in xrange(0, i):
102
+ A[i,k] *= scale_inv
103
+ rr = ctx.re(A[i,k])
104
+ ii = ctx.im(A[i,k])
105
+ H += rr * rr + ii * ii
106
+
107
+ F = A[i,i-1]
108
+ f = abs(F)
109
+ G = ctx.sqrt(H)
110
+ A[i,i-1] = - G * scale
111
+
112
+ if f == 0:
113
+ T[i] = G
114
+ else:
115
+ ff = F / f
116
+ T[i] = F + G * ff
117
+ A[i,i-1] *= ff
118
+
119
+ H += G * f
120
+ H = 1 / ctx.sqrt(H)
121
+
122
+ T[i] *= H
123
+ for k in xrange(0, i - 1):
124
+ A[i,k] *= H
125
+
126
+ for j in xrange(0, i):
127
+ # apply housholder transformation (from right)
128
+
129
+ G = ctx.conj(T[i]) * A[j,i-1]
130
+ for k in xrange(0, i-1):
131
+ G += ctx.conj(A[i,k]) * A[j,k]
132
+
133
+ A[j,i-1] -= G * T[i]
134
+ for k in xrange(0, i-1):
135
+ A[j,k] -= G * A[i,k]
136
+
137
+ for j in xrange(0, n):
138
+ # apply housholder transformation (from left)
139
+
140
+ G = T[i] * A[i-1,j]
141
+ for k in xrange(0, i-1):
142
+ G += A[i,k] * A[k,j]
143
+
144
+ A[i-1,j] -= G * ctx.conj(T[i])
145
+ for k in xrange(0, i-1):
146
+ A[k,j] -= G * ctx.conj(A[i,k])
147
+
148
+
149
+
150
+ def hessenberg_reduce_1(ctx, A, T):
151
+ """
152
+ This routine forms the unitary matrix Q described in hessenberg_reduce_0.
153
+
154
+ parameters:
155
+ A (input/output) On input, A is the same matrix as delivered by
156
+ hessenberg_reduce_0. On output, A is set to Q.
157
+
158
+ T (input) On input, T is the same array as delivered by hessenberg_reduce_0.
159
+ """
160
+
161
+ n = A.rows
162
+
163
+ if n == 1:
164
+ A[0,0] = 1
165
+ return
166
+
167
+ A[0,0] = A[1,1] = 1
168
+ A[0,1] = A[1,0] = 0
169
+
170
+ for i in xrange(2, n):
171
+ if T[i] != 0:
172
+
173
+ for j in xrange(0, i):
174
+ G = T[i] * A[i-1,j]
175
+ for k in xrange(0, i-1):
176
+ G += A[i,k] * A[k,j]
177
+
178
+ A[i-1,j] -= G * ctx.conj(T[i])
179
+ for k in xrange(0, i-1):
180
+ A[k,j] -= G * ctx.conj(A[i,k])
181
+
182
+ A[i,i] = 1
183
+ for j in xrange(0, i):
184
+ A[j,i] = A[i,j] = 0
185
+
186
+
187
+
188
+ @defun
189
+ def hessenberg(ctx, A, overwrite_a = False):
190
+ """
191
+ This routine computes the Hessenberg decomposition of a square matrix A.
192
+ Given A, an unitary matrix Q is determined such that
193
+
194
+ Q' A Q = H and Q' Q = Q Q' = 1
195
+
196
+ where H is an upper right Hessenberg matrix. Here ' denotes the hermitian
197
+ transpose (i.e. transposition and conjugation).
198
+
199
+ input:
200
+ A : a real or complex square matrix
201
+ overwrite_a : if true, allows modification of A which may improve
202
+ performance. if false, A is not modified.
203
+
204
+ output:
205
+ Q : an unitary matrix
206
+ H : an upper right Hessenberg matrix
207
+
208
+ example:
209
+ >>> from mpmath import mp
210
+ >>> A = mp.matrix([[3, -1, 2], [2, 5, -5], [-2, -3, 7]])
211
+ >>> Q, H = mp.hessenberg(A)
212
+ >>> mp.nprint(H, 3) # doctest:+SKIP
213
+ [ 3.15 2.23 4.44]
214
+ [-0.769 4.85 3.05]
215
+ [ 0.0 3.61 7.0]
216
+ >>> print(mp.chop(A - Q * H * Q.transpose_conj()))
217
+ [0.0 0.0 0.0]
218
+ [0.0 0.0 0.0]
219
+ [0.0 0.0 0.0]
220
+
221
+ return value: (Q, H)
222
+ """
223
+
224
+ n = A.rows
225
+
226
+ if n == 1:
227
+ return (ctx.matrix([[1]]), A)
228
+
229
+ if not overwrite_a:
230
+ A = A.copy()
231
+
232
+ T = ctx.matrix(n, 1)
233
+
234
+ hessenberg_reduce_0(ctx, A, T)
235
+ Q = A.copy()
236
+ hessenberg_reduce_1(ctx, Q, T)
237
+
238
+ for x in xrange(n):
239
+ for y in xrange(x+2, n):
240
+ A[y,x] = 0
241
+
242
+ return Q, A
243
+
244
+
245
+ ###########################################################################
246
+
247
+
248
+ def qr_step(ctx, n0, n1, A, Q, shift):
249
+ """
250
+ This subroutine executes a single implicitly shifted QR step applied to an
251
+ upper Hessenberg matrix A. Given A and shift as input, first an QR
252
+ decomposition is calculated:
253
+
254
+ Q R = A - shift * 1 .
255
+
256
+ The output is then following matrix:
257
+
258
+ R Q + shift * 1
259
+
260
+ parameters:
261
+ n0, n1 (input) Two integers which specify the submatrix A[n0:n1,n0:n1]
262
+ on which this subroutine operators. The subdiagonal elements
263
+ to the left and below this submatrix must be deflated (i.e. zero).
264
+ following restriction is imposed: n1>=n0+2
265
+ A (input/output) On input, A is an upper Hessenberg matrix.
266
+ On output, A is replaced by "R Q + shift * 1"
267
+ Q (input/output) The parameter Q is multiplied by the unitary matrix
268
+ Q arising from the QR decomposition. Q can also be false, in which
269
+ case the unitary matrix Q is not computated.
270
+ shift (input) a complex number specifying the shift. idealy close to an
271
+ eigenvalue of the bottemmost part of the submatrix A[n0:n1,n0:n1].
272
+
273
+ references:
274
+ Stoer, Bulirsch - Introduction to Numerical Analysis.
275
+ Kresser : Numerical Methods for General and Structured Eigenvalue Problems
276
+ """
277
+
278
+ # implicitly shifted and bulge chasing is explained at p.398/399 in "Stoer, Bulirsch - Introduction to Numerical Analysis"
279
+ # for bulge chasing see also "Watkins - The Matrix Eigenvalue Problem" sec.4.5,p.173
280
+
281
+ # the Givens rotation we used is determined as follows: let c,s be two complex
282
+ # numbers. then we have following relation:
283
+ #
284
+ # v = sqrt(|c|^2 + |s|^2)
285
+ #
286
+ # 1/v [ c~ s~] [c] = [v]
287
+ # [-s c ] [s] [0]
288
+ #
289
+ # the matrix on the left is our Givens rotation.
290
+
291
+ n = A.rows
292
+
293
+ # first step
294
+
295
+ # calculate givens rotation
296
+ c = A[n0 ,n0] - shift
297
+ s = A[n0+1,n0]
298
+
299
+ v = ctx.hypot(ctx.hypot(ctx.re(c), ctx.im(c)), ctx.hypot(ctx.re(s), ctx.im(s)))
300
+
301
+ if v == 0:
302
+ v = 1
303
+ c = 1
304
+ s = 0
305
+ else:
306
+ c /= v
307
+ s /= v
308
+
309
+ cc = ctx.conj(c)
310
+ cs = ctx.conj(s)
311
+
312
+ for k in xrange(n0, n):
313
+ # apply givens rotation from the left
314
+ x = A[n0 ,k]
315
+ y = A[n0+1,k]
316
+ A[n0 ,k] = cc * x + cs * y
317
+ A[n0+1,k] = c * y - s * x
318
+
319
+ for k in xrange(min(n1, n0+3)):
320
+ # apply givens rotation from the right
321
+ x = A[k,n0 ]
322
+ y = A[k,n0+1]
323
+ A[k,n0 ] = c * x + s * y
324
+ A[k,n0+1] = cc * y - cs * x
325
+
326
+ if not isinstance(Q, bool):
327
+ for k in xrange(n):
328
+ # eigenvectors
329
+ x = Q[k,n0 ]
330
+ y = Q[k,n0+1]
331
+ Q[k,n0 ] = c * x + s * y
332
+ Q[k,n0+1] = cc * y - cs * x
333
+
334
+ # chase the bulge
335
+
336
+ for j in xrange(n0, n1 - 2):
337
+ # calculate givens rotation
338
+
339
+ c = A[j+1,j]
340
+ s = A[j+2,j]
341
+
342
+ v = ctx.hypot(ctx.hypot(ctx.re(c), ctx.im(c)), ctx.hypot(ctx.re(s), ctx.im(s)))
343
+
344
+ if v == 0:
345
+ A[j+1,j] = 0
346
+ v = 1
347
+ c = 1
348
+ s = 0
349
+ else:
350
+ A[j+1,j] = v
351
+ c /= v
352
+ s /= v
353
+
354
+ A[j+2,j] = 0
355
+
356
+ cc = ctx.conj(c)
357
+ cs = ctx.conj(s)
358
+
359
+ for k in xrange(j+1, n):
360
+ # apply givens rotation from the left
361
+ x = A[j+1,k]
362
+ y = A[j+2,k]
363
+ A[j+1,k] = cc * x + cs * y
364
+ A[j+2,k] = c * y - s * x
365
+
366
+ for k in xrange(0, min(n1, j+4)):
367
+ # apply givens rotation from the right
368
+ x = A[k,j+1]
369
+ y = A[k,j+2]
370
+ A[k,j+1] = c * x + s * y
371
+ A[k,j+2] = cc * y - cs * x
372
+
373
+ if not isinstance(Q, bool):
374
+ for k in xrange(0, n):
375
+ # eigenvectors
376
+ x = Q[k,j+1]
377
+ y = Q[k,j+2]
378
+ Q[k,j+1] = c * x + s * y
379
+ Q[k,j+2] = cc * y - cs * x
380
+
381
+
382
+
383
+ def hessenberg_qr(ctx, A, Q):
384
+ """
385
+ This routine computes the Schur decomposition of an upper Hessenberg matrix A.
386
+ Given A, an unitary matrix Q is determined such that
387
+
388
+ Q' A Q = R and Q' Q = Q Q' = 1
389
+
390
+ where R is an upper right triangular matrix. Here ' denotes the hermitian
391
+ transpose (i.e. transposition and conjugation).
392
+
393
+ parameters:
394
+ A (input/output) On input, A contains an upper Hessenberg matrix.
395
+ On output, A is replace by the upper right triangluar matrix R.
396
+
397
+ Q (input/output) The parameter Q is multiplied by the unitary
398
+ matrix Q arising from the Schur decomposition. Q can also be
399
+ false, in which case the unitary matrix Q is not computated.
400
+ """
401
+
402
+ n = A.rows
403
+
404
+ norm = 0
405
+ for x in xrange(n):
406
+ for y in xrange(min(x+2, n)):
407
+ norm += ctx.re(A[y,x]) ** 2 + ctx.im(A[y,x]) ** 2
408
+ norm = ctx.sqrt(norm) / n
409
+
410
+ if norm == 0:
411
+ return
412
+
413
+ n0 = 0
414
+ n1 = n
415
+
416
+ eps = ctx.eps / (100 * n)
417
+ maxits = ctx.dps * 4
418
+
419
+ its = totalits = 0
420
+
421
+ while 1:
422
+ # kressner p.32 algo 3
423
+ # the active submatrix is A[n0:n1,n0:n1]
424
+
425
+ k = n0
426
+
427
+ while k + 1 < n1:
428
+ s = abs(ctx.re(A[k,k])) + abs(ctx.im(A[k,k])) + abs(ctx.re(A[k+1,k+1])) + abs(ctx.im(A[k+1,k+1]))
429
+ if s < eps * norm:
430
+ s = norm
431
+ if abs(A[k+1,k]) < eps * s:
432
+ break
433
+ k += 1
434
+
435
+ if k + 1 < n1:
436
+ # deflation found at position (k+1, k)
437
+
438
+ A[k+1,k] = 0
439
+ n0 = k + 1
440
+
441
+ its = 0
442
+
443
+ if n0 + 1 >= n1:
444
+ # block of size at most two has converged
445
+ n0 = 0
446
+ n1 = k + 1
447
+ if n1 < 2:
448
+ # QR algorithm has converged
449
+ return
450
+ else:
451
+ if (its % 30) == 10:
452
+ # exceptional shift
453
+ shift = A[n1-1,n1-2]
454
+ elif (its % 30) == 20:
455
+ # exceptional shift
456
+ shift = abs(A[n1-1,n1-2])
457
+ elif (its % 30) == 29:
458
+ # exceptional shift
459
+ shift = norm
460
+ else:
461
+ # A = [ a b ] det(x-A)=x*x-x*tr(A)+det(A)
462
+ # [ c d ]
463
+ #
464
+ # eigenvalues bad: (tr(A)+sqrt((tr(A))**2-4*det(A)))/2
465
+ # bad because of cancellation if |c| is small and |a-d| is small, too.
466
+ #
467
+ # eigenvalues good: (a+d+sqrt((a-d)**2+4*b*c))/2
468
+
469
+ t = A[n1-2,n1-2] + A[n1-1,n1-1]
470
+ s = (A[n1-1,n1-1] - A[n1-2,n1-2]) ** 2 + 4 * A[n1-1,n1-2] * A[n1-2,n1-1]
471
+ if ctx.re(s) > 0:
472
+ s = ctx.sqrt(s)
473
+ else:
474
+ s = ctx.sqrt(-s) * 1j
475
+ a = (t + s) / 2
476
+ b = (t - s) / 2
477
+ if abs(A[n1-1,n1-1] - a) > abs(A[n1-1,n1-1] - b):
478
+ shift = b
479
+ else:
480
+ shift = a
481
+
482
+ its += 1
483
+ totalits += 1
484
+
485
+ qr_step(ctx, n0, n1, A, Q, shift)
486
+
487
+ if its > maxits:
488
+ raise RuntimeError("qr: failed to converge after %d steps" % its)
489
+
490
+
491
@defun
def schur(ctx, A, overwrite_a = False):
    """
    Compute the Schur decomposition of the square matrix A.

    A unitary matrix Q is determined such that

      Q' A Q = R   and   Q' Q = Q Q' = 1

    where R is upper triangular and ' denotes the hermitian transpose
    (transposition plus conjugation).

    input:
      A            : a real or complex square matrix
      overwrite_a  : if true, allows modification of A which may improve
                     performance. if false, A is not modified.

    output:
      Q : a unitary matrix
      R : an upper right triangular matrix

    return value:  (Q, R)

    example:
      >>> from mpmath import mp
      >>> A = mp.matrix([[3, -1, 2], [2, 5, -5], [-2, -3, 7]])
      >>> Q, R = mp.schur(A)
      >>> mp.nprint(R, 3) # doctest:+SKIP
      [2.0  0.417  -2.53]
      [0.0    4.0  -4.74]
      [0.0    0.0    9.0]
      >>> print(mp.chop(A - Q * R * Q.transpose_conj()))
      [0.0  0.0  0.0]
      [0.0  0.0  0.0]
      [0.0  0.0  0.0]

    warning: The Schur decomposition is not unique.
    """

    n = A.rows

    # a 1x1 matrix is already triangular
    if n == 1:
        return (ctx.matrix([[1]]), A)

    if not overwrite_a:
        A = A.copy()

    # reduce A to Hessenberg form; the elementary reflectors are
    # accumulated into the unitary factor Q
    T = ctx.matrix(n, 1)
    hessenberg_reduce_0(ctx, A, T)
    Q = A.copy()
    hessenberg_reduce_1(ctx, Q, T)

    # clear the (numerically tiny) entries below the first subdiagonal
    for col in xrange(n):
        for row in xrange(col + 2, n):
            A[row, col] = 0

    # shifted QR iteration drives A to upper triangular form in place
    hessenberg_qr(ctx, A, Q)

    return Q, A
550
+
551
+
552
def eig_tr_r(ctx, A):
    """
    This routine calculates the right eigenvectors of an upper right triangular matrix.

    input:
      A      an upper right triangular matrix

    output:
      ER     a matrix whose columns form the right eigenvectors of A

    return value: ER
    """

    # this subroutine is inspired by the lapack routines ctrevc.f,clatrs.f

    n = A.rows

    # start from the identity; column i will be overwritten with the
    # eigenvector belonging to the eigenvalue A[i,i]
    ER = ctx.eye(n)

    eps = ctx.eps

    unfl = ctx.ldexp(ctx.one, -ctx.prec * 30)
    # since mpmath effectively has no limits on the exponent, we simply scale doubles up
    # original double has prec*20

    smlnum = unfl * (n / eps)
    simin = 1 / ctx.sqrt(eps)

    rmax = 1

    for i in xrange(1, n):
        s = A[i,i]

        # smallest divisor we allow when dividing by (A[j,j] - s) below
        smin = max(eps * abs(s), smlnum)

        # back substitution: solve (A[0:i,0:i] - s*I) v = -A[0:i,i]
        for j in xrange(i - 1, -1, -1):

            r = 0
            for k in xrange(j + 1, i + 1):
                r += A[j,k] * ER[k,i]

            t = A[j,j] - s
            if abs(t) < smin:
                # (nearly) multiple eigenvalue: clamp the divisor to
                # avoid overflow / division by zero
                t = smin

            r = -r / t
            ER[j,i] = r

            rmax = max(rmax, abs(r))
            if rmax > simin:
                # rescale the partial eigenvector to keep entries bounded
                for k in xrange(j, i+1):
                    ER[k,i] /= rmax
                rmax = 1

        if rmax != 1:
            # apply the final scaling to the whole column
            for k in xrange(0, i + 1):
                ER[k,i] /= rmax

    return ER
611
+
612
def eig_tr_l(ctx, A):
    """
    This routine calculates the left eigenvectors of an upper right triangular matrix.

    input:
      A      an upper right triangular matrix

    output:
      EL     a matrix whose rows form the left eigenvectors of A

    return value: EL
    """

    n = A.rows

    # start from the identity; row i will be overwritten with the left
    # eigenvector belonging to the eigenvalue A[i,i]
    EL = ctx.eye(n)

    eps = ctx.eps

    unfl = ctx.ldexp(ctx.one, -ctx.prec * 30)
    # since mpmath effectively has no limits on the exponent, we simply scale doubles up
    # original double has prec*20

    smlnum = unfl * (n / eps)
    simin = 1 / ctx.sqrt(eps)

    rmax = 1

    for i in xrange(0, n - 1):
        s = A[i,i]

        # smallest divisor we allow when dividing by (A[j,j] - s) below
        smin = max(eps * abs(s), smlnum)

        # forward substitution: solve w (A[i+1:,i+1:] - s*I) = -A[i,i+1:]
        for j in xrange(i + 1, n):

            r = 0
            for k in xrange(i, j):
                r += EL[i,k] * A[k,j]

            t = A[j,j] - s
            if abs(t) < smin:
                # (nearly) multiple eigenvalue: clamp the divisor
                t = smin

            r = -r / t
            EL[i,j] = r

            rmax = max(rmax, abs(r))
            if rmax > simin:
                # rescale the partial eigenvector to keep entries bounded
                for k in xrange(i, j + 1):
                    EL[i,k] /= rmax
                rmax = 1

        if rmax != 1:
            # apply the final scaling to the whole row
            for k in xrange(i, n):
                EL[i,k] /= rmax

    return EL
669
+
670
@defun
def eig(ctx, A, left = False, right = True, overwrite_a = False):
    """
    This routine computes the eigenvalues and optionally the left and right
    eigenvectors of a square matrix A. Given A, a vector E and matrices ER
    and EL are calculated such that

                        A ER[:,i] =         E[i] ER[:,i]
                EL[i,:] A         = EL[i,:] E[i]

    E contains the eigenvalues of A. The columns of ER contain the right eigenvectors
    of A whereas the rows of EL contain the left eigenvectors.


    input:
      A           : a real or complex square matrix of shape (n, n)
      left        : if true, the left eigenvectors are calculated.
      right       : if true, the right eigenvectors are calculated.
      overwrite_a : if true, allows modification of A which may improve
                    performance. if false, A is not modified.

    output:
      E    : a list of length n containing the eigenvalues of A.
      ER   : a matrix whose columns contain the right eigenvectors of A.
      EL   : a matrix whose rows contain the left eigenvectors of A.

    return values:
       E            if left and right are both false.
      (E, ER)       if right is true and left is false.
      (E, EL)       if left is true and right is false.
      (E, EL, ER)   if left and right are true.


    examples:
      >>> from mpmath import mp
      >>> A = mp.matrix([[3, -1, 2], [2, 5, -5], [-2, -3, 7]])
      >>> E, ER = mp.eig(A)
      >>> print(mp.chop(A * ER[:,0] - E[0] * ER[:,0]))
      [0.0]
      [0.0]
      [0.0]

      >>> E, EL, ER = mp.eig(A,left = True, right = True)
      >>> E, EL, ER = mp.eig_sort(E, EL, ER)
      >>> mp.nprint(E)
      [2.0, 4.0, 9.0]
      >>> print(mp.chop(A * ER[:,0] - E[0] * ER[:,0]))
      [0.0]
      [0.0]
      [0.0]
      >>> print(mp.chop( EL[0,:] * A - EL[0,:] * E[0]))
      [0.0  0.0  0.0]

    warning:
     - If there are multiple eigenvalues, the eigenvectors do not necessarily
       span the whole vectorspace, i.e. ER and EL may have not full rank.
       Furthermore in that case the eigenvectors are numerical ill-conditioned.
     - In the general case the eigenvalues have no natural order.

    see also:
      - eigh (or eigsy, eighe) for the symmetric eigenvalue problem.
      - eig_sort for sorting of eigenvalues and eigenvectors
    """

    n = A.rows

    # trivial 1x1 case: the matrix entry is the eigenvalue and [1] is
    # both the left and the right eigenvector
    if n == 1:
        vals = [A[0]]
        if left != right:
            # exactly one family of eigenvectors was requested
            return (vals, ctx.matrix([[1]]))
        return (vals, ctx.matrix([[1]]), ctx.matrix([[1]]))

    if not overwrite_a:
        A = A.copy()

    # reduce to Hessenberg form
    T = ctx.zeros(n, 1)
    hessenberg_reduce_0(ctx, A, T)

    need_vectors = left or right
    if need_vectors:
        # accumulate the reduction transform; it is needed to map the
        # triangular-matrix eigenvectors back to those of A
        Q = A.copy()
        hessenberg_reduce_1(ctx, Q, T)
    else:
        Q = False

    # clear the entries below the first subdiagonal
    for col in xrange(n):
        for row in xrange(col + 2, n):
            A[row, col] = 0

    # QR iteration: A becomes upper triangular
    hessenberg_qr(ctx, A, Q)

    # the eigenvalues sit on the diagonal of the triangular factor
    E = [A[i,i] for i in xrange(n)]

    if not need_vectors:
        return E

    if left:
        EL = eig_tr_l(ctx, A)
        EL = EL * Q.transpose_conj()

    if right:
        ER = eig_tr_r(ctx, A)
        ER = Q * ER

    if left and not right:
        return (E, EL)
    if right and not left:
        return (E, ER)
    return (E, EL, ER)
786
+
787
@defun
def eig_sort(ctx, E, EL = False, ER = False, f = "real"):
    """
    This routine sorts the eigenvalues and eigenvectors delivered by ``eig``.

    parameters:
      E  : the eigenvalues as delivered by eig
      EL : the left  eigenvectors as delivered by eig, or false
      ER : the right eigenvectors as delivered by eig, or false
      f  : either a string ("real" sort by increasing real part, "imag" sort by
           increasing imag part, "abs" sort by absolute value) or a function
           mapping complexs to the reals, i.e. ``f = lambda x: -mp.re(x) ``
           would sort the eigenvalues by decreasing real part.

    return values:
       E            if EL and ER are both false.
      (E, ER)       if ER is not false and left is false.
      (E, EL)       if EL is not false and right is false.
      (E, EL, ER)   if EL and ER are not false.

    example:
      >>> from mpmath import mp
      >>> A = mp.matrix([[3, -1, 2], [2, 5, -5], [-2, -3, 7]])
      >>> E, EL, ER = mp.eig(A,left = True, right = True)
      >>> E, EL, ER = mp.eig_sort(E, EL, ER)
      >>> mp.nprint(E)
      [2.0, 4.0, 9.0]
      >>> E, EL, ER = mp.eig_sort(E, EL, ER,f = lambda x: -mp.re(x))
      >>> mp.nprint(E)
      [9.0, 4.0, 2.0]
      >>> print(mp.chop(A * ER[:,0] - E[0] * ER[:,0]))
      [0.0]
      [0.0]
      [0.0]
      >>> print(mp.chop( EL[0,:] * A - EL[0,:] * E[0]))
      [0.0  0.0  0.0]
    """

    # map the symbolic key names onto actual key functions
    if isinstance(f, str):
        if f == "real":
            f = ctx.re
        elif f == "imag":
            f = ctx.im
        elif f == "abs":
            f = abs
        else:
            raise RuntimeError("unknown function %s" % f)

    n = len(E)

    have_left = not isinstance(EL, bool)
    have_right = not isinstance(ER, bool)

    # in-place selection sort on f(E); every swap of eigenvalues is
    # mirrored on the corresponding eigenvector row/column
    for i in xrange(n):
        imin = i
        best = f(E[i])

        for j in xrange(i + 1, n):
            cur = f(E[j])
            if cur < best:
                best = cur
                imin = j

        if imin != i:
            E[i], E[imin] = E[imin], E[i]

            if have_left:
                # left eigenvectors live in the rows of EL
                for j in xrange(n):
                    EL[i,j], EL[imin,j] = EL[imin,j], EL[i,j]

            if have_right:
                # right eigenvectors live in the columns of ER
                for j in xrange(n):
                    ER[j,i], ER[j,imin] = ER[j,imin], ER[j,i]

    if not have_left and not have_right:
        return E
    if not have_left:
        return (E, ER)
    if not have_right:
        return (E, EL)
    return (E, EL, ER)
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/matrices/linalg.py ADDED
@@ -0,0 +1,790 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Linear algebra
3
+ --------------
4
+
5
+ Linear equations
6
+ ................
7
+
8
+ Basic linear algebra is implemented; you can for example solve the linear
9
+ equation system::
10
+
11
+ x + 2*y = -10
12
+ 3*x + 4*y = 10
13
+
14
+ using ``lu_solve``::
15
+
16
+ >>> from mpmath import *
17
+ >>> mp.pretty = False
18
+ >>> A = matrix([[1, 2], [3, 4]])
19
+ >>> b = matrix([-10, 10])
20
+ >>> x = lu_solve(A, b)
21
+ >>> x
22
+ matrix(
23
+ [['30.0'],
24
+ ['-20.0']])
25
+
26
+ If you don't trust the result, use ``residual`` to calculate the residual ||A*x-b||::
27
+
28
+ >>> residual(A, x, b)
29
+ matrix(
30
+ [['3.46944695195361e-18'],
31
+ ['3.46944695195361e-18']])
32
+ >>> str(eps)
33
+ '2.22044604925031e-16'
34
+
35
+ As you can see, the solution is quite accurate. The error is caused by the
36
+ inaccuracy of the internal floating point arithmetic. Though, it's even smaller
37
+ than the current machine epsilon, which basically means you can trust the
38
+ result.
39
+
40
+ If you need more speed, use NumPy, or ``fp.lu_solve`` for a floating-point computation.
41
+
42
+ >>> fp.lu_solve(A, b) # doctest: +ELLIPSIS
43
+ matrix(...)
44
+
45
+ ``lu_solve`` accepts overdetermined systems. It is usually not possible to solve
46
+ such systems, so the residual is minimized instead. Internally this is done
47
+ using Cholesky decomposition to compute a least squares approximation. This means
48
+ that that ``lu_solve`` will square the errors. If you can't afford this, use
49
+ ``qr_solve`` instead. It is twice as slow but more accurate, and it calculates
50
+ the residual automatically.
51
+
52
+
53
+ Matrix factorization
54
+ ....................
55
+
56
+ The function ``lu`` computes an explicit LU factorization of a matrix::
57
+
58
+ >>> P, L, U = lu(matrix([[0,2,3],[4,5,6],[7,8,9]]))
59
+ >>> print(P)
60
+ [0.0 0.0 1.0]
61
+ [1.0 0.0 0.0]
62
+ [0.0 1.0 0.0]
63
+ >>> print(L)
64
+ [ 1.0 0.0 0.0]
65
+ [ 0.0 1.0 0.0]
66
+ [0.571428571428571 0.214285714285714 1.0]
67
+ >>> print(U)
68
+ [7.0 8.0 9.0]
69
+ [0.0 2.0 3.0]
70
+ [0.0 0.0 0.214285714285714]
71
+ >>> print(P.T*L*U)
72
+ [0.0 2.0 3.0]
73
+ [4.0 5.0 6.0]
74
+ [7.0 8.0 9.0]
75
+
76
+ Interval matrices
77
+ -----------------
78
+
79
+ Matrices may contain interval elements. This allows one to perform
80
+ basic linear algebra operations such as matrix multiplication
81
+ and equation solving with rigorous error bounds::
82
+
83
+ >>> a = iv.matrix([['0.1','0.3','1.0'],
84
+ ... ['7.1','5.5','4.8'],
85
+ ... ['3.2','4.4','5.6']])
86
+ >>>
87
+ >>> b = iv.matrix(['4','0.6','0.5'])
88
+ >>> c = iv.lu_solve(a, b)
89
+ >>> print(c)
90
+ [ [5.2582327113062568605927528666, 5.25823271130625686059275702219]]
91
+ [[-13.1550493962678375411635581388, -13.1550493962678375411635540152]]
92
+ [ [7.42069154774972557628979076189, 7.42069154774972557628979190734]]
93
+ >>> print(a*c)
94
+ [ [3.99999999999999999999999844904, 4.00000000000000000000000155096]]
95
+ [[0.599999999999999999999968898009, 0.600000000000000000000031763736]]
96
+ [[0.499999999999999999999979320485, 0.500000000000000000000020679515]]
97
+ """
98
+
99
+ # TODO:
100
+ # *implement high-level qr()
101
+ # *test unitvector
102
+ # *iterative solving
103
+
104
+ from copy import copy
105
+
106
+ from ..libmp.backend import xrange
107
+
108
+ class LinearAlgebraMethods(object):
109
+
110
    def LU_decomp(ctx, A, overwrite=False, use_cache=True):
        """
        LU-factorization of a n*n matrix using the Gauss algorithm.
        Returns L and U in one matrix and the pivot indices.

        Use overwrite to specify whether A will be overwritten with L and U.

        Raises ZeroDivisionError when the matrix is numerically singular.
        """
        if not A.rows == A.cols:
            raise ValueError('need n*n matrix')
        # get from cache if possible (the factorization is memoized on
        # the matrix object itself)
        if use_cache and isinstance(A, ctx.matrix) and A._LU:
            return A._LU
        if not overwrite:
            orig = A
            A = A.copy()
        tol = ctx.absmin(ctx.mnorm(A,1) * ctx.eps) # each pivot element has to be bigger
        n = A.rows
        p = [None]*(n - 1)
        for j in xrange(n - 1):
            # pivoting, choose max(abs(reciprocal row sum)*abs(pivot element))
            # (scaled partial pivoting)
            biggest = 0
            for k in xrange(j, n):
                s = ctx.fsum([ctx.absmin(A[k,l]) for l in xrange(j, n)])
                if ctx.absmin(s) <= tol:
                    raise ZeroDivisionError('matrix is numerically singular')
                current = 1/s * ctx.absmin(A[k,j])
                if current > biggest: # TODO: what if equal?
                    biggest = current
                    p[j] = k
            # swap rows according to p
            ctx.swap_row(A, j, p[j])
            if ctx.absmin(A[j,j]) <= tol:
                raise ZeroDivisionError('matrix is numerically singular')
            # calculate elimination factors and add rows
            for i in xrange(j + 1, n):
                A[i,j] /= A[j,j]
                for k in xrange(j + 1, n):
                    A[i,k] -= A[i,j]*A[j,k]
        # the last pivot never gets checked inside the loop, do it here
        if ctx.absmin(A[n - 1,n - 1]) <= tol:
            raise ZeroDivisionError('matrix is numerically singular')
        # cache decomposition on the original (unmodified) matrix
        if not overwrite and isinstance(orig, ctx.matrix):
            orig._LU = (A, p)
        return A, p
154
+
155
+ def L_solve(ctx, L, b, p=None):
156
+ """
157
+ Solve the lower part of a LU factorized matrix for y.
158
+ """
159
+ if L.rows != L.cols:
160
+ raise RuntimeError("need n*n matrix")
161
+ n = L.rows
162
+ if len(b) != n:
163
+ raise ValueError("Value should be equal to n")
164
+ b = copy(b)
165
+ if p: # swap b according to p
166
+ for k in xrange(0, len(p)):
167
+ ctx.swap_row(b, k, p[k])
168
+ # solve
169
+ for i in xrange(1, n):
170
+ for j in xrange(i):
171
+ b[i] -= L[i,j] * b[j]
172
+ return b
173
+
174
+ def U_solve(ctx, U, y):
175
+ """
176
+ Solve the upper part of a LU factorized matrix for x.
177
+ """
178
+ if U.rows != U.cols:
179
+ raise RuntimeError("need n*n matrix")
180
+ n = U.rows
181
+ if len(y) != n:
182
+ raise ValueError("Value should be equal to n")
183
+ x = copy(y)
184
+ for i in xrange(n - 1, -1, -1):
185
+ for j in xrange(i + 1, n):
186
+ x[i] -= U[i,j] * x[j]
187
+ x[i] /= U[i,i]
188
+ return x
189
+
190
    def lu_solve(ctx, A, b, **kwargs):
        """
        Ax = b => x

        Solve a determined or overdetermined linear equations system.
        Fast LU decomposition is used, which is less accurate than QR decomposition
        (especially for overdetermined systems), but it's twice as efficient.
        Use qr_solve if you want more precision or have to solve a very ill-
        conditioned system.

        If you specify real=True, it does not check for overdetermined complex
        systems.
        """
        prec = ctx.prec
        try:
            # guard digits for the elimination; restored in ``finally``
            ctx.prec += 10
            # do not overwrite A nor b
            A, b = ctx.matrix(A, **kwargs).copy(), ctx.matrix(b, **kwargs).copy()
            if A.rows < A.cols:
                raise ValueError('cannot solve underdetermined system')
            if A.rows > A.cols:
                # use least-squares method if overdetermined
                # (this increases errors)
                AH = A.H
                A = AH * A
                b = AH * b
                if (kwargs.get('real', False) or
                    not sum(type(i) is ctx.mpc for i in A)):
                    # TODO: necessary to check also b?
                    # the normal equations A^H A are Hermitian
                    # positive-definite, so Cholesky applies
                    x = ctx.cholesky_solve(A, b)
                else:
                    x = ctx.lu_solve(A, b)
            else:
                # LU factorization followed by forward/backward substitution
                A, p = ctx.LU_decomp(A)
                b = ctx.L_solve(A, b, p)
                x = ctx.U_solve(A, b)
        finally:
            ctx.prec = prec
        return x
230
+
231
+ def improve_solution(ctx, A, x, b, maxsteps=1):
232
+ """
233
+ Improve a solution to a linear equation system iteratively.
234
+
235
+ This re-uses the LU decomposition and is thus cheap.
236
+ Usually 3 up to 4 iterations are giving the maximal improvement.
237
+ """
238
+ if A.rows != A.cols:
239
+ raise RuntimeError("need n*n matrix") # TODO: really?
240
+ for _ in xrange(maxsteps):
241
+ r = ctx.residual(A, x, b)
242
+ if ctx.norm(r, 2) < 10*ctx.eps:
243
+ break
244
+ # this uses cached LU decomposition and is thus cheap
245
+ dx = ctx.lu_solve(A, -r)
246
+ x += dx
247
+ return x
248
+
249
+ def lu(ctx, A):
250
+ """
251
+ A -> P, L, U
252
+
253
+ LU factorisation of a square matrix A. L is the lower, U the upper part.
254
+ P is the permutation matrix indicating the row swaps.
255
+
256
+ P*A = L*U
257
+
258
+ If you need efficiency, use the low-level method LU_decomp instead, it's
259
+ much more memory efficient.
260
+ """
261
+ # get factorization
262
+ A, p = ctx.LU_decomp(A)
263
+ n = A.rows
264
+ L = ctx.matrix(n)
265
+ U = ctx.matrix(n)
266
+ for i in xrange(n):
267
+ for j in xrange(n):
268
+ if i > j:
269
+ L[i,j] = A[i,j]
270
+ elif i == j:
271
+ L[i,j] = 1
272
+ U[i,j] = A[i,j]
273
+ else:
274
+ U[i,j] = A[i,j]
275
+ # calculate permutation matrix
276
+ P = ctx.eye(n)
277
+ for k in xrange(len(p)):
278
+ ctx.swap_row(P, k, p[k])
279
+ return P, L, U
280
+
281
+ def unitvector(ctx, n, i):
282
+ """
283
+ Return the i-th n-dimensional unit vector.
284
+ """
285
+ assert 0 < i <= n, 'this unit vector does not exist'
286
+ return [ctx.zero]*(i-1) + [ctx.one] + [ctx.zero]*(n-i)
287
+
288
+ def inverse(ctx, A, **kwargs):
289
+ """
290
+ Calculate the inverse of a matrix.
291
+
292
+ If you want to solve an equation system Ax = b, it's recommended to use
293
+ solve(A, b) instead, it's about 3 times more efficient.
294
+ """
295
+ prec = ctx.prec
296
+ try:
297
+ ctx.prec += 10
298
+ # do not overwrite A
299
+ A = ctx.matrix(A, **kwargs).copy()
300
+ n = A.rows
301
+ # get LU factorisation
302
+ A, p = ctx.LU_decomp(A)
303
+ cols = []
304
+ # calculate unit vectors and solve corresponding system to get columns
305
+ for i in xrange(1, n + 1):
306
+ e = ctx.unitvector(n, i)
307
+ y = ctx.L_solve(A, e, p)
308
+ cols.append(ctx.U_solve(A, y))
309
+ # convert columns to matrix
310
+ inv = []
311
+ for i in xrange(n):
312
+ row = []
313
+ for j in xrange(n):
314
+ row.append(cols[j][i])
315
+ inv.append(row)
316
+ result = ctx.matrix(inv, **kwargs)
317
+ finally:
318
+ ctx.prec = prec
319
+ return result
320
+
321
    def householder(ctx, A):
        """
        (A|b) -> H, p, x, res

        (A|b) is the coefficient matrix with left hand side of an optionally
        overdetermined linear equation system.
        H and p contain all information about the transformation matrices.
        x is the solution, res the residual.

        NOTE: A is modified in place; the returned H is the same object.
        """
        if not isinstance(A, ctx.matrix):
            raise TypeError("A should be a type of ctx.matrix")
        m = A.rows
        n = A.cols
        if m < n - 1:
            raise RuntimeError("Columns should not be less than rows")
        # calculate Householder matrix
        p = []
        for j in xrange(0, n - 1):
            # squared norm of the column below (and including) the pivot
            s = ctx.fsum(abs(A[i,j])**2 for i in xrange(j, m))
            if not abs(s) > ctx.eps:
                raise ValueError('matrix is numerically singular')
            # sign choice avoids cancellation in (s - p[j]*A[j,j])
            p.append(-ctx.sign(ctx.re(A[j,j])) * ctx.sqrt(s))
            kappa = ctx.one / (s - p[j] * A[j,j])
            A[j,j] -= p[j]
            # apply the reflector to the remaining columns
            for k in xrange(j+1, n):
                y = ctx.fsum(ctx.conj(A[i,j]) * A[i,k] for i in xrange(j, m)) * kappa
                for i in xrange(j, m):
                    A[i,k] -= A[i,j] * y
        # solve Rx = c1 by back substitution (diagonal of R is stored in p)
        x = [A[i,n - 1] for i in xrange(n - 1)]
        for i in xrange(n - 2, -1, -1):
            x[i] -= ctx.fsum(A[i,j] * x[j] for j in xrange(i + 1, n - 1))
            x[i] /= p[i]
        # calculate residual
        if not m == n - 1:
            r = [A[m-1-i, n-1] for i in xrange(m - n + 1)]
        else:
            # determined system, residual should be 0
            r = [0]*m # maybe a bad idea, changing r[i] will change all elements
        return A, p, x, r
361
+
362
+ #def qr(ctx, A):
363
+ # """
364
+ # A -> Q, R
365
+ #
366
+ # QR factorisation of a square matrix A using Householder decomposition.
367
+ # Q is orthogonal, this leads to very few numerical errors.
368
+ #
369
+ # A = Q*R
370
+ # """
371
+ # H, p, x, res = householder(A)
372
+ # TODO: implement this
373
+
374
+ def residual(ctx, A, x, b, **kwargs):
375
+ """
376
+ Calculate the residual of a solution to a linear equation system.
377
+
378
+ r = A*x - b for A*x = b
379
+ """
380
+ oldprec = ctx.prec
381
+ try:
382
+ ctx.prec *= 2
383
+ A, x, b = ctx.matrix(A, **kwargs), ctx.matrix(x, **kwargs), ctx.matrix(b, **kwargs)
384
+ return A*x - b
385
+ finally:
386
+ ctx.prec = oldprec
387
+
388
+ def qr_solve(ctx, A, b, norm=None, **kwargs):
389
+ """
390
+ Ax = b => x, ||Ax - b||
391
+
392
+ Solve a determined or overdetermined linear equations system and
393
+ calculate the norm of the residual (error).
394
+ QR decomposition using Householder factorization is applied, which gives very
395
+ accurate results even for ill-conditioned matrices. qr_solve is twice as
396
+ efficient.
397
+ """
398
+ if norm is None:
399
+ norm = ctx.norm
400
+ prec = ctx.prec
401
+ try:
402
+ ctx.prec += 10
403
+ # do not overwrite A nor b
404
+ A, b = ctx.matrix(A, **kwargs).copy(), ctx.matrix(b, **kwargs).copy()
405
+ if A.rows < A.cols:
406
+ raise ValueError('cannot solve underdetermined system')
407
+ H, p, x, r = ctx.householder(ctx.extend(A, b))
408
+ res = ctx.norm(r)
409
+ # calculate residual "manually" for determined systems
410
+ if res == 0:
411
+ res = ctx.norm(ctx.residual(A, x, b))
412
+ return ctx.matrix(x, **kwargs), res
413
+ finally:
414
+ ctx.prec = prec
415
+
416
    def cholesky(ctx, A, tol=None):
        r"""
        Cholesky decomposition of a symmetric positive-definite matrix `A`.
        Returns a lower triangular matrix `L` such that `A = L \times L^T`.
        More generally, for a complex Hermitian positive-definite matrix,
        a Cholesky decomposition satisfying `A = L \times L^H` is returned.

        The Cholesky decomposition can be used to solve linear equation
        systems twice as efficiently as LU decomposition, or to
        test whether `A` is positive-definite.

        The optional parameter ``tol`` determines the tolerance for
        verifying positive-definiteness.

        **Examples**

        Cholesky decomposition of a positive-definite symmetric matrix::

            >>> from mpmath import *
            >>> mp.dps = 25; mp.pretty = True
            >>> A = eye(3) + hilbert(3)
            >>> nprint(A)
            [     2.0      0.5  0.333333]
            [     0.5  1.33333      0.25]
            [0.333333     0.25       1.2]
            >>> L = cholesky(A)
            >>> nprint(L)
            [ 1.41421      0.0      0.0]
            [0.353553  1.09924      0.0]
            [0.235702  0.15162  1.05899]
            >>> chop(A - L*L.T)
            [0.0  0.0  0.0]
            [0.0  0.0  0.0]
            [0.0  0.0  0.0]

        Cholesky decomposition of a Hermitian matrix::

            >>> A = eye(3) + matrix([[0,0.25j,-0.5j],[-0.25j,0,0],[0.5j,0,0]])
            >>> L = cholesky(A)
            >>> nprint(L)
            [          1.0                0.0                0.0]
            [(0.0 - 0.25j)  (0.968246 + 0.0j)                0.0]
            [ (0.0 + 0.5j)  (0.129099 + 0.0j)  (0.856349 + 0.0j)]
            >>> chop(A - L*L.H)
            [0.0  0.0  0.0]
            [0.0  0.0  0.0]
            [0.0  0.0  0.0]

        Attempted Cholesky decomposition of a matrix that is not positive
        definite::

            >>> A = -eye(3) + hilbert(3)
            >>> L = cholesky(A)
            Traceback (most recent call last):
              ...
            ValueError: matrix is not positive-definite

        **References**

        1. [Wikipedia]_ http://en.wikipedia.org/wiki/Cholesky_decomposition

        """
        if not isinstance(A, ctx.matrix):
            raise RuntimeError("A should be a type of ctx.matrix")
        if not A.rows == A.cols:
            raise ValueError('need n*n matrix')
        if tol is None:
            tol = +ctx.eps
        n = A.rows
        L = ctx.matrix(n)
        for j in xrange(n):
            # the diagonal of a Hermitian matrix must be (numerically) real
            c = ctx.re(A[j,j])
            if abs(c-A[j,j]) > tol:
                raise ValueError('matrix is not Hermitian')
            # s = A[j,j] - sum(|L[j,k]|^2); must stay positive for a
            # positive-definite input
            s = c - ctx.fsum((L[j,k] for k in xrange(j)),
                absolute=True, squared=True)
            if s < tol:
                raise ValueError('matrix is not positive-definite')
            L[j,j] = ctx.sqrt(s)
            # fill the rest of column j below the diagonal
            for i in xrange(j, n):
                it1 = (L[i,k] for k in xrange(j))
                it2 = (L[j,k] for k in xrange(j))
                t = ctx.fdot(it1, it2, conjugate=True)
                L[i,j] = (A[i,j] - t) / L[j,j]
        return L
501
+
502
+ def cholesky_solve(ctx, A, b, **kwargs):
503
+ """
504
+ Ax = b => x
505
+
506
+ Solve a symmetric positive-definite linear equation system.
507
+ This is twice as efficient as lu_solve.
508
+
509
+ Typical use cases:
510
+ * A.T*A
511
+ * Hessian matrix
512
+ * differential equations
513
+ """
514
+ prec = ctx.prec
515
+ try:
516
+ ctx.prec += 10
517
+ # do not overwrite A nor b
518
+ A, b = ctx.matrix(A, **kwargs).copy(), ctx.matrix(b, **kwargs).copy()
519
+ if A.rows != A.cols:
520
+ raise ValueError('can only solve determined system')
521
+ # Cholesky factorization
522
+ L = ctx.cholesky(A)
523
+ # solve
524
+ n = L.rows
525
+ if len(b) != n:
526
+ raise ValueError("Value should be equal to n")
527
+ for i in xrange(n):
528
+ b[i] -= ctx.fsum(L[i,j] * b[j] for j in xrange(i))
529
+ b[i] /= L[i,i]
530
+ x = ctx.U_solve(L.T, b)
531
+ return x
532
+ finally:
533
+ ctx.prec = prec
534
+
535
+ def det(ctx, A):
536
+ """
537
+ Calculate the determinant of a matrix.
538
+ """
539
+ prec = ctx.prec
540
+ try:
541
+ # do not overwrite A
542
+ A = ctx.matrix(A).copy()
543
+ # use LU factorization to calculate determinant
544
+ try:
545
+ R, p = ctx.LU_decomp(A)
546
+ except ZeroDivisionError:
547
+ return 0
548
+ z = 1
549
+ for i, e in enumerate(p):
550
+ if i != e:
551
+ z *= -1
552
+ for i in xrange(A.rows):
553
+ z *= R[i,i]
554
+ return z
555
+ finally:
556
+ ctx.prec = prec
557
+
558
+ def cond(ctx, A, norm=None):
559
+ """
560
+ Calculate the condition number of a matrix using a specified matrix norm.
561
+
562
+ The condition number estimates the sensitivity of a matrix to errors.
563
+ Example: small input errors for ill-conditioned coefficient matrices
564
+ alter the solution of the system dramatically.
565
+
566
+ For ill-conditioned matrices it's recommended to use qr_solve() instead
567
+ of lu_solve(). This does not help with input errors however, it just avoids
568
+ to add additional errors.
569
+
570
+ Definition: cond(A) = ||A|| * ||A**-1||
571
+ """
572
+ if norm is None:
573
+ norm = lambda x: ctx.mnorm(x,1)
574
+ return norm(A) * norm(ctx.inverse(A))
575
+
576
+ def lu_solve_mat(ctx, a, b):
577
+ """Solve a * x = b where a and b are matrices."""
578
+ r = ctx.matrix(a.rows, b.cols)
579
+ for i in range(b.cols):
580
+ c = ctx.lu_solve(a, b.column(i))
581
+ for j in range(len(c)):
582
+ r[j, i] = c[j]
583
+ return r
584
+
585
    def qr(ctx, A, mode = 'full', edps = 10):
        """
        Compute a QR factorization $A = QR$ where
        A is an m x n matrix of real or complex numbers where m >= n

        mode has following meanings:
        (1) mode = 'raw' returns two matrixes (A, tau) in the
            internal format used by LAPACK
        (2) mode = 'skinny' returns the leading n columns of Q
            and n rows of R
        (3) Any other value returns the leading m columns of Q
            and m rows of R

        edps is the increase in mp precision used for calculations

        **Examples**

        >>> from mpmath import *
        >>> mp.dps = 15
        >>> mp.pretty = True
        >>> A = matrix([[1, 2], [3, 4], [1, 1]])
        >>> Q, R = qr(A)
        >>> Q
        [-0.301511344577764   0.861640436855329  0.408248290463863]
        [-0.904534033733291  -0.123091490979333  -0.408248290463863]
        [-0.301511344577764  -0.492365963917331  0.816496580927726]
        >>> R
        [-3.3166247903554  -4.52267016866645]
        [             0.0  0.738548945875996]
        [             0.0                0.0]
        >>> Q * R
        [1.0  2.0]
        [3.0  4.0]
        [1.0  1.0]
        >>> chop(Q.T * Q)
        [1.0  0.0  0.0]
        [0.0  1.0  0.0]
        [0.0  0.0  1.0]
        >>> B = matrix([[1+0j, 2-3j], [3+j, 4+5j]])
        >>> Q, R = qr(B)
        >>> nprint(Q)
        [     (-0.301511 + 0.0j)   (0.0695795 - 0.95092j)]
        [(-0.904534 - 0.301511j)  (-0.115966 + 0.278318j)]
        >>> nprint(R)
        [(-3.31662 + 0.0j)  (-5.72872 - 2.41209j)]
        [              0.0       (3.91965 + 0.0j)]
        >>> Q * R
        [(1.0 + 0.0j)  (2.0 - 3.0j)]
        [(3.0 + 1.0j)  (4.0 + 5.0j)]
        >>> chop(Q.T * Q.conjugate())
        [1.0  0.0]
        [0.0  1.0]

        """

        # check values before continuing
        assert isinstance(A, ctx.matrix)
        m = A.rows
        n = A.cols
        assert n >= 0
        assert m >= n
        assert edps >= 0

        # check for complex data type
        cmplx = any(type(x) is ctx.mpc for x in A)

        # temporarily increase the precision and initialize
        with ctx.extradps(edps):
            tau = ctx.matrix(n,1)
            A = A.copy()

            # ---------------
            # FACTOR MATRIX A
            # ---------------
            if cmplx:
                one = ctx.mpc('1.0', '0.0')
                zero = ctx.mpc('0.0', '0.0')
                rzero = ctx.mpf('0.0')

                # main loop to factor A (complex)
                # each pass builds one Householder reflector and applies
                # it to the trailing submatrix
                for j in xrange(0, n):
                    alpha = A[j,j]
                    alphr = ctx.re(alpha)
                    alphi = ctx.im(alpha)

                    if (m-j) >= 2:
                        xnorm = ctx.fsum( A[i,j]*ctx.conj(A[i,j]) for i in xrange(j+1, m) )
                        xnorm = ctx.re( ctx.sqrt(xnorm) )
                    else:
                        xnorm = rzero

                    # column already zero below the diagonal: no reflector
                    if (xnorm == rzero) and (alphi == rzero):
                        tau[j] = zero
                        continue

                    # choose sign of beta opposite to alpha to avoid
                    # cancellation
                    if alphr < rzero:
                        beta = ctx.sqrt(alphr**2 + alphi**2 + xnorm**2)
                    else:
                        beta = -ctx.sqrt(alphr**2 + alphi**2 + xnorm**2)

                    tau[j] = ctx.mpc( (beta - alphr) / beta, -alphi / beta )
                    t = -ctx.conj(tau[j])
                    za = one / (alpha - beta)

                    # normalize the reflector vector (stored below the
                    # diagonal in column j)
                    for i in xrange(j+1, m):
                        A[i,j] *= za

                    A[j,j] = one
                    # apply the reflector to the trailing columns
                    for k in xrange(j+1, n):
                        y = ctx.fsum(A[i,j] * ctx.conj(A[i,k]) for i in xrange(j, m))
                        temp = t * ctx.conj(y)
                        for i in xrange(j, m):
                            A[i,k] += A[i,j] * temp

                    A[j,j] = ctx.mpc(beta, '0.0')
            else:
                one = ctx.mpf('1.0')
                zero = ctx.mpf('0.0')

                # main loop to factor A (real)
                for j in xrange(0, n):
                    alpha = A[j,j]

                    if (m-j) > 2:
                        xnorm = ctx.fsum( (A[i,j])**2 for i in xrange(j+1, m) )
                        xnorm = ctx.sqrt(xnorm)
                    elif (m-j) == 2:
                        xnorm = abs( A[m-1,j] )
                    else:
                        xnorm = zero

                    # column already zero below the diagonal: no reflector
                    if xnorm == zero:
                        tau[j] = zero
                        continue

                    # choose sign of beta opposite to alpha to avoid
                    # cancellation
                    if alpha < zero:
                        beta = ctx.sqrt(alpha**2 + xnorm**2)
                    else:
                        beta = -ctx.sqrt(alpha**2 + xnorm**2)

                    tau[j] = (beta - alpha) / beta
                    t = -tau[j]
                    da = one / (alpha - beta)

                    # normalize the reflector vector
                    for i in xrange(j+1, m):
                        A[i,j] *= da

                    A[j,j] = one
                    # apply the reflector to the trailing columns
                    for k in xrange(j+1, n):
                        y = ctx.fsum( A[i,j] * A[i,k] for i in xrange(j, m) )
                        temp = t * y
                        for i in xrange(j,m):
                            A[i,k] += A[i,j] * temp

                    A[j,j] = beta

            # return factorization in same internal format as LAPACK
            if (mode == 'raw') or (mode == 'RAW'):
                return A, tau

            # ----------------------------------
            # FORM Q USING BACKWARD ACCUMULATION
            # ----------------------------------

            # form R before the values are overwritten
            R = A.copy()
            for j in xrange(0, n):
                for i in xrange(j+1, m):
                    R[i,j] = zero

            # set the value of p (number of columns of Q to return)
            p = m
            if (mode == 'skinny') or (mode == 'SKINNY'):
                p = n

            # add columns to A if needed and initialize
            A.cols += (p-n)
            for j in xrange(0, p):
                A[j,j] = one
                for i in xrange(0, j):
                    A[i,j] = zero

            # main loop to form Q: apply the reflectors in reverse order
            for j in xrange(n-1, -1, -1):
                t = -tau[j]
                A[j,j] += t

                for k in xrange(j+1, p):
                    if cmplx:
                        y = ctx.fsum(A[i,j] * ctx.conj(A[i,k]) for i in xrange(j+1, m))
                        temp = t * ctx.conj(y)
                    else:
                        y = ctx.fsum(A[i,j] * A[i,k] for i in xrange(j+1, m))
                        temp = t * y
                    A[j,k] = temp
                    for i in xrange(j+1, m):
                        A[i,k] += A[i,j] * temp

                for i in xrange(j+1, m):
                    A[i, j] *= t

            return A, R[0:p,0:n]

        # ------------------
        # END OF FUNCTION QR
        # ------------------
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/matrices/matrices.py ADDED
@@ -0,0 +1,1005 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from ..libmp.backend import xrange
2
+ import warnings
3
+
4
+ # TODO: interpret list as vectors (for multiplication)
5
+
6
# Separators used when rendering a matrix as text (see _matrix.__nstr__):
# rows are joined with `rowsep`, columns within a row with `colsep`.
rowsep = '\n'
colsep = ' '
8
+
9
+ class _matrix(object):
10
+ """
11
+ Numerical matrix.
12
+
13
+ Specify the dimensions or the data as a nested list.
14
+ Elements default to zero.
15
+ Use a flat list to create a column vector easily.
16
+
17
+ The datatype of the context (mpf for mp, mpi for iv, and float for fp) is used to store the data.
18
+
19
+ Creating matrices
20
+ -----------------
21
+
22
+ Matrices in mpmath are implemented using dictionaries. Only non-zero values
23
+ are stored, so it is cheap to represent sparse matrices.
24
+
25
+ The most basic way to create one is to use the ``matrix`` class directly.
26
+ You can create an empty matrix specifying the dimensions:
27
+
28
+ >>> from mpmath import *
29
+ >>> mp.dps = 15
30
+ >>> matrix(2)
31
+ matrix(
32
+ [['0.0', '0.0'],
33
+ ['0.0', '0.0']])
34
+ >>> matrix(2, 3)
35
+ matrix(
36
+ [['0.0', '0.0', '0.0'],
37
+ ['0.0', '0.0', '0.0']])
38
+
39
+ Calling ``matrix`` with one dimension will create a square matrix.
40
+
41
+ To access the dimensions of a matrix, use the ``rows`` or ``cols`` keyword:
42
+
43
+ >>> A = matrix(3, 2)
44
+ >>> A
45
+ matrix(
46
+ [['0.0', '0.0'],
47
+ ['0.0', '0.0'],
48
+ ['0.0', '0.0']])
49
+ >>> A.rows
50
+ 3
51
+ >>> A.cols
52
+ 2
53
+
54
+ You can also change the dimension of an existing matrix. This will set the
55
+ new elements to 0. If the new dimension is smaller than before, the
56
+ concerning elements are discarded:
57
+
58
+ >>> A.rows = 2
59
+ >>> A
60
+ matrix(
61
+ [['0.0', '0.0'],
62
+ ['0.0', '0.0']])
63
+
64
+ Internally ``mpmathify`` is used every time an element is set. This
65
+ is done using the syntax A[row,column], counting from 0:
66
+
67
+ >>> A = matrix(2)
68
+ >>> A[1,1] = 1 + 1j
69
+ >>> A
70
+ matrix(
71
+ [['0.0', '0.0'],
72
+ ['0.0', mpc(real='1.0', imag='1.0')]])
73
+
74
+ A more comfortable way to create a matrix lets you use nested lists:
75
+
76
+ >>> matrix([[1, 2], [3, 4]])
77
+ matrix(
78
+ [['1.0', '2.0'],
79
+ ['3.0', '4.0']])
80
+
81
+ Convenient advanced functions are available for creating various standard
82
+ matrices, see ``zeros``, ``ones``, ``diag``, ``eye``, ``randmatrix`` and
83
+ ``hilbert``.
84
+
85
+ Vectors
86
+ .......
87
+
88
+ Vectors may also be represented by the ``matrix`` class (with rows = 1 or cols = 1).
89
+ For vectors there are some things which make life easier. A column vector can
90
+ be created using a flat list, a row vectors using an almost flat nested list::
91
+
92
+ >>> matrix([1, 2, 3])
93
+ matrix(
94
+ [['1.0'],
95
+ ['2.0'],
96
+ ['3.0']])
97
+ >>> matrix([[1, 2, 3]])
98
+ matrix(
99
+ [['1.0', '2.0', '3.0']])
100
+
101
+ Optionally vectors can be accessed like lists, using only a single index::
102
+
103
+ >>> x = matrix([1, 2, 3])
104
+ >>> x[1]
105
+ mpf('2.0')
106
+ >>> x[1,0]
107
+ mpf('2.0')
108
+
109
+ Other
110
+ .....
111
+
112
+ Like you probably expected, matrices can be printed::
113
+
114
+ >>> print randmatrix(3) # doctest:+SKIP
115
+ [ 0.782963853573023 0.802057689719883 0.427895717335467]
116
+ [0.0541876859348597 0.708243266653103 0.615134039977379]
117
+ [ 0.856151514955773 0.544759264818486 0.686210904770947]
118
+
119
+ Use ``nstr`` or ``nprint`` to specify the number of digits to print::
120
+
121
+ >>> nprint(randmatrix(5), 3) # doctest:+SKIP
122
+ [2.07e-1 1.66e-1 5.06e-1 1.89e-1 8.29e-1]
123
+ [6.62e-1 6.55e-1 4.47e-1 4.82e-1 2.06e-2]
124
+ [4.33e-1 7.75e-1 6.93e-2 2.86e-1 5.71e-1]
125
+ [1.01e-1 2.53e-1 6.13e-1 3.32e-1 2.59e-1]
126
+ [1.56e-1 7.27e-2 6.05e-1 6.67e-2 2.79e-1]
127
+
128
+ As matrices are mutable, you will need to copy them sometimes::
129
+
130
+ >>> A = matrix(2)
131
+ >>> A
132
+ matrix(
133
+ [['0.0', '0.0'],
134
+ ['0.0', '0.0']])
135
+ >>> B = A.copy()
136
+ >>> B[0,0] = 1
137
+ >>> B
138
+ matrix(
139
+ [['1.0', '0.0'],
140
+ ['0.0', '0.0']])
141
+ >>> A
142
+ matrix(
143
+ [['0.0', '0.0'],
144
+ ['0.0', '0.0']])
145
+
146
+ Finally, it is possible to convert a matrix to a nested list. This is very useful,
147
+ as most Python libraries involving matrices or arrays (namely NumPy or SymPy)
148
+ support this format::
149
+
150
+ >>> B.tolist()
151
+ [[mpf('1.0'), mpf('0.0')], [mpf('0.0'), mpf('0.0')]]
152
+
153
+
154
+ Matrix operations
155
+ -----------------
156
+
157
+ You can add and subtract matrices of compatible dimensions::
158
+
159
+ >>> A = matrix([[1, 2], [3, 4]])
160
+ >>> B = matrix([[-2, 4], [5, 9]])
161
+ >>> A + B
162
+ matrix(
163
+ [['-1.0', '6.0'],
164
+ ['8.0', '13.0']])
165
+ >>> A - B
166
+ matrix(
167
+ [['3.0', '-2.0'],
168
+ ['-2.0', '-5.0']])
169
+ >>> A + ones(3) # doctest:+ELLIPSIS
170
+ Traceback (most recent call last):
171
+ ...
172
+ ValueError: incompatible dimensions for addition
173
+
174
+ It is possible to multiply or add matrices and scalars. In the latter case the
175
+ operation will be done element-wise::
176
+
177
+ >>> A * 2
178
+ matrix(
179
+ [['2.0', '4.0'],
180
+ ['6.0', '8.0']])
181
+ >>> A / 4
182
+ matrix(
183
+ [['0.25', '0.5'],
184
+ ['0.75', '1.0']])
185
+ >>> A - 1
186
+ matrix(
187
+ [['0.0', '1.0'],
188
+ ['2.0', '3.0']])
189
+
190
+ Of course you can perform matrix multiplication, if the dimensions are
191
+ compatible, using ``@`` (for Python >= 3.5) or ``*``. For clarity, ``@`` is
192
+ recommended (`PEP 465 <https://www.python.org/dev/peps/pep-0465/>`), because
193
+ the meaning of ``*`` is different in many other Python libraries such as NumPy.
194
+
195
+ >>> A @ B # doctest:+SKIP
196
+ matrix(
197
+ [['8.0', '22.0'],
198
+ ['14.0', '48.0']])
199
+ >>> A * B # same as A @ B
200
+ matrix(
201
+ [['8.0', '22.0'],
202
+ ['14.0', '48.0']])
203
+ >>> matrix([[1, 2, 3]]) * matrix([[-6], [7], [-2]])
204
+ matrix(
205
+ [['2.0']])
206
+
207
+ ..
208
+ COMMENT: TODO: the above "doctest:+SKIP" may be removed as soon as we
209
+ have dropped support for Python 3.5 and below.
210
+
211
+ You can raise powers of square matrices::
212
+
213
+ >>> A**2
214
+ matrix(
215
+ [['7.0', '10.0'],
216
+ ['15.0', '22.0']])
217
+
218
+ Negative powers will calculate the inverse::
219
+
220
+ >>> A**-1
221
+ matrix(
222
+ [['-2.0', '1.0'],
223
+ ['1.5', '-0.5']])
224
+ >>> A * A**-1
225
+ matrix(
226
+ [['1.0', '1.0842021724855e-19'],
227
+ ['-2.16840434497101e-19', '1.0']])
228
+
229
+
230
+
231
+ Matrix transposition is straightforward::
232
+
233
+ >>> A = ones(2, 3)
234
+ >>> A
235
+ matrix(
236
+ [['1.0', '1.0', '1.0'],
237
+ ['1.0', '1.0', '1.0']])
238
+ >>> A.T
239
+ matrix(
240
+ [['1.0', '1.0'],
241
+ ['1.0', '1.0'],
242
+ ['1.0', '1.0']])
243
+
244
+ Norms
245
+ .....
246
+
247
+ Sometimes you need to know how "large" a matrix or vector is. Due to their
248
+ multidimensional nature it's not possible to compare them, but there are
249
+ several functions to map a matrix or a vector to a positive real number, the
250
+ so called norms.
251
+
252
+ For vectors the p-norm is intended, usually the 1-, the 2- and the oo-norm are
253
+ used.
254
+
255
+ >>> x = matrix([-10, 2, 100])
256
+ >>> norm(x, 1)
257
+ mpf('112.0')
258
+ >>> norm(x, 2)
259
+ mpf('100.5186549850325')
260
+ >>> norm(x, inf)
261
+ mpf('100.0')
262
+
263
+ Please note that the 2-norm is the most used one, though it is more expensive
264
+ to calculate than the 1- or oo-norm.
265
+
266
+ It is possible to generalize some vector norms to matrix norm::
267
+
268
+ >>> A = matrix([[1, -1000], [100, 50]])
269
+ >>> mnorm(A, 1)
270
+ mpf('1050.0')
271
+ >>> mnorm(A, inf)
272
+ mpf('1001.0')
273
+ >>> mnorm(A, 'F')
274
+ mpf('1006.2310867787777')
275
+
276
+ The last norm (the "Frobenius-norm") is an approximation for the 2-norm, which
277
+ is hard to calculate and not available. The Frobenius-norm lacks some
278
+ mathematical properties you might expect from a norm.
279
+ """
280
+
281
    def __init__(self, *args, **kwargs):
        """
        Build a matrix from a nested list, a flat list (column vector),
        one or two integer dimensions (zero matrix), another matrix, or
        any object exposing ``tolist()`` (e.g. a numpy array).
        """
        # Sparse storage: only non-zero entries live in this dict,
        # keyed by (row, col) tuples.
        self.__data = {}
        # LU decomposition cache, this is useful when solving the same system
        # multiple times, when calculating the inverse and when calculating the
        # determinant
        self._LU = None
        if "force_type" in kwargs:
            warnings.warn("The force_type argument was removed, it did not work"
                " properly anyway. If you want to force floating-point or"
                " interval computations, use the respective methods from `fp`"
                " or `mp` instead, e.g., `fp.matrix()` or `iv.matrix()`."
                " If you want to truncate values to integer, use .apply(int) instead.")
        if isinstance(args[0], (list, tuple)):
            if isinstance(args[0][0], (list, tuple)):
                # interpret nested list as matrix
                A = args[0]
                self.__rows = len(A)
                self.__cols = len(A[0])
                for i, row in enumerate(A):
                    for j, a in enumerate(row):
                        # note: this will call __setitem__ which will call self.ctx.convert() to convert the datatype.
                        self[i, j] = a
            else:
                # interpret flat list as a column vector (rows = len(v), one column)
                v = args[0]
                self.__rows = len(v)
                self.__cols = 1
                for i, e in enumerate(v):
                    self[i, 0] = e
        elif isinstance(args[0], int):
            # create empty (all-zero) matrix of given dimensions
            if len(args) == 1:
                self.__rows = self.__cols = args[0]
            else:
                if not isinstance(args[1], int):
                    raise TypeError("expected int")
                self.__rows = args[0]
                self.__cols = args[1]
        elif isinstance(args[0], _matrix):
            # copy-construct from another matrix; elements are re-converted
            # through __setitem__ so the new context's datatype is used
            A = args[0]
            self.__rows = A._matrix__rows
            self.__cols = A._matrix__cols
            for i in xrange(A.__rows):
                for j in xrange(A.__cols):
                    self[i, j] = A[i, j]
        elif hasattr(args[0], 'tolist'):
            # duck-typed path for numpy arrays and similar objects
            A = self.ctx.matrix(args[0].tolist())
            self.__data = A._matrix__data
            self.__rows = A._matrix__rows
            self.__cols = A._matrix__cols
        else:
            raise TypeError('could not interpret given arguments')
333
+
334
+ def apply(self, f):
335
+ """
336
+ Return a copy of self with the function `f` applied elementwise.
337
+ """
338
+ new = self.ctx.matrix(self.__rows, self.__cols)
339
+ for i in xrange(self.__rows):
340
+ for j in xrange(self.__cols):
341
+ new[i,j] = f(self[i,j])
342
+ return new
343
+
344
    def __nstr__(self, n=None, **kwargs):
        """
        Render the matrix as aligned, bracketed rows of strings.

        If `n` is given, each element is formatted with ctx.nstr(..., n);
        otherwise str() is used. Kwargs are forwarded to ctx.nstr.
        """
        # Build table of string representations of the elements
        res = []
        # Track per-column max lengths for pretty alignment
        maxlen = [0] * self.cols
        for i in range(self.rows):
            res.append([])
            for j in range(self.cols):
                if n:
                    string = self.ctx.nstr(self[i,j], n, **kwargs)
                else:
                    string = str(self[i,j])
                res[-1].append(string)
                maxlen[j] = max(len(string), maxlen[j])
        # Patch strings together
        for i, row in enumerate(res):
            for j, elem in enumerate(row):
                # Pad each element up to maxlen so the columns line up
                row[j] = elem.rjust(maxlen[j])
            res[i] = "[" + colsep.join(row) + "]"
        return rowsep.join(res)
365
+
366
    def __str__(self):
        # Delegate to __nstr__ with no digit limit (full precision).
        return self.__nstr__()
368
+
369
+ def _toliststr(self, avoid_type=False):
370
+ """
371
+ Create a list string from a matrix.
372
+
373
+ If avoid_type: avoid multiple 'mpf's.
374
+ """
375
+ # XXX: should be something like self.ctx._types
376
+ typ = self.ctx.mpf
377
+ s = '['
378
+ for i in xrange(self.__rows):
379
+ s += '['
380
+ for j in xrange(self.__cols):
381
+ if not avoid_type or not isinstance(self[i,j], typ):
382
+ a = repr(self[i,j])
383
+ else:
384
+ a = "'" + str(self[i,j]) + "'"
385
+ s += a + ', '
386
+ s = s[:-2]
387
+ s += '],\n '
388
+ s = s[:-3]
389
+ s += ']'
390
+ return s
391
+
392
+ def tolist(self):
393
+ """
394
+ Convert the matrix to a nested list.
395
+ """
396
+ return [[self[i,j] for j in range(self.__cols)] for i in range(self.__rows)]
397
+
398
    def __repr__(self):
        # In pretty mode show the aligned table; otherwise an eval-style
        # 'matrix(...)' form built from the nested-list string.
        if self.ctx.pretty:
            return self.__str__()
        s = 'matrix(\n'
        s += self._toliststr(avoid_type=True) + ')'
        return s
404
+
405
+ def __get_element(self, key):
406
+ '''
407
+ Fast extraction of the i,j element from the matrix
408
+ This function is for private use only because is unsafe:
409
+ 1. Does not check on the value of key it expects key to be a integer tuple (i,j)
410
+ 2. Does not check bounds
411
+ '''
412
+ if key in self.__data:
413
+ return self.__data[key]
414
+ else:
415
+ return self.ctx.zero
416
+
417
    def __set_element(self, key, value):
        '''
        Fast assignment of the i,j element in the matrix
        This function is unsafe:
        1. Does not check on the value of key it expects key to be a integer tuple (i,j)
        2. Does not check bounds
        3. Does not check the value type
        4. Does not reset the LU cache
        '''
        if value: # only store non-zeros
            self.__data[key] = value
        elif key in self.__data:
            # storing a zero means removing the entry from the sparse dict
            del self.__data[key]
430
+
431
+
432
    def __getitem__(self, key):
        '''
        Getitem function for mp matrix class with slice index enabled
        it allows the following assignments
        scalar to a slice of the matrix
        B = A[:,2:6]
        '''
        # Convert vector to matrix indexing
        if isinstance(key, int) or isinstance(key,slice):
            # only sufficient for vectors
            if self.__rows == 1:
                key = (0, key)
            elif self.__cols == 1:
                key = (key, 0)
            else:
                raise IndexError('insufficient indices for matrix')

        if isinstance(key[0],slice) or isinstance(key[1],slice):

            # Rows
            if isinstance(key[0],slice):
                # Check bounds
                if (key[0].start is None or key[0].start >= 0) and \
                    (key[0].stop is None or key[0].stop <= self.__rows+1):
                    # Generate indices
                    rows = xrange(*key[0].indices(self.__rows))
                else:
                    raise IndexError('Row index out of bounds')
            else:
                # Single row
                rows = [key[0]]

            # Columns
            if isinstance(key[1],slice):
                # Check bounds
                if (key[1].start is None or key[1].start >= 0) and \
                    (key[1].stop is None or key[1].stop <= self.__cols+1):
                    # Generate indices
                    columns = xrange(*key[1].indices(self.__cols))
                else:
                    raise IndexError('Column index out of bounds')

            else:
                # Single column
                columns = [key[1]]

            # Create matrix slice
            m = self.ctx.matrix(len(rows),len(columns))

            # Assign elements to the output matrix
            # (__set_element/__get_element skip conversion and bounds checks —
            # safe here because the source values are already converted)
            for i,x in enumerate(rows):
                for j,y in enumerate(columns):
                    m.__set_element((i,j),self.__get_element((x,y)))

            return m

        else:
            # single element extraction
            if key[0] >= self.__rows or key[1] >= self.__cols:
                raise IndexError('matrix index out of range')
            if key in self.__data:
                return self.__data[key]
            else:
                # missing entries of the sparse dict are zeros
                return self.ctx.zero
496
+
497
    def __setitem__(self, key, value):
        # setitem function for mp matrix class with slice index enabled
        # it allows the following assignments
        # scalar to a slice of the matrix
        # A[:,2:6] = 2.5
        # submatrix to matrix (the value matrix should be the same size as the slice size)
        # A[3,:] = B where A is n x m and B is n x 1
        # Convert vector to matrix indexing
        if isinstance(key, int) or isinstance(key,slice):
            # only sufficient for vectors
            if self.__rows == 1:
                key = (0, key)
            elif self.__cols == 1:
                key = (key, 0)
            else:
                raise IndexError('insufficient indices for matrix')
        # Slice indexing
        if isinstance(key[0],slice) or isinstance(key[1],slice):
            # Rows
            if isinstance(key[0],slice):
                # Check bounds
                if (key[0].start is None or key[0].start >= 0) and \
                    (key[0].stop is None or key[0].stop <= self.__rows+1):
                    # generate row indices
                    rows = xrange(*key[0].indices(self.__rows))
                else:
                    raise IndexError('Row index out of bounds')
            else:
                # Single row
                rows = [key[0]]
            # Columns
            if isinstance(key[1],slice):
                # Check bounds
                if (key[1].start is None or key[1].start >= 0) and \
                    (key[1].stop is None or key[1].stop <= self.__cols+1):
                    # Generate column indices
                    columns = xrange(*key[1].indices(self.__cols))
                else:
                    raise IndexError('Column index out of bounds')
            else:
                # Single column
                columns = [key[1]]
            # Assign slice with a scalar
            if isinstance(value,self.ctx.matrix):
                # Assign elements to matrix if input and output dimensions match
                if len(rows) == value.rows and len(columns) == value.cols:
                    for i,x in enumerate(rows):
                        for j,y in enumerate(columns):
                            # fast path: source entries are already converted
                            self.__set_element((x,y), value.__get_element((i,j)))
                else:
                    raise ValueError('Dimensions do not match')
            else:
                # Assign slice with scalars
                value = self.ctx.convert(value)
                for i in rows:
                    for j in columns:
                        self.__set_element((i,j), value)
        else:
            # Single element assignment
            # Check bounds
            if key[0] >= self.__rows or key[1] >= self.__cols:
                raise IndexError('matrix index out of range')
            # Convert and store value
            value = self.ctx.convert(value)
            if value: # only store non-zeros
                self.__data[key] = value
            elif key in self.__data:
                del self.__data[key]

        # any write invalidates the cached LU decomposition
        if self._LU:
            self._LU = None
        return
569
+
570
+ def __iter__(self):
571
+ for i in xrange(self.__rows):
572
+ for j in xrange(self.__cols):
573
+ yield self[i,j]
574
+
575
    def __mul__(self, other):
        """
        Matrix-matrix product if `other` is a matrix; otherwise
        elementwise scalar multiplication.
        """
        if isinstance(other, self.ctx.matrix):
            # dot multiplication
            if self.__cols != other.__rows:
                raise ValueError('dimensions not compatible for multiplication')
            new = self.ctx.matrix(self.__rows, other.__cols)
            # hoist dict.get / zero lookups out of the inner loop for speed;
            # fdot computes the inner product of the (value, value) pairs
            self_zero = self.ctx.zero
            self_get = self.__data.get
            other_zero = other.ctx.zero
            other_get = other.__data.get
            for i in xrange(self.__rows):
                for j in xrange(other.__cols):
                    new[i, j] = self.ctx.fdot((self_get((i,k), self_zero), other_get((k,j), other_zero))
                                              for k in xrange(other.__rows))
            return new
        else:
            # try scalar multiplication
            new = self.ctx.matrix(self.__rows, self.__cols)
            for i in xrange(self.__rows):
                for j in xrange(self.__cols):
                    new[i, j] = other * self[i, j]
            return new
597
+
598
    def __matmul__(self, other):
        # `A @ B` (PEP 465) behaves exactly like `A * B`.
        return self.__mul__(other)
600
+
601
    def __rmul__(self, other):
        # assume other is scalar and thus commutative
        # (matrix * matrix never reaches __rmul__: __mul__ handles it first)
        if isinstance(other, self.ctx.matrix):
            raise TypeError("other should not be type of ctx.matrix")
        return self.__mul__(other)
606
+
607
    def __pow__(self, other):
        """
        Integer power of a square matrix by binary exponentiation;
        a negative exponent inverts the result.
        """
        # avoid cyclic import problems
        #from linalg import inverse
        if not isinstance(other, int):
            raise ValueError('only integer exponents are supported')
        if not self.__rows == self.__cols:
            raise ValueError('only powers of square matrices are defined')
        n = other
        if n == 0:
            # A**0 is the identity
            return self.ctx.eye(self.__rows)
        if n < 0:
            n = -n
            neg = True
        else:
            neg = False
        i = n
        # square-and-multiply: y accumulates the result, z holds the
        # successive squarings (1 * matrix dispatches to __rmul__)
        y = 1
        z = self.copy()
        while i != 0:
            if i % 2 == 1:
                y = y * z
            z = z*z
            i = i // 2
        if neg:
            y = self.ctx.inverse(y)
        return y
633
+
634
    def __div__(self, other):
        # assume other is scalar and do element-wise divison
        # NOTE(review): this assert vanishes under `python -O`; callers would
        # then get an elementwise TypeError instead — consider raising explicitly.
        assert not isinstance(other, self.ctx.matrix)
        new = self.ctx.matrix(self.__rows, self.__cols)
        for i in xrange(self.__rows):
            for j in xrange(self.__cols):
                new[i,j] = self[i,j] / other
        return new

    # Python 3 division operator maps to the same implementation
    __truediv__ = __div__
644
+
645
+ def __add__(self, other):
646
+ if isinstance(other, self.ctx.matrix):
647
+ if not (self.__rows == other.__rows and self.__cols == other.__cols):
648
+ raise ValueError('incompatible dimensions for addition')
649
+ new = self.ctx.matrix(self.__rows, self.__cols)
650
+ for i in xrange(self.__rows):
651
+ for j in xrange(self.__cols):
652
+ new[i,j] = self[i,j] + other[i,j]
653
+ return new
654
+ else:
655
+ # assume other is scalar and add element-wise
656
+ new = self.ctx.matrix(self.__rows, self.__cols)
657
+ for i in xrange(self.__rows):
658
+ for j in xrange(self.__cols):
659
+ new[i,j] += self[i,j] + other
660
+ return new
661
+
662
    def __radd__(self, other):
        # scalar + matrix: addition is commutative elementwise
        return self.__add__(other)
664
+
665
    def __sub__(self, other):
        # dimension check up front gives a clearer error than the
        # generic one raised later by __add__
        if isinstance(other, self.ctx.matrix) and not (self.__rows == other.__rows
            and self.__cols == other.__cols):
            raise ValueError('incompatible dimensions for subtraction')
        return self.__add__(other * (-1))
670
+
671
    def __pos__(self):
        """
        +M returns a copy of M, rounded to current working precision.
        """
        return (+1) * self
676
+
677
    def __neg__(self):
        # elementwise negation via scalar multiplication
        return (-1) * self
679
+
680
    def __rsub__(self, other):
        # other - self, with `other` a scalar (or anything __add__ accepts)
        return -self + other
682
+
683
+ def __eq__(self, other):
684
+ return self.__rows == other.__rows and self.__cols == other.__cols \
685
+ and self.__data == other.__data
686
+
687
+ def __len__(self):
688
+ if self.rows == 1:
689
+ return self.cols
690
+ elif self.cols == 1:
691
+ return self.rows
692
+ else:
693
+ return self.rows # do it like numpy
694
+
695
    def __getrows(self):
        return self.__rows

    def __setrows(self, value):
        # shrinking the matrix drops stored entries beyond the new row count
        for key in self.__data.copy():
            if key[0] >= value:
                del self.__data[key]
        self.__rows = value

    rows = property(__getrows, __setrows, doc='number of rows')
705
+
706
    def __getcols(self):
        return self.__cols

    def __setcols(self, value):
        # shrinking the matrix drops stored entries beyond the new column count
        for key in self.__data.copy():
            if key[1] >= value:
                del self.__data[key]
        self.__cols = value

    cols = property(__getcols, __setcols, doc='number of columns')
716
+
717
+ def transpose(self):
718
+ new = self.ctx.matrix(self.__cols, self.__rows)
719
+ for i in xrange(self.__rows):
720
+ for j in xrange(self.__cols):
721
+ new[j,i] = self[i,j]
722
+ return new
723
+
724
+ T = property(transpose)
725
+
726
    def conjugate(self):
        # elementwise complex conjugate
        return self.apply(self.ctx.conj)
728
+
729
    def transpose_conj(self):
        # conjugate transpose (Hermitian adjoint)
        return self.conjugate().transpose()

    H = property(transpose_conj)
733
+
734
    def copy(self):
        # copying the backing dict is sufficient: assignments replace
        # entries rather than mutating them in place
        new = self.ctx.matrix(self.__rows, self.__cols)
        new.__data = self.__data.copy()
        return new

    __copy__ = copy
740
+
741
+ def column(self, n):
742
+ m = self.ctx.matrix(self.rows, 1)
743
+ for i in range(self.rows):
744
+ m[i] = self[i,n]
745
+ return m
746
+
747
+ class MatrixMethods(object):
748
+
749
    def __init__(ctx):
        # XXX: subclass
        # Each context gets its own `matrix` subclass bound to that context,
        # so elements are converted with that context's `convert`.
        ctx.matrix = type('matrix', (_matrix,), {})
        ctx.matrix.ctx = ctx
        ctx.matrix.convert = ctx.convert
754
+
755
+ def eye(ctx, n, **kwargs):
756
+ """
757
+ Create square identity matrix n x n.
758
+ """
759
+ A = ctx.matrix(n, **kwargs)
760
+ for i in xrange(n):
761
+ A[i,i] = 1
762
+ return A
763
+
764
+ def diag(ctx, diagonal, **kwargs):
765
+ """
766
+ Create square diagonal matrix using given list.
767
+
768
+ Example:
769
+ >>> from mpmath import diag, mp
770
+ >>> mp.pretty = False
771
+ >>> diag([1, 2, 3])
772
+ matrix(
773
+ [['1.0', '0.0', '0.0'],
774
+ ['0.0', '2.0', '0.0'],
775
+ ['0.0', '0.0', '3.0']])
776
+ """
777
+ A = ctx.matrix(len(diagonal), **kwargs)
778
+ for i in xrange(len(diagonal)):
779
+ A[i,i] = diagonal[i]
780
+ return A
781
+
782
+ def zeros(ctx, *args, **kwargs):
783
+ """
784
+ Create matrix m x n filled with zeros.
785
+ One given dimension will create square matrix n x n.
786
+
787
+ Example:
788
+ >>> from mpmath import zeros, mp
789
+ >>> mp.pretty = False
790
+ >>> zeros(2)
791
+ matrix(
792
+ [['0.0', '0.0'],
793
+ ['0.0', '0.0']])
794
+ """
795
+ if len(args) == 1:
796
+ m = n = args[0]
797
+ elif len(args) == 2:
798
+ m = args[0]
799
+ n = args[1]
800
+ else:
801
+ raise TypeError('zeros expected at most 2 arguments, got %i' % len(args))
802
+ A = ctx.matrix(m, n, **kwargs)
803
+ for i in xrange(m):
804
+ for j in xrange(n):
805
+ A[i,j] = 0
806
+ return A
807
+
808
+ def ones(ctx, *args, **kwargs):
809
+ """
810
+ Create matrix m x n filled with ones.
811
+ One given dimension will create square matrix n x n.
812
+
813
+ Example:
814
+ >>> from mpmath import ones, mp
815
+ >>> mp.pretty = False
816
+ >>> ones(2)
817
+ matrix(
818
+ [['1.0', '1.0'],
819
+ ['1.0', '1.0']])
820
+ """
821
+ if len(args) == 1:
822
+ m = n = args[0]
823
+ elif len(args) == 2:
824
+ m = args[0]
825
+ n = args[1]
826
+ else:
827
+ raise TypeError('ones expected at most 2 arguments, got %i' % len(args))
828
+ A = ctx.matrix(m, n, **kwargs)
829
+ for i in xrange(m):
830
+ for j in xrange(n):
831
+ A[i,j] = 1
832
+ return A
833
+
834
+ def hilbert(ctx, m, n=None):
835
+ """
836
+ Create (pseudo) hilbert matrix m x n.
837
+ One given dimension will create hilbert matrix n x n.
838
+
839
+ The matrix is very ill-conditioned and symmetric, positive definite if
840
+ square.
841
+ """
842
+ if n is None:
843
+ n = m
844
+ A = ctx.matrix(m, n)
845
+ for i in xrange(m):
846
+ for j in xrange(n):
847
+ A[i,j] = ctx.one / (i + j + 1)
848
+ return A
849
+
850
    def randmatrix(ctx, m, n=None, min=0, max=1, **kwargs):
        """
        Create a random m x n matrix.

        All values are >= min and <max.
        n defaults to m.

        Note: the `min`/`max` parameter names shadow the builtins, but they
        are part of the public keyword interface and cannot be renamed.

        Example:
        >>> from mpmath import randmatrix
        >>> randmatrix(2) # doctest:+SKIP
        matrix(
        [['0.53491598236191806', '0.57195669543302752'],
         ['0.85589992269513615', '0.82444367501382143']])
        """
        if not n:
            n = m
        A = ctx.matrix(m, n, **kwargs)
        for i in xrange(m):
            for j in xrange(n):
                # uniform on [min, max): scale ctx.rand()'s [0, 1) output
                A[i,j] = ctx.rand() * (max - min) + min
        return A
871
+
872
+ def swap_row(ctx, A, i, j):
873
+ """
874
+ Swap row i with row j.
875
+ """
876
+ if i == j:
877
+ return
878
+ if isinstance(A, ctx.matrix):
879
+ for k in xrange(A.cols):
880
+ A[i,k], A[j,k] = A[j,k], A[i,k]
881
+ elif isinstance(A, list):
882
+ A[i], A[j] = A[j], A[i]
883
+ else:
884
+ raise TypeError('could not interpret type')
885
+
886
    def extend(ctx, A, b):
        """
        Extend matrix A with column b and return result.

        `A` is copied, so the input matrix is left untouched.
        """
        if not isinstance(A, ctx.matrix):
            raise TypeError("A should be a type of ctx.matrix")
        if A.rows != len(b):
            # i.e. b must have one entry per row of A
            raise ValueError("Value should be equal to len(b)")
        A = A.copy()
        # growing `cols` leaves the new column zero; fill it from b
        A.cols += 1
        for i in xrange(A.rows):
            A[i, A.cols-1] = b[i]
        return A
899
+
900
    def norm(ctx, x, p=2):
        r"""
        Gives the entrywise `p`-norm of an iterable *x*, i.e. the vector norm
        `\left(\sum_k |x_k|^p\right)^{1/p}`, for any given `1 \le p \le \infty`.

        Special cases:

        If *x* is not iterable, this just returns ``absmax(x)``.

        ``p=1`` gives the sum of absolute values.

        ``p=2`` is the standard Euclidean vector norm.

        ``p=inf`` gives the magnitude of the largest element.

        For *x* a matrix, ``p=2`` is the Frobenius norm.
        For operator matrix norms, use :func:`~mpmath.mnorm` instead.

        You can use the string 'inf' as well as float('inf') or mpf('inf')
        to specify the infinity norm.

        **Examples**

        >>> from mpmath import *
        >>> mp.dps = 15; mp.pretty = False
        >>> x = matrix([-10, 2, 100])
        >>> norm(x, 1)
        mpf('112.0')
        >>> norm(x, 2)
        mpf('100.5186549850325')
        >>> norm(x, inf)
        mpf('100.0')

        """
        # scalar fallback: non-iterables get their absolute magnitude
        try:
            iter(x)
        except TypeError:
            return ctx.absmax(x)
        # normalize p ('inf' strings, floats) to a context number,
        # keeping exact ints as-is for the fast comparisons below
        if type(p) is not int:
            p = ctx.convert(p)
        if p == ctx.inf:
            return max(ctx.absmax(i) for i in x)
        elif p == 1:
            return ctx.fsum(x, absolute=1)
        elif p == 2:
            # fsum(..., absolute=1, squared=1) sums |x_k|^2 in one pass
            return ctx.sqrt(ctx.fsum(x, absolute=1, squared=1))
        elif p > 1:
            # general p-norm
            return ctx.nthroot(ctx.fsum(abs(i)**p for i in x), p)
        else:
            raise ValueError('p has to be >= 1')
950
+
951
    def mnorm(ctx, A, p=1):
        r"""
        Gives the matrix (operator) `p`-norm of A. Currently ``p=1`` and ``p=inf``
        are supported:

        ``p=1`` gives the 1-norm (maximal column sum)

        ``p=inf`` gives the `\infty`-norm (maximal row sum).
        You can use the string 'inf' as well as float('inf') or mpf('inf')

        ``p=2`` (not implemented) for a square matrix is the usual spectral
        matrix norm, i.e. the largest singular value.

        ``p='f'`` (or 'F', 'fro', 'Frobenius, 'frobenius') gives the
        Frobenius norm, which is the elementwise 2-norm. The Frobenius norm is an
        approximation of the spectral norm and satisfies

        .. math ::

            \frac{1}{\sqrt{\mathrm{rank}(A)}} \|A\|_F \le \|A\|_2 \le \|A\|_F

        The Frobenius norm lacks some mathematical properties that might
        be expected of a norm.

        For general elementwise `p`-norms, use :func:`~mpmath.norm` instead.

        **Examples**

        >>> from mpmath import *
        >>> mp.dps = 15; mp.pretty = False
        >>> A = matrix([[1, -1000], [100, 50]])
        >>> mnorm(A, 1)
        mpf('1050.0')
        >>> mnorm(A, inf)
        mpf('1001.0')
        >>> mnorm(A, 'F')
        mpf('1006.2310867787777')

        """
        A = ctx.matrix(A)
        if type(p) is not int:
            # any prefix of 'frobenius' (case-insensitive) selects the
            # Frobenius norm, which is the elementwise 2-norm
            if type(p) is str and 'frobenius'.startswith(p.lower()):
                return ctx.norm(A, 2)
            p = ctx.convert(p)
        m, n = A.rows, A.cols
        if p == 1:
            # maximal column sum of absolute values
            return max(ctx.fsum((A[i,j] for i in xrange(m)), absolute=1) for j in xrange(n))
        elif p == ctx.inf:
            # maximal row sum of absolute values
            return max(ctx.fsum((A[i,j] for j in xrange(n)), absolute=1) for i in xrange(m))
        else:
            raise NotImplementedError("matrix p-norm for arbitrary p")
1002
+
1003
# Run the doctests embedded in the docstrings above when executed directly.
if __name__ == '__main__':
    import doctest
    doctest.testmod()
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cublas/__init__.py ADDED
File without changes
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cublas/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (214 Bytes). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cublas/include/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (222 Bytes). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cublas/include/cublasLt.h ADDED
@@ -0,0 +1,1853 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 1993-2022 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+ #pragma once
50
+
51
+ #ifndef CUBLASAPI
52
+ #ifdef __CUDACC__
53
+ #define CUBLASAPI __host__ __device__
54
+ #else
55
+ #define CUBLASAPI
56
+ #endif
57
+ #endif
58
+
59
+ #include <cublas_api.h>
60
+
61
+ #include <stdint.h>
62
+ #include <stddef.h>
63
+ #include <stdio.h>
64
+
65
+ #if defined(__cplusplus)
66
+ extern "C" {
67
+ #endif /* __cplusplus */
68
+
69
+ /** Opaque structure holding CUBLASLT context
70
+ */
71
+ typedef struct cublasLtContext* cublasLtHandle_t;
72
+
73
+ cublasStatus_t CUBLASWINAPI cublasLtCreate(cublasLtHandle_t* lightHandle);
74
+
75
+ cublasStatus_t CUBLASWINAPI cublasLtDestroy(cublasLtHandle_t lightHandle);
76
+
77
+ const char* CUBLASWINAPI cublasLtGetStatusName(cublasStatus_t status);
78
+
79
+ const char* CUBLASWINAPI cublasLtGetStatusString(cublasStatus_t status);
80
+
81
+ size_t CUBLASWINAPI cublasLtGetVersion(void);
82
+
83
+ size_t CUBLASWINAPI cublasLtGetCudartVersion(void);
84
+
85
+ cublasStatus_t CUBLASWINAPI cublasLtGetProperty(libraryPropertyType type, int* value);
86
+
87
+ cublasStatus_t CUBLASWINAPI cublasLtHeuristicsCacheGetCapacity(size_t* capacity);
88
+ cublasStatus_t CUBLASWINAPI cublasLtHeuristicsCacheSetCapacity(size_t capacity);
89
+
90
+ /** Semi-opaque descriptor for matrix memory layout
91
+ */
92
+ typedef struct {
93
+ uint64_t data[8];
94
+ } cublasLtMatrixLayoutOpaque_t;
95
+
96
+ /** Opaque descriptor for matrix memory layout
97
+ */
98
+ typedef cublasLtMatrixLayoutOpaque_t* cublasLtMatrixLayout_t;
99
+
100
+ /** Semi-opaque algorithm descriptor (to avoid complicated alloc/free schemes)
101
+ *
102
+ * This structure can be trivially serialized and later restored for use with the same version of cuBLAS library to save
103
+ * on selecting the right configuration again.
104
+ */
105
+ typedef struct {
106
+ uint64_t data[8];
107
+ } cublasLtMatmulAlgo_t;
108
+
109
+ /** Semi-opaque descriptor for cublasLtMatmul() operation details
110
+ */
111
+ typedef struct {
112
+ uint64_t data[23];
113
+ } cublasLtMatmulDescOpaque_t;
114
+
115
+ /** Opaque descriptor for cublasLtMatmul() operation details
116
+ */
117
+ typedef cublasLtMatmulDescOpaque_t* cublasLtMatmulDesc_t;
118
+
119
+ /** Semi-opaque descriptor for cublasLtMatrixTransform() operation details
120
+ */
121
+ typedef struct {
122
+ uint64_t data[8];
123
+ } cublasLtMatrixTransformDescOpaque_t;
124
+
125
+ /** Opaque descriptor for cublasLtMatrixTransform() operation details
126
+ */
127
+ typedef cublasLtMatrixTransformDescOpaque_t* cublasLtMatrixTransformDesc_t;
128
+
129
+ /** Semi-opaque descriptor for cublasLtMatmulPreference() operation details
130
+ */
131
+ typedef struct {
132
+ uint64_t data[10];
133
+ } cublasLtMatmulPreferenceOpaque_t;
134
+
135
+ /** Opaque descriptor for cublasLtMatmulAlgoGetHeuristic() configuration
136
+ */
137
+ typedef cublasLtMatmulPreferenceOpaque_t* cublasLtMatmulPreference_t;
138
+
139
+ /** Tile size (in C/D matrix Rows x Cols)
140
+ *
141
+ * General order of tile IDs is sorted by size first and by first dimension second.
142
+ */
143
+ typedef enum {
144
+ CUBLASLT_MATMUL_TILE_UNDEFINED = 0,
145
+ CUBLASLT_MATMUL_TILE_8x8 = 1,
146
+ CUBLASLT_MATMUL_TILE_8x16 = 2,
147
+ CUBLASLT_MATMUL_TILE_16x8 = 3,
148
+ CUBLASLT_MATMUL_TILE_8x32 = 4,
149
+ CUBLASLT_MATMUL_TILE_16x16 = 5,
150
+ CUBLASLT_MATMUL_TILE_32x8 = 6,
151
+ CUBLASLT_MATMUL_TILE_8x64 = 7,
152
+ CUBLASLT_MATMUL_TILE_16x32 = 8,
153
+ CUBLASLT_MATMUL_TILE_32x16 = 9,
154
+ CUBLASLT_MATMUL_TILE_64x8 = 10,
155
+ CUBLASLT_MATMUL_TILE_32x32 = 11,
156
+ CUBLASLT_MATMUL_TILE_32x64 = 12,
157
+ CUBLASLT_MATMUL_TILE_64x32 = 13,
158
+ CUBLASLT_MATMUL_TILE_32x128 = 14,
159
+ CUBLASLT_MATMUL_TILE_64x64 = 15,
160
+ CUBLASLT_MATMUL_TILE_128x32 = 16,
161
+ CUBLASLT_MATMUL_TILE_64x128 = 17,
162
+ CUBLASLT_MATMUL_TILE_128x64 = 18,
163
+ CUBLASLT_MATMUL_TILE_64x256 = 19,
164
+ CUBLASLT_MATMUL_TILE_128x128 = 20,
165
+ CUBLASLT_MATMUL_TILE_256x64 = 21,
166
+ CUBLASLT_MATMUL_TILE_64x512 = 22,
167
+ CUBLASLT_MATMUL_TILE_128x256 = 23,
168
+ CUBLASLT_MATMUL_TILE_256x128 = 24,
169
+ CUBLASLT_MATMUL_TILE_512x64 = 25,
170
+ CUBLASLT_MATMUL_TILE_64x96 = 26,
171
+ CUBLASLT_MATMUL_TILE_96x64 = 27,
172
+ CUBLASLT_MATMUL_TILE_96x128 = 28,
173
+ CUBLASLT_MATMUL_TILE_128x160 = 29,
174
+ CUBLASLT_MATMUL_TILE_160x128 = 30,
175
+ CUBLASLT_MATMUL_TILE_192x128 = 31,
176
+ CUBLASLT_MATMUL_TILE_128x192 = 32,
177
+ CUBLASLT_MATMUL_TILE_128x96 = 33,
178
+ CUBLASLT_MATMUL_TILE_END
179
+ } cublasLtMatmulTile_t;
180
+
181
+ /** Size and number of stages in which elements are read into shared memory
182
+ *
183
+ * General order of stages IDs is sorted by stage size first and by number of stages second.
184
+ */
185
+ typedef enum {
186
+ CUBLASLT_MATMUL_STAGES_UNDEFINED = 0,
187
+ CUBLASLT_MATMUL_STAGES_16x1 = 1,
188
+ CUBLASLT_MATMUL_STAGES_16x2 = 2,
189
+ CUBLASLT_MATMUL_STAGES_16x3 = 3,
190
+ CUBLASLT_MATMUL_STAGES_16x4 = 4,
191
+ CUBLASLT_MATMUL_STAGES_16x5 = 5,
192
+ CUBLASLT_MATMUL_STAGES_16x6 = 6,
193
+ CUBLASLT_MATMUL_STAGES_32x1 = 7,
194
+ CUBLASLT_MATMUL_STAGES_32x2 = 8,
195
+ CUBLASLT_MATMUL_STAGES_32x3 = 9,
196
+ CUBLASLT_MATMUL_STAGES_32x4 = 10,
197
+ CUBLASLT_MATMUL_STAGES_32x5 = 11,
198
+ CUBLASLT_MATMUL_STAGES_32x6 = 12,
199
+ CUBLASLT_MATMUL_STAGES_64x1 = 13,
200
+ CUBLASLT_MATMUL_STAGES_64x2 = 14,
201
+ CUBLASLT_MATMUL_STAGES_64x3 = 15,
202
+ CUBLASLT_MATMUL_STAGES_64x4 = 16,
203
+ CUBLASLT_MATMUL_STAGES_64x5 = 17,
204
+ CUBLASLT_MATMUL_STAGES_64x6 = 18,
205
+ CUBLASLT_MATMUL_STAGES_128x1 = 19,
206
+ CUBLASLT_MATMUL_STAGES_128x2 = 20,
207
+ CUBLASLT_MATMUL_STAGES_128x3 = 21,
208
+ CUBLASLT_MATMUL_STAGES_128x4 = 22,
209
+ CUBLASLT_MATMUL_STAGES_128x5 = 23,
210
+ CUBLASLT_MATMUL_STAGES_128x6 = 24,
211
+ CUBLASLT_MATMUL_STAGES_32x10 = 25,
212
+ CUBLASLT_MATMUL_STAGES_8x4 = 26,
213
+ CUBLASLT_MATMUL_STAGES_16x10 = 27,
214
+ CUBLASLT_MATMUL_STAGES_8x5 = 28,
215
+ CUBLASLT_MATMUL_STAGES_16x80 = 29,
216
+ CUBLASLT_MATMUL_STAGES_64x80 = 30,
217
+ CUBLASLT_MATMUL_STAGES_8x3 = 31,
218
+ CUBLASLT_MATMUL_STAGES_8xAUTO = 32,
219
+ CUBLASLT_MATMUL_STAGES_16xAUTO = 33,
220
+ CUBLASLT_MATMUL_STAGES_32xAUTO = 34,
221
+ CUBLASLT_MATMUL_STAGES_64xAUTO = 35,
222
+ CUBLASLT_MATMUL_STAGES_128xAUTO = 36,
223
+ CUBLASLT_MATMUL_STAGES_END
224
+ } cublasLtMatmulStages_t;
225
+
226
+ /** Thread Block Cluster size
227
+ *
228
+ * Typically dimensioned similar to cublasLtMatmulTile_t, with the third coordinate unused at this time.
229
+ */
230
+ typedef enum {
231
+ /** Let library pick cluster shape automatically */
232
+ CUBLASLT_CLUSTER_SHAPE_AUTO = 0,
233
+ CUBLASLT_CLUSTER_SHAPE_1x1x1 = 2,
234
+ CUBLASLT_CLUSTER_SHAPE_2x1x1 = 3,
235
+ CUBLASLT_CLUSTER_SHAPE_4x1x1 = 4,
236
+ CUBLASLT_CLUSTER_SHAPE_1x2x1 = 5,
237
+ CUBLASLT_CLUSTER_SHAPE_2x2x1 = 6,
238
+ CUBLASLT_CLUSTER_SHAPE_4x2x1 = 7,
239
+ CUBLASLT_CLUSTER_SHAPE_1x4x1 = 8,
240
+ CUBLASLT_CLUSTER_SHAPE_2x4x1 = 9,
241
+ CUBLASLT_CLUSTER_SHAPE_4x4x1 = 10,
242
+ CUBLASLT_CLUSTER_SHAPE_8x1x1 = 11,
243
+ CUBLASLT_CLUSTER_SHAPE_1x8x1 = 12,
244
+ CUBLASLT_CLUSTER_SHAPE_8x2x1 = 13,
245
+ CUBLASLT_CLUSTER_SHAPE_2x8x1 = 14,
246
+ CUBLASLT_CLUSTER_SHAPE_16x1x1 = 15,
247
+ CUBLASLT_CLUSTER_SHAPE_1x16x1 = 16,
248
+ CUBLASLT_CLUSTER_SHAPE_3x1x1 = 17,
249
+ CUBLASLT_CLUSTER_SHAPE_5x1x1 = 18,
250
+ CUBLASLT_CLUSTER_SHAPE_6x1x1 = 19,
251
+ CUBLASLT_CLUSTER_SHAPE_7x1x1 = 20,
252
+ CUBLASLT_CLUSTER_SHAPE_9x1x1 = 21,
253
+ CUBLASLT_CLUSTER_SHAPE_10x1x1 = 22,
254
+ CUBLASLT_CLUSTER_SHAPE_11x1x1 = 23,
255
+ CUBLASLT_CLUSTER_SHAPE_12x1x1 = 24,
256
+ CUBLASLT_CLUSTER_SHAPE_13x1x1 = 25,
257
+ CUBLASLT_CLUSTER_SHAPE_14x1x1 = 26,
258
+ CUBLASLT_CLUSTER_SHAPE_15x1x1 = 27,
259
+ CUBLASLT_CLUSTER_SHAPE_3x2x1 = 28,
260
+ CUBLASLT_CLUSTER_SHAPE_5x2x1 = 29,
261
+ CUBLASLT_CLUSTER_SHAPE_6x2x1 = 30,
262
+ CUBLASLT_CLUSTER_SHAPE_7x2x1 = 31,
263
+ CUBLASLT_CLUSTER_SHAPE_1x3x1 = 32,
264
+ CUBLASLT_CLUSTER_SHAPE_2x3x1 = 33,
265
+ CUBLASLT_CLUSTER_SHAPE_3x3x1 = 34,
266
+ CUBLASLT_CLUSTER_SHAPE_4x3x1 = 35,
267
+ CUBLASLT_CLUSTER_SHAPE_5x3x1 = 36,
268
+ CUBLASLT_CLUSTER_SHAPE_3x4x1 = 37,
269
+ CUBLASLT_CLUSTER_SHAPE_1x5x1 = 38,
270
+ CUBLASLT_CLUSTER_SHAPE_2x5x1 = 39,
271
+ CUBLASLT_CLUSTER_SHAPE_3x5x1 = 40,
272
+ CUBLASLT_CLUSTER_SHAPE_1x6x1 = 41,
273
+ CUBLASLT_CLUSTER_SHAPE_2x6x1 = 42,
274
+ CUBLASLT_CLUSTER_SHAPE_1x7x1 = 43,
275
+ CUBLASLT_CLUSTER_SHAPE_2x7x1 = 44,
276
+ CUBLASLT_CLUSTER_SHAPE_1x9x1 = 45,
277
+ CUBLASLT_CLUSTER_SHAPE_1x10x1 = 46,
278
+ CUBLASLT_CLUSTER_SHAPE_1x11x1 = 47,
279
+ CUBLASLT_CLUSTER_SHAPE_1x12x1 = 48,
280
+ CUBLASLT_CLUSTER_SHAPE_1x13x1 = 49,
281
+ CUBLASLT_CLUSTER_SHAPE_1x14x1 = 50,
282
+ CUBLASLT_CLUSTER_SHAPE_1x15x1 = 51,
283
+ CUBLASLT_CLUSTER_SHAPE_END
284
+ } cublasLtClusterShape_t;
285
+
286
+ /** Inner size of the kernel
287
+ *
288
+ * Represents various aspects of internal kernel design, that don't impact CUDA grid size but may have other more subtle
289
+ * effects.
290
+ *
291
+ */
292
+ typedef enum {
293
+ CUBLASLT_MATMUL_INNER_SHAPE_UNDEFINED = 0,
294
+ CUBLASLT_MATMUL_INNER_SHAPE_MMA884 = 1,
295
+ CUBLASLT_MATMUL_INNER_SHAPE_MMA1684 = 2,
296
+ CUBLASLT_MATMUL_INNER_SHAPE_MMA1688 = 3,
297
+ CUBLASLT_MATMUL_INNER_SHAPE_MMA16816 = 4,
298
+ CUBLASLT_MATMUL_INNER_SHAPE_END
299
+ } cublasLtMatmulInnerShape_t;
300
+
301
+ /** Pointer mode to use for alpha/beta */
302
+ typedef enum {
303
+ /** matches CUBLAS_POINTER_MODE_HOST, pointer targets a single value host memory */
304
+ CUBLASLT_POINTER_MODE_HOST = CUBLAS_POINTER_MODE_HOST,
305
+ /** matches CUBLAS_POINTER_MODE_DEVICE, pointer targets a single value device memory */
306
+ CUBLASLT_POINTER_MODE_DEVICE = CUBLAS_POINTER_MODE_DEVICE,
307
+ /** pointer targets an array in device memory */
308
+ CUBLASLT_POINTER_MODE_DEVICE_VECTOR = 2,
309
+ /** alpha pointer targets an array in device memory, beta is zero. Note:
310
+ CUBLASLT_MATMUL_DESC_ALPHA_VECTOR_BATCH_STRIDE is not supported, must be 0. */
311
+ CUBLASLT_POINTER_MODE_ALPHA_DEVICE_VECTOR_BETA_ZERO = 3,
312
+ /** alpha pointer targets an array in device memory, beta is a single value in host memory. */
313
+ CUBLASLT_POINTER_MODE_ALPHA_DEVICE_VECTOR_BETA_HOST = 4,
314
+ } cublasLtPointerMode_t;
315
+
316
+ /** Mask to define and query pointer mode capability */
317
+ typedef enum {
318
+ /** no initial filtering is performed when querying pointer mode capabilities, will use gemm pointer mode defined in
319
+ operation description **/
320
+ CUBLASLT_POINTER_MODE_MASK_NO_FILTERING = 0,
321
+ /** see CUBLASLT_POINTER_MODE_HOST */
322
+ CUBLASLT_POINTER_MODE_MASK_HOST = 1,
323
+ /** see CUBLASLT_POINTER_MODE_DEVICE */
324
+ CUBLASLT_POINTER_MODE_MASK_DEVICE = 2,
325
+ /** see CUBLASLT_POINTER_MODE_DEVICE_VECTOR */
326
+ CUBLASLT_POINTER_MODE_MASK_DEVICE_VECTOR = 4,
327
+ /** see CUBLASLT_POINTER_MODE_ALPHA_DEVICE_VECTOR_BETA_ZERO */
328
+ CUBLASLT_POINTER_MODE_MASK_ALPHA_DEVICE_VECTOR_BETA_ZERO = 8,
329
+ /** see CUBLASLT_POINTER_MODE_ALPHA_DEVICE_VECTOR_BETA_HOST */
330
+ CUBLASLT_POINTER_MODE_MASK_ALPHA_DEVICE_VECTOR_BETA_HOST = 16,
331
+ } cublasLtPointerModeMask_t;
332
+
333
+ /** Implementation details that may affect numerical behavior of algorithms. */
334
+ #define CUBLASLT_NUMERICAL_IMPL_FLAGS_FMA (0x01ull << 0)
335
+ #define CUBLASLT_NUMERICAL_IMPL_FLAGS_HMMA (0x02ull << 0)
336
+ #define CUBLASLT_NUMERICAL_IMPL_FLAGS_IMMA (0x04ull << 0)
337
+ #define CUBLASLT_NUMERICAL_IMPL_FLAGS_DMMA (0x08ull << 0)
338
+ #define CUBLASLT_NUMERICAL_IMPL_FLAGS_TENSOR_OP_MASK (0xfeull << 0)
339
+ #define CUBLASLT_NUMERICAL_IMPL_FLAGS_OP_TYPE_MASK (0xffull << 0)
340
+
341
+ #define CUBLASLT_NUMERICAL_IMPL_FLAGS_ACCUMULATOR_16F (0x01ull << 8)
342
+ #define CUBLASLT_NUMERICAL_IMPL_FLAGS_ACCUMULATOR_32F (0x02ull << 8)
343
+ #define CUBLASLT_NUMERICAL_IMPL_FLAGS_ACCUMULATOR_64F (0x04ull << 8)
344
+ #define CUBLASLT_NUMERICAL_IMPL_FLAGS_ACCUMULATOR_32I (0x08ull << 8)
345
+ #define CUBLASLT_NUMERICAL_IMPL_FLAGS_ACCUMULATOR_TYPE_MASK (0xffull << 8)
346
+
347
+ #define CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_16F (0x01ull << 16)
348
+ #define CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_16BF (0x02ull << 16)
349
+ #define CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_TF32 (0x04ull << 16)
350
+ #define CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_32F (0x08ull << 16)
351
+ #define CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_64F (0x10ull << 16)
352
+ #define CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_8I (0x20ull << 16)
353
+ #define CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_8F_E4M3 (0x40ull << 16)
354
+ #define CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_8F_E5M2 (0x80ull << 16)
355
+ #define CUBLASLT_NUMERICAL_IMPL_FLAGS_OP_INPUT_TYPE_MASK (0xffull << 16)
356
+
357
+ #define CUBLASLT_NUMERICAL_IMPL_FLAGS_GAUSSIAN (0x01ull << 32)
358
+ typedef uint64_t cublasLtNumericalImplFlags_t;
359
+
360
+ /** Execute matrix multiplication (D = alpha * op(A) * op(B) + beta * C).
361
+ *
362
+ * \retval CUBLAS_STATUS_NOT_INITIALIZED if cuBLASLt handle has not been initialized
363
+ * \retval CUBLAS_STATUS_INVALID_VALUE if parameters are in conflict or in an impossible configuration; e.g.
364
+ * when workspaceSizeInBytes is less than workspace required by configured
365
+ * algo
366
+ * \retval CUBLAS_STATUS_NOT_SUPPORTED if current implementation on selected device doesn't support configured
367
+ * operation
368
+ * \retval CUBLAS_STATUS_ARCH_MISMATCH if configured operation cannot be run using selected device
369
+ * \retval CUBLAS_STATUS_EXECUTION_FAILED if cuda reported execution error from the device
370
+ * \retval CUBLAS_STATUS_SUCCESS if the operation completed successfully
371
+ */
372
+ cublasStatus_t CUBLASWINAPI cublasLtMatmul(cublasLtHandle_t lightHandle,
373
+ cublasLtMatmulDesc_t computeDesc,
374
+ const void* alpha, /* host or device pointer */
375
+ const void* A,
376
+ cublasLtMatrixLayout_t Adesc,
377
+ const void* B,
378
+ cublasLtMatrixLayout_t Bdesc,
379
+ const void* beta, /* host or device pointer */
380
+ const void* C,
381
+ cublasLtMatrixLayout_t Cdesc,
382
+ void* D,
383
+ cublasLtMatrixLayout_t Ddesc,
384
+ const cublasLtMatmulAlgo_t* algo,
385
+ void* workspace,
386
+ size_t workspaceSizeInBytes,
387
+ cudaStream_t stream);
388
+
389
+ /** Matrix layout conversion helper (C = alpha * op(A) + beta * op(B))
390
+ *
391
+ * Can be used to change memory order of data or to scale and shift the values.
392
+ *
393
+ * \retval CUBLAS_STATUS_NOT_INITIALIZED if cuBLASLt handle has not been initialized
394
+ * \retval CUBLAS_STATUS_INVALID_VALUE if parameters are in conflict or in an impossible configuration; e.g.
395
+ * when A is not NULL, but Adesc is NULL
396
+ * \retval CUBLAS_STATUS_NOT_SUPPORTED if current implementation on selected device doesn't support configured
397
+ * operation
398
+ * \retval CUBLAS_STATUS_ARCH_MISMATCH if configured operation cannot be run using selected device
399
+ * \retval CUBLAS_STATUS_EXECUTION_FAILED if cuda reported execution error from the device
400
+ * \retval CUBLAS_STATUS_SUCCESS if the operation completed successfully
401
+ */
402
+ cublasStatus_t CUBLASWINAPI cublasLtMatrixTransform(cublasLtHandle_t lightHandle,
403
+ cublasLtMatrixTransformDesc_t transformDesc,
404
+ const void* alpha, /* host or device pointer */
405
+ const void* A,
406
+ cublasLtMatrixLayout_t Adesc,
407
+ const void* beta, /* host or device pointer */
408
+ const void* B,
409
+ cublasLtMatrixLayout_t Bdesc,
410
+ void* C,
411
+ cublasLtMatrixLayout_t Cdesc,
412
+ cudaStream_t stream);
413
+
414
+ /* ---------------------------------------------------------------------------------------*/
415
+ /* Helper functions for cublasLtMatrixLayout_t */
416
+ /* ---------------------------------------------------------------------------------------*/
417
+
418
+ /** Enum for data ordering */
419
+ typedef enum {
420
+ /** Column-major
421
+ *
422
+ * Leading dimension is the stride (in elements) to the beginning of next column in memory.
423
+ */
424
+ CUBLASLT_ORDER_COL = 0,
425
+ /** Row major
426
+ *
427
+ * Leading dimension is the stride (in elements) to the beginning of next row in memory.
428
+ */
429
+ CUBLASLT_ORDER_ROW = 1,
430
+ /** Column-major ordered tiles of 32 columns.
431
+ *
432
+ * Leading dimension is the stride (in elements) to the beginning of next group of 32-columns. E.g. if matrix has 33
433
+ * columns and 2 rows, ld must be at least (32) * 2 = 64.
434
+ */
435
+ CUBLASLT_ORDER_COL32 = 2,
436
+ /** Column-major ordered tiles of composite tiles with total 32 columns and 8 rows, tile composed of interleaved
437
+ * inner tiles of 4 columns within 4 even or odd rows in an alternating pattern.
438
+ *
439
+ * Leading dimension is the stride (in elements) to the beginning of the first 32 column x 8 row tile for the next
440
+ * 32-wide group of columns. E.g. if matrix has 33 columns and 1 row, ld must be at least (32 * 8) * 1 = 256.
441
+ */
442
+ CUBLASLT_ORDER_COL4_4R2_8C = 3,
443
+ /** Column-major ordered tiles of composite tiles with total 32 columns ands 32 rows.
444
+ * Element offset within the tile is calculated as (((row%8)/2*4+row/8)*2+row%2)*32+col.
445
+ *
446
+ * Leading dimension is the stride (in elements) to the beginning of the first 32 column x 32 row tile for the next
447
+ * 32-wide group of columns. E.g. if matrix has 33 columns and 1 row, ld must be at least (32*32)*1 = 1024.
448
+ */
449
+ CUBLASLT_ORDER_COL32_2R_4R4 = 4,
450
+
451
+ } cublasLtOrder_t;
452
+
453
+ /** Attributes of memory layout */
454
+ typedef enum {
455
+ /** Data type, see cudaDataType.
456
+ *
457
+ * uint32_t
458
+ */
459
+ CUBLASLT_MATRIX_LAYOUT_TYPE = 0,
460
+
461
+ /** Memory order of the data, see cublasLtOrder_t.
462
+ *
463
+ * int32_t, default: CUBLASLT_ORDER_COL
464
+ */
465
+ CUBLASLT_MATRIX_LAYOUT_ORDER = 1,
466
+
467
+ /** Number of rows.
468
+ *
469
+ * Usually only values that can be expressed as int32_t are supported.
470
+ *
471
+ * uint64_t
472
+ */
473
+ CUBLASLT_MATRIX_LAYOUT_ROWS = 2,
474
+
475
+ /** Number of columns.
476
+ *
477
+ * Usually only values that can be expressed as int32_t are supported.
478
+ *
479
+ * uint64_t
480
+ */
481
+ CUBLASLT_MATRIX_LAYOUT_COLS = 3,
482
+
483
+ /** Matrix leading dimension.
484
+ *
485
+ * For CUBLASLT_ORDER_COL this is stride (in elements) of matrix column, for more details and documentation for
486
+ * other memory orders see documentation for cublasLtOrder_t values.
487
+ *
488
+ * Currently only non-negative values are supported, must be large enough so that matrix memory locations are not
489
+ * overlapping (e.g. greater or equal to CUBLASLT_MATRIX_LAYOUT_ROWS in case of CUBLASLT_ORDER_COL).
490
+ *
491
+ * int64_t;
492
+ */
493
+ CUBLASLT_MATRIX_LAYOUT_LD = 4,
494
+
495
+ /** Number of matmul operations to perform in the batch.
496
+ *
497
+ * See also CUBLASLT_ALGO_CAP_STRIDED_BATCH_SUPPORT
498
+ *
499
+ * int32_t, default: 1
500
+ */
501
+ CUBLASLT_MATRIX_LAYOUT_BATCH_COUNT = 5,
502
+
503
+ /** Stride (in elements) to the next matrix for strided batch operation.
504
+ *
505
+ * When matrix type is planar-complex (CUBLASLT_MATRIX_LAYOUT_PLANE_OFFSET != 0), batch stride
506
+ * is interpreted by cublasLtMatmul() in number of real valued sub-elements. E.g. for data of type CUDA_C_16F,
507
+ * offset of 1024B is encoded as a stride of value 512 (since each element of the real and imaginary matrices
508
+ * is a 2B (16bit) floating point type).
509
+ *
510
+ * NOTE: A bug in cublasLtMatrixTransform() causes it to interpret the batch stride for a planar-complex matrix
511
+ * as if it was specified in number of complex elements. Therefore an offset of 1024B must be encoded as stride
512
+ * value 256 when calling cublasLtMatrixTransform() (each complex element is 4B with real and imaginary values 2B
513
+ * each). This behavior is expected to be corrected in the next major cuBLAS version.
514
+ *
515
+ * int64_t, default: 0
516
+ */
517
+ CUBLASLT_MATRIX_LAYOUT_STRIDED_BATCH_OFFSET = 6,
518
+
519
+ /** Stride (in bytes) to the imaginary plane for planar complex layout.
520
+ *
521
+ * int64_t, default: 0 - 0 means that layout is regular (real and imaginary parts of complex numbers are interleaved
522
+ * in memory in each element)
523
+ */
524
+ CUBLASLT_MATRIX_LAYOUT_PLANE_OFFSET = 7,
525
+ } cublasLtMatrixLayoutAttribute_t;
526
+
527
+ /** Internal. Do not use directly.
528
+ */
529
+ cublasStatus_t CUBLASWINAPI cublasLtMatrixLayoutInit_internal( //
530
+ cublasLtMatrixLayout_t matLayout,
531
+ size_t size,
532
+ cudaDataType type,
533
+ uint64_t rows,
534
+ uint64_t cols,
535
+ int64_t ld);
536
+
537
+ /** Initialize matrix layout descriptor in pre-allocated space.
538
+ *
539
+ * \retval CUBLAS_STATUS_ALLOC_FAILED if size of the pre-allocated space is insufficient
540
+ * \retval CUBLAS_STATUS_SUCCESS if desciptor was created successfully
541
+ */
542
+ static inline cublasStatus_t cublasLtMatrixLayoutInit(
543
+ cublasLtMatrixLayout_t matLayout, cudaDataType type, uint64_t rows, uint64_t cols, int64_t ld) {
544
+ return cublasLtMatrixLayoutInit_internal(matLayout, sizeof(*matLayout), type, rows, cols, ld);
545
+ }
546
+
547
+ /** Create new matrix layout descriptor.
548
+ *
549
+ * \retval CUBLAS_STATUS_ALLOC_FAILED if memory could not be allocated
550
+ * \retval CUBLAS_STATUS_SUCCESS if desciptor was created successfully
551
+ */
552
+ cublasStatus_t CUBLASWINAPI cublasLtMatrixLayoutCreate( //
553
+ cublasLtMatrixLayout_t* matLayout,
554
+ cudaDataType type,
555
+ uint64_t rows,
556
+ uint64_t cols,
557
+ int64_t ld);
558
+
559
+ /** Destroy matrix layout descriptor.
560
+ *
561
+ * \retval CUBLAS_STATUS_SUCCESS if operation was successful
562
+ */
563
+ cublasStatus_t CUBLASWINAPI cublasLtMatrixLayoutDestroy(cublasLtMatrixLayout_t matLayout);
564
+
565
+ /** Set matrix layout descriptor attribute.
566
+ *
567
+ * \param[in] matLayout The descriptor
568
+ * \param[in] attr The attribute
569
+ * \param[in] buf memory address containing the new value
570
+ * \param[in] sizeInBytes size of buf buffer for verification (in bytes)
571
+ *
572
+ * \retval CUBLAS_STATUS_INVALID_VALUE if buf is NULL or sizeInBytes doesn't match size of internal storage for
573
+ * selected attribute
574
+ * \retval CUBLAS_STATUS_SUCCESS if attribute was set successfully
575
+ */
576
+ cublasStatus_t CUBLASWINAPI cublasLtMatrixLayoutSetAttribute( //
577
+ cublasLtMatrixLayout_t matLayout,
578
+ cublasLtMatrixLayoutAttribute_t attr,
579
+ const void* buf,
580
+ size_t sizeInBytes);
581
+
582
+ /** Get matrix layout descriptor attribute.
583
+ *
584
+ * \param[in] matLayout The descriptor
585
+ * \param[in] attr The attribute
586
+ * \param[out] buf memory address containing the new value
587
+ * \param[in] sizeInBytes size of buf buffer for verification (in bytes)
588
+ * \param[out] sizeWritten only valid when return value is CUBLAS_STATUS_SUCCESS. If sizeInBytes is non-zero: number of
589
+ * bytes actually written, if sizeInBytes is 0: number of bytes needed to write full contents
590
+ *
591
+ * \retval CUBLAS_STATUS_INVALID_VALUE if sizeInBytes is 0 and sizeWritten is NULL, or if sizeInBytes is non-zero
592
+ * and buf is NULL or sizeInBytes doesn't match size of internal storage for
593
+ * selected attribute
594
+ * \retval CUBLAS_STATUS_SUCCESS if attribute's value was successfully written to user memory
595
+ */
596
+ cublasStatus_t CUBLASWINAPI cublasLtMatrixLayoutGetAttribute( //
597
+ cublasLtMatrixLayout_t matLayout,
598
+ cublasLtMatrixLayoutAttribute_t attr,
599
+ void* buf,
600
+ size_t sizeInBytes,
601
+ size_t* sizeWritten);
602
+
603
+ /* ---------------------------------------------------------------------------------------*/
604
+ /* Helper functions for cublasLtMatmulDesc_t */
605
+ /* ---------------------------------------------------------------------------------------*/
606
+
607
+ /** Matmul descriptor attributes to define details of the operation. */
608
+ typedef enum {
609
+ /** Compute type, see cudaDataType. Defines data type used for multiply and accumulate operations and the
610
+ * accumulator during matrix multiplication.
611
+ *
612
+ * int32_t
613
+ */
614
+ CUBLASLT_MATMUL_DESC_COMPUTE_TYPE = 0,
615
+
616
+ /** Scale type, see cudaDataType. Defines data type of alpha and beta. Accumulator and value from matrix C are
617
+ * typically converted to scale type before final scaling. Value is then converted from scale type to type of matrix
618
+ * D before being stored in memory.
619
+ *
620
+ * int32_t, default: same as CUBLASLT_MATMUL_DESC_COMPUTE_TYPE
621
+ */
622
+ CUBLASLT_MATMUL_DESC_SCALE_TYPE = 1,
623
+
624
+ /** Pointer mode of alpha and beta, see cublasLtPointerMode_t. When CUBLASLT_POINTER_MODE_DEVICE_VECTOR is in use,
625
+ * alpha/beta vector lenghts must match number of output matrix rows.
626
+ *
627
+ * int32_t, default: CUBLASLT_POINTER_MODE_HOST
628
+ */
629
+ CUBLASLT_MATMUL_DESC_POINTER_MODE = 2,
630
+
631
+ /** Transform of matrix A, see cublasOperation_t.
632
+ *
633
+ * int32_t, default: CUBLAS_OP_N
634
+ */
635
+ CUBLASLT_MATMUL_DESC_TRANSA = 3,
636
+
637
+ /** Transform of matrix B, see cublasOperation_t.
638
+ *
639
+ * int32_t, default: CUBLAS_OP_N
640
+ */
641
+ CUBLASLT_MATMUL_DESC_TRANSB = 4,
642
+
643
+ /** Transform of matrix C, see cublasOperation_t.
644
+ *
645
+ * Currently only CUBLAS_OP_N is supported.
646
+ *
647
+ * int32_t, default: CUBLAS_OP_N
648
+ */
649
+ CUBLASLT_MATMUL_DESC_TRANSC = 5,
650
+
651
+ /** Matrix fill mode, see cublasFillMode_t.
652
+ *
653
+ * int32_t, default: CUBLAS_FILL_MODE_FULL
654
+ */
655
+ CUBLASLT_MATMUL_DESC_FILL_MODE = 6,
656
+
657
+ /** Epilogue function, see cublasLtEpilogue_t.
658
+ *
659
+ * uint32_t, default: CUBLASLT_EPILOGUE_DEFAULT
660
+ */
661
+ CUBLASLT_MATMUL_DESC_EPILOGUE = 7,
662
+
663
+ /** Bias or bias gradient vector pointer in the device memory.
664
+ *
665
+ * Bias case. See CUBLASLT_EPILOGUE_BIAS.
666
+ * For bias data type see CUBLASLT_MATMUL_DESC_BIAS_DATA_TYPE.
667
+ *
668
+ * Bias vector length must match matrix D rows count.
669
+ *
670
+ * Bias gradient case. See CUBLASLT_EPILOGUE_DRELU_BGRAD and CUBLASLT_EPILOGUE_DGELU_BGRAD.
671
+ * Bias gradient vector elements are the same type as the output elements
672
+ * (Ctype) with the exception of IMMA kernels (see above).
673
+ *
674
+ * Routines that don't dereference this pointer, like cublasLtMatmulAlgoGetHeuristic()
675
+ * depend on its value to determine expected pointer alignment.
676
+ *
677
+ * Bias case: const void *, default: NULL
678
+ * Bias gradient case: void *, default: NULL
679
+ */
680
+ CUBLASLT_MATMUL_DESC_BIAS_POINTER = 8,
681
+
682
+ /** Batch stride for bias or bias gradient vector.
683
+ *
684
+ * Used together with CUBLASLT_MATMUL_DESC_BIAS_POINTER when matrix D's CUBLASLT_MATRIX_LAYOUT_BATCH_COUNT > 1.
685
+ *
686
+ * int64_t, default: 0
687
+ */
688
+ CUBLASLT_MATMUL_DESC_BIAS_BATCH_STRIDE = 10,
689
+
690
+ /** Pointer for epilogue auxiliary buffer.
691
+ *
692
+ * - Output vector for ReLu bit-mask in forward pass when CUBLASLT_EPILOGUE_RELU_AUX
693
+ * or CUBLASLT_EPILOGUE_RELU_AUX_BIAS epilogue is used.
694
+ * - Input vector for ReLu bit-mask in backward pass when
695
+ * CUBLASLT_EPILOGUE_DRELU_BGRAD epilogue is used.
696
+ *
697
+ * - Output of GELU input matrix in forward pass when
698
+ * CUBLASLT_EPILOGUE_GELU_AUX_BIAS epilogue is used.
699
+ * - Input of GELU input matrix for backward pass when
700
+ * CUBLASLT_EPILOGUE_DGELU_BGRAD epilogue is used.
701
+ *
702
+ * For aux data type see CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_DATA_TYPE.
703
+ *
704
+ * Routines that don't dereference this pointer, like cublasLtMatmulAlgoGetHeuristic()
705
+ * depend on its value to determine expected pointer alignment.
706
+ *
707
+ * Requires setting CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_LD attribute.
708
+ *
709
+ * Forward pass: void *, default: NULL
710
+ * Backward pass: const void *, default: NULL
711
+ */
712
+ CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_POINTER = 11,
713
+
714
+ /** Leading dimension for epilogue auxiliary buffer.
715
+ *
716
+ * - ReLu bit-mask matrix leading dimension in elements (i.e. bits)
717
+ * when CUBLASLT_EPILOGUE_RELU_AUX, CUBLASLT_EPILOGUE_RELU_AUX_BIAS or CUBLASLT_EPILOGUE_DRELU_BGRAD epilogue is
718
+ * used. Must be divisible by 128 and be no less than the number of rows in the output matrix.
719
+ *
720
+ * - GELU input matrix leading dimension in elements
721
+ * when CUBLASLT_EPILOGUE_GELU_AUX_BIAS or CUBLASLT_EPILOGUE_DGELU_BGRAD epilogue used.
722
+ * Must be divisible by 8 and be no less than the number of rows in the output matrix.
723
+ *
724
+ * int64_t, default: 0
725
+ */
726
+ CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_LD = 12,
727
+
728
+ /** Batch stride for epilogue auxiliary buffer.
729
+ *
730
+ * - ReLu bit-mask matrix batch stride in elements (i.e. bits)
731
+ * when CUBLASLT_EPILOGUE_RELU_AUX, CUBLASLT_EPILOGUE_RELU_AUX_BIAS or CUBLASLT_EPILOGUE_DRELU_BGRAD epilogue is
732
+ * used. Must be divisible by 128.
733
+ *
734
+ * - GELU input matrix batch stride in elements
735
+ * when CUBLASLT_EPILOGUE_GELU_AUX_BIAS or CUBLASLT_EPILOGUE_DGELU_BGRAD epilogue used.
736
+ * Must be divisible by 8.
737
+ *
738
+ * int64_t, default: 0
739
+ */
740
+ CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_BATCH_STRIDE = 13,
741
+
742
+ /** Batch stride for alpha vector.
743
+ *
744
+ * Used together with CUBLASLT_POINTER_MODE_ALPHA_DEVICE_VECTOR_BETA_HOST when matrix D's
745
+ * CUBLASLT_MATRIX_LAYOUT_BATCH_COUNT > 1. If CUBLASLT_POINTER_MODE_ALPHA_DEVICE_VECTOR_BETA_ZERO is set then
746
+ * CUBLASLT_MATMUL_DESC_ALPHA_VECTOR_BATCH_STRIDE must be set to 0 as this mode doesnt supported batched alpha vector.
747
+ *
748
+ * int64_t, default: 0
749
+ */
750
+ CUBLASLT_MATMUL_DESC_ALPHA_VECTOR_BATCH_STRIDE = 14,
751
+
752
+ /** Number of SMs to target for parallel execution. Optimizes heuristics for execution on a different number of SMs
753
+ * when user expects a concurrent stream to be using some of the device resources.
754
+ *
755
+ * int32_t, default: 0 - use the number reported by the device.
756
+ */
757
+ CUBLASLT_MATMUL_DESC_SM_COUNT_TARGET = 15,
758
+
759
+ /** Device pointer to the scale factor value that converts data in matrix A to the compute data type range.
760
+ *
761
+ * The scaling factor value must have the same type as the compute type.
762
+ *
763
+ * If not specified, or set to NULL, the scaling factor is assumed to be 1.
764
+ *
765
+ * If set for an unsupported matrix data, scale, and compute type combination, calling cublasLtMatmul()
766
+ * will return CUBLAS_INVALID_VALUE.
767
+ *
768
+ * const void *, default: NULL
769
+ */
770
+ CUBLASLT_MATMUL_DESC_A_SCALE_POINTER = 17,
771
+
772
+ /** Device pointer to the scale factor value to convert data in matrix B to compute data type range.
773
+ *
774
+ * The scaling factor value must have the same type as the compute type.
775
+ *
776
+ * If not specified, or set to NULL, the scaling factor is assumed to be 1.
777
+ *
778
+ * If set for an unsupported matrix data, scale, and compute type combination, calling cublasLtMatmul()
779
+ * will return CUBLAS_INVALID_VALUE.
780
+ *
781
+ * const void *, default: NULL
782
+ */
783
+ CUBLASLT_MATMUL_DESC_B_SCALE_POINTER = 18,
784
+
785
+ /** Device pointer to the scale factor value to convert data in matrix C to compute data type range.
786
+ *
787
+ * The scaling factor value must have the same type as the compute type.
788
+ *
789
+ * If not specified, or set to NULL, the scaling factor is assumed to be 1.
790
+ *
791
+ * If set for an unsupported matrix data, scale, and compute type combination, calling cublasLtMatmul()
792
+ * will return CUBLAS_INVALID_VALUE.
793
+ *
794
+ * const void *, default: NULL
795
+ */
796
+ CUBLASLT_MATMUL_DESC_C_SCALE_POINTER = 19,
797
+
798
+ /** Device pointer to the scale factor value to convert data in matrix D to compute data type range.
799
+ *
800
+ * The scaling factor value must have the same type as the compute type.
801
+ *
802
+ * If not specified, or set to NULL, the scaling factor is assumed to be 1.
803
+ *
804
+ * If set for an unsupported matrix data, scale, and compute type combination, calling cublasLtMatmul()
805
+ * will return CUBLAS_INVALID_VALUE.
806
+ *
807
+ * const void *, default: NULL
808
+ */
809
+ CUBLASLT_MATMUL_DESC_D_SCALE_POINTER = 20,
810
+
811
+ /** Device pointer to the memory location that on completion will be set to the maximum of absolute values in the
812
+ * output matrix.
813
+ *
814
+ * The computed value has the same type as the compute type.
815
+ *
816
+ * If not specified or set to NULL, the maximum absolute value is not computed. If set for an unsupported matrix
817
+ * data, scale, and compute type combination, calling cublasLtMatmul() will return CUBLAS_INVALID_VALUE.
818
+ *
819
+ * void *, default: NULL
820
+ */
821
+ CUBLASLT_MATMUL_DESC_AMAX_D_POINTER = 21,
822
+
823
+ /** Type of the data to be stored to the memory pointed to by CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_POINTER.
824
+ *
825
+ * If unset, the data type defaults to the type of elements of the output matrix with some exceptions, see details
826
+ * below.
827
+ *
828
+ * ReLu uses a bit-mask.
829
+ *
830
+ * GELU input matrix elements type is the same as the type of elements of
831
+ * the output matrix with some exceptions, see details below.
832
+ *
833
+ * For fp8 kernels with output type CUDA_R_8F_E4M3 the aux data type can be CUDA_R_8F_E4M3 or CUDA_R_16F with some
834
+ * restrictions. See https://docs.nvidia.com/cuda/cublas/index.html#cublasLtMatmulDescAttributes_t for more details.
835
+ *
836
+ * If set for an unsupported matrix data, scale, and compute type combination, calling cublasLtMatmul()
837
+ * will return CUBLAS_INVALID_VALUE.
838
+ *
839
+ * int32_t based on cudaDataType, default: -1
840
+ */
841
+ CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_DATA_TYPE = 22,
842
+
843
+ /** Device pointer to the scaling factor value to convert results from compute type data range to storage
844
+ * data range in the auxiliary matrix that is set via CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_POINTER.
845
+ *
846
+ * The scaling factor value must have the same type as the compute type.
847
+ *
848
+ * If not specified, or set to NULL, the scaling factor is assumed to be 1. If set for an unsupported matrix data,
849
+ * scale, and compute type combination, calling cublasLtMatmul() will return CUBLAS_INVALID_VALUE.
850
+ *
851
+ * void *, default: NULL
852
+ */
853
+ CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_SCALE_POINTER = 23,
854
+
855
+ /** Device pointer to the memory location that on completion will be set to the maximum of absolute values in the
856
+ * buffer that is set via CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_POINTER.
857
+ *
858
+ * The computed value has the same type as the compute type.
859
+ *
860
+ * If not specified or set to NULL, the maximum absolute value is not computed. If set for an unsupported matrix
861
+ * data, scale, and compute type combination, calling cublasLtMatmul() will return CUBLAS_INVALID_VALUE.
862
+ *
863
+ * void *, default: NULL
864
+ */
865
+ CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_AMAX_POINTER = 24,
866
+
867
+ /** Flag for managing fp8 fast accumulation mode.
868
+ * When enabled, problem execution might be faster but at the cost of lower accuracy because intermediate results
869
+ * will not periodically be promoted to a higher precision.
870
+ *
871
+ * int8_t, default: 0 - fast accumulation mode is disabled.
872
+ */
873
+ CUBLASLT_MATMUL_DESC_FAST_ACCUM = 25,
874
+
875
+ /** Type of bias or bias gradient vector in the device memory.
876
+ *
877
+ * Bias case: see CUBLASLT_EPILOGUE_BIAS.
878
+ *
879
+ * Bias vector elements are the same type as the elements of output matrix (Dtype) with the following exceptions:
880
+ * - IMMA kernels with computeType=CUDA_R_32I and Ctype=CUDA_R_8I where the bias vector elements
881
+ * are the same type as alpha, beta (CUBLASLT_MATMUL_DESC_SCALE_TYPE=CUDA_R_32F)
882
+ * - fp8 kernels with an output type of CUDA_R_32F, CUDA_R_8F_E4M3 or CUDA_R_8F_E5M2, See
883
+ * https://docs.nvidia.com/cuda/cublas/index.html#cublasLtMatmul for details.
884
+ *
885
+ * int32_t based on cudaDataType, default: -1
886
+ */
887
+ CUBLASLT_MATMUL_DESC_BIAS_DATA_TYPE = 26,
888
+ } cublasLtMatmulDescAttributes_t;
889
+
890
+ /** Internal. Do not use directly.
891
+ */
892
+ cublasStatus_t CUBLASWINAPI cublasLtMatmulDescInit_internal( //
893
+ cublasLtMatmulDesc_t matmulDesc,
894
+ size_t size,
895
+ cublasComputeType_t computeType,
896
+ cudaDataType_t scaleType);
897
+
898
+ /** Initialize matmul operation descriptor in pre-allocated space.
899
+ *
900
+ * \retval CUBLAS_STATUS_ALLOC_FAILED if size of the pre-allocated space is insufficient
901
+ * \retval CUBLAS_STATUS_SUCCESS if desciptor was initialized successfully
902
+ */
903
+ static inline cublasStatus_t cublasLtMatmulDescInit( //
904
+ cublasLtMatmulDesc_t matmulDesc,
905
+ cublasComputeType_t computeType,
906
+ cudaDataType_t scaleType) {
907
+ return cublasLtMatmulDescInit_internal(matmulDesc, sizeof(*matmulDesc), computeType, scaleType);
908
+ }
909
+
910
+ /** Create new matmul operation descriptor.
911
+ *
912
+ * \retval CUBLAS_STATUS_ALLOC_FAILED if memory could not be allocated
913
+ * \retval CUBLAS_STATUS_SUCCESS if desciptor was created successfully
914
+ */
915
+ cublasStatus_t CUBLASWINAPI cublasLtMatmulDescCreate(cublasLtMatmulDesc_t* matmulDesc,
916
+ cublasComputeType_t computeType,
917
+ cudaDataType_t scaleType);
918
+
919
+ /** Destroy matmul operation descriptor.
920
+ *
921
+ * \retval CUBLAS_STATUS_SUCCESS if operation was successful
922
+ */
923
+ cublasStatus_t CUBLASWINAPI cublasLtMatmulDescDestroy(cublasLtMatmulDesc_t matmulDesc);
924
+
925
+ /** Set matmul operation descriptor attribute.
926
+ *
927
+ * \param[in] matmulDesc The descriptor
928
+ * \param[in] attr The attribute
929
+ * \param[in] buf memory address containing the new value
930
+ * \param[in] sizeInBytes size of buf buffer for verification (in bytes)
931
+ *
932
+ * \retval CUBLAS_STATUS_INVALID_VALUE if buf is NULL or sizeInBytes doesn't match size of internal storage for
933
+ * selected attribute
934
+ * \retval CUBLAS_STATUS_SUCCESS if attribute was set successfully
935
+ */
936
+ cublasStatus_t CUBLASWINAPI cublasLtMatmulDescSetAttribute( //
937
+ cublasLtMatmulDesc_t matmulDesc,
938
+ cublasLtMatmulDescAttributes_t attr,
939
+ const void* buf,
940
+ size_t sizeInBytes);
941
+
942
+ /** Get matmul operation descriptor attribute.
943
+ *
944
+ * \param[in] matmulDesc The descriptor
945
+ * \param[in] attr The attribute
946
+ * \param[out] buf memory address containing the new value
947
+ * \param[in] sizeInBytes size of buf buffer for verification (in bytes)
948
+ * \param[out] sizeWritten only valid when return value is CUBLAS_STATUS_SUCCESS. If sizeInBytes is non-zero: number of
949
+ * bytes actually written, if sizeInBytes is 0: number of bytes needed to write full contents
950
+ *
951
+ * \retval CUBLAS_STATUS_INVALID_VALUE if sizeInBytes is 0 and sizeWritten is NULL, or if sizeInBytes is non-zero
952
+ * and buf is NULL or sizeInBytes doesn't match size of internal storage for
953
+ * selected attribute
954
+ * \retval CUBLAS_STATUS_SUCCESS if attribute's value was successfully written to user memory
955
+ */
956
+ cublasStatus_t CUBLASWINAPI cublasLtMatmulDescGetAttribute( //
957
+ cublasLtMatmulDesc_t matmulDesc,
958
+ cublasLtMatmulDescAttributes_t attr,
959
+ void* buf,
960
+ size_t sizeInBytes,
961
+ size_t* sizeWritten);
962
+
963
+ /* ---------------------------------------------------------------------------------------*/
964
+ /* Helper functions for cublasLtMatrixTransformDesc_t */
965
+ /* ---------------------------------------------------------------------------------------*/
966
+
967
+ /** Matrix transform descriptor attributes to define details of the operation.
968
+ */
969
+ typedef enum {
970
+ /** Scale type, see cudaDataType. Inputs are converted to scale type for scaling and summation and results are then
971
+ * converted to output type to store in memory.
972
+ *
973
+ * int32_t
974
+ */
975
+ CUBLASLT_MATRIX_TRANSFORM_DESC_SCALE_TYPE,
976
+
977
+ /** Pointer mode of alpha and beta, see cublasLtPointerMode_t.
978
+ *
979
+ * int32_t, default: CUBLASLT_POINTER_MODE_HOST
980
+ */
981
+ CUBLASLT_MATRIX_TRANSFORM_DESC_POINTER_MODE,
982
+
983
+ /** Transform of matrix A, see cublasOperation_t.
984
+ *
985
+ * int32_t, default: CUBLAS_OP_N
986
+ */
987
+ CUBLASLT_MATRIX_TRANSFORM_DESC_TRANSA,
988
+
989
+ /** Transform of matrix B, see cublasOperation_t.
990
+ *
991
+ * int32_t, default: CUBLAS_OP_N
992
+ */
993
+ CUBLASLT_MATRIX_TRANSFORM_DESC_TRANSB,
994
+ } cublasLtMatrixTransformDescAttributes_t;
995
+
996
+ /** Internal. Do not use directly.
997
+ */
998
+ cublasStatus_t CUBLASWINAPI cublasLtMatrixTransformDescInit_internal(cublasLtMatrixTransformDesc_t transformDesc,
999
+ size_t size,
1000
+ cudaDataType scaleType);
1001
+
1002
+ /** Initialize matrix transform operation descriptor in pre-allocated space.
1003
+ *
1004
+ * \retval CUBLAS_STATUS_ALLOC_FAILED if size of the pre-allocated space is insufficient
1005
+ * \retval CUBLAS_STATUS_SUCCESS if desciptor was created successfully
1006
+ */
1007
+ static inline cublasStatus_t cublasLtMatrixTransformDescInit(cublasLtMatrixTransformDesc_t transformDesc,
1008
+ cudaDataType scaleType) {
1009
+ return cublasLtMatrixTransformDescInit_internal(transformDesc, sizeof(*transformDesc), scaleType);
1010
+ }
1011
+
1012
+ /** Create new matrix transform operation descriptor.
1013
+ *
1014
+ * \retval CUBLAS_STATUS_ALLOC_FAILED if memory could not be allocated
1015
+ * \retval CUBLAS_STATUS_SUCCESS if desciptor was created successfully
1016
+ */
1017
+ cublasStatus_t CUBLASWINAPI cublasLtMatrixTransformDescCreate(cublasLtMatrixTransformDesc_t* transformDesc,
1018
+ cudaDataType scaleType);
1019
+
1020
+ /** Destroy matrix transform operation descriptor.
1021
+ *
1022
+ * \retval CUBLAS_STATUS_SUCCESS if operation was successful
1023
+ */
1024
+ cublasStatus_t CUBLASWINAPI cublasLtMatrixTransformDescDestroy(cublasLtMatrixTransformDesc_t transformDesc);
1025
+
1026
+ /** Set matrix transform operation descriptor attribute.
1027
+ *
1028
+ * \param[in] transformDesc The descriptor
1029
+ * \param[in] attr The attribute
1030
+ * \param[in] buf memory address containing the new value
1031
+ * \param[in] sizeInBytes size of buf buffer for verification (in bytes)
1032
+ *
1033
+ * \retval CUBLAS_STATUS_INVALID_VALUE if buf is NULL or sizeInBytes doesn't match size of internal storage for
1034
+ * selected attribute
1035
+ * \retval CUBLAS_STATUS_SUCCESS if attribute was set successfully
1036
+ */
1037
+ cublasStatus_t CUBLASWINAPI cublasLtMatrixTransformDescSetAttribute( //
1038
+ cublasLtMatrixTransformDesc_t transformDesc,
1039
+ cublasLtMatrixTransformDescAttributes_t attr,
1040
+ const void* buf,
1041
+ size_t sizeInBytes);
1042
+
1043
+ /** Get matrix transform operation descriptor attribute.
1044
+ *
1045
+ * \param[in] transformDesc The descriptor
1046
+ * \param[in] attr The attribute
1047
+ * \param[out] buf memory address containing the new value
1048
+ * \param[in] sizeInBytes size of buf buffer for verification (in bytes)
1049
+ * \param[out] sizeWritten only valid when return value is CUBLAS_STATUS_SUCCESS. If sizeInBytes is non-zero: number
1050
+ * of bytes actually written, if sizeInBytes is 0: number of bytes needed to write full contents
1051
+ *
1052
+ * \retval CUBLAS_STATUS_INVALID_VALUE if sizeInBytes is 0 and sizeWritten is NULL, or if sizeInBytes is non-zero
1053
+ * and buf is NULL or sizeInBytes doesn't match size of internal storage for
1054
+ * selected attribute
1055
+ * \retval CUBLAS_STATUS_SUCCESS if attribute's value was successfully written to user memory
1056
+ */
1057
+ cublasStatus_t CUBLASWINAPI cublasLtMatrixTransformDescGetAttribute( //
1058
+ cublasLtMatrixTransformDesc_t transformDesc,
1059
+ cublasLtMatrixTransformDescAttributes_t attr,
1060
+ void* buf,
1061
+ size_t sizeInBytes,
1062
+ size_t* sizeWritten);
1063
+
1064
+ /** For computation with complex numbers, this enum allows to apply the Gauss Complexity reduction algorithm
1065
+ */
1066
+ typedef enum {
1067
+ CUBLASLT_3M_MODE_DISALLOWED = 0,
1068
+ CUBLASLT_3M_MODE_ALLOWED = 1,
1069
+ } cublasLt3mMode_t;
1070
+
1071
+ /** Reduction scheme for portions of the dot-product calculated in parallel (a. k. a. "split - K").
1072
+ */
1073
+ typedef enum {
1074
+ /** No reduction scheme, dot-product shall be performed in one sequence.
1075
+ */
1076
+ CUBLASLT_REDUCTION_SCHEME_NONE = 0,
1077
+
1078
+ /** Reduction is performed "in place" - using the output buffer (and output data type) and counters (in workspace) to
1079
+ * guarantee the sequentiality.
1080
+ */
1081
+ CUBLASLT_REDUCTION_SCHEME_INPLACE = 1,
1082
+
1083
+ /** Intermediate results are stored in compute type in the workspace and reduced in a separate step.
1084
+ */
1085
+ CUBLASLT_REDUCTION_SCHEME_COMPUTE_TYPE = 2,
1086
+
1087
+ /** Intermediate results are stored in output type in the workspace and reduced in a separate step.
1088
+ */
1089
+ CUBLASLT_REDUCTION_SCHEME_OUTPUT_TYPE = 4,
1090
+
1091
+ CUBLASLT_REDUCTION_SCHEME_MASK = 0x7,
1092
+ } cublasLtReductionScheme_t;
1093
+
1094
+ /** Postprocessing options for the epilogue
1095
+ */
1096
+ typedef enum {
1097
+ /** No special postprocessing, just scale and quantize results if necessary.
1098
+ */
1099
+ CUBLASLT_EPILOGUE_DEFAULT = 1,
1100
+
1101
+ /** ReLu, apply ReLu point-wise transform to the results (x:=max(x, 0)).
1102
+ */
1103
+ CUBLASLT_EPILOGUE_RELU = 2,
1104
+
1105
+ /** ReLu, apply ReLu point-wise transform to the results (x:=max(x, 0)).
1106
+ *
1107
+ * This epilogue mode produces an extra output, a ReLu bit-mask matrix,
1108
+ * see CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_POINTER.
1109
+ */
1110
+ CUBLASLT_EPILOGUE_RELU_AUX = (CUBLASLT_EPILOGUE_RELU | 128),
1111
+
1112
+ /** Bias, apply (broadcasted) Bias from bias vector. Bias vector length must match matrix D rows, it must be packed
1113
+ * (stride between vector elements is 1). Bias vector is broadcasted to all columns and added before applying final
1114
+ * postprocessing.
1115
+ */
1116
+ CUBLASLT_EPILOGUE_BIAS = 4,
1117
+
1118
+ /** ReLu and Bias, apply Bias and then ReLu transform
1119
+ */
1120
+ CUBLASLT_EPILOGUE_RELU_BIAS = (CUBLASLT_EPILOGUE_RELU | CUBLASLT_EPILOGUE_BIAS),
1121
+
1122
+ /** ReLu and Bias, apply Bias and then ReLu transform
1123
+ *
1124
+ * This epilogue mode produces an extra output, a ReLu bit-mask matrix,
1125
+ * see CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_POINTER.
1126
+ */
1127
+ CUBLASLT_EPILOGUE_RELU_AUX_BIAS = (CUBLASLT_EPILOGUE_RELU_AUX | CUBLASLT_EPILOGUE_BIAS),
1128
+
1129
+ /* ReLu gradient. Apply ReLu gradient to matmul output. Store ReLu gradient in the output matrix.
1130
+ *
1131
+ * This epilogue mode requires an extra input,
1132
+ * see CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_POINTER.
1133
+ */
1134
+ CUBLASLT_EPILOGUE_DRELU = 8 | 128,
1135
+
1136
+ /* ReLu and Bias gradients. Apply independently ReLu and Bias gradient to
1137
+ * matmul output. Store ReLu gradient in the output matrix, and Bias gradient
1138
+ * in the auxiliary output (see CUBLASLT_MATMUL_DESC_BIAS_POINTER).
1139
+ *
1140
+ * This epilogue mode requires an extra input,
1141
+ * see CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_POINTER.
1142
+ */
1143
+ CUBLASLT_EPILOGUE_DRELU_BGRAD = CUBLASLT_EPILOGUE_DRELU | 16,
1144
+
1145
+ /** GELU, apply GELU point-wise transform to the results (x:=GELU(x)).
1146
+ */
1147
+ CUBLASLT_EPILOGUE_GELU = 32,
1148
+
1149
+ /** GELU, apply GELU point-wise transform to the results (x:=GELU(x)).
1150
+ *
1151
+ * This epilogue mode outputs GELU input as a separate matrix (useful for training).
1152
+ * See CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_POINTER.
1153
+ */
1154
+ CUBLASLT_EPILOGUE_GELU_AUX = (CUBLASLT_EPILOGUE_GELU | 128),
1155
+
1156
+ /** GELU and Bias, apply Bias and then GELU transform
1157
+ */
1158
+ CUBLASLT_EPILOGUE_GELU_BIAS = (CUBLASLT_EPILOGUE_GELU | CUBLASLT_EPILOGUE_BIAS),
1159
+
1160
+ /** GELU and Bias, apply Bias and then GELU transform
1161
+ *
1162
+ * This epilogue mode outputs GELU input as a separate matrix (useful for training).
1163
+ * See CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_POINTER.
1164
+ */
1165
+ CUBLASLT_EPILOGUE_GELU_AUX_BIAS = (CUBLASLT_EPILOGUE_GELU_AUX | CUBLASLT_EPILOGUE_BIAS),
1166
+
1167
+ /* GELU gradient. Apply GELU gradient to matmul output. Store GELU gradient in the output matrix.
1168
+ *
1169
+ * This epilogue mode requires an extra input,
1170
+ * see CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_POINTER.
1171
+ */
1172
+ CUBLASLT_EPILOGUE_DGELU = 64 | 128,
1173
+
1174
+ /* GELU and Bias gradients. Apply independently GELU and Bias gradient to
1175
+ * matmul output. Store GELU gradient in the output matrix, and Bias gradient
1176
+ * in the auxiliary output (see CUBLASLT_MATMUL_DESC_BIAS_POINTER).
1177
+ *
1178
+ * This epilogue mode requires an extra input,
1179
+ * see CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_POINTER.
1180
+ */
1181
+ CUBLASLT_EPILOGUE_DGELU_BGRAD = CUBLASLT_EPILOGUE_DGELU | 16,
1182
+
1183
+ /** Bias gradient based on the input matrix A.
1184
+ *
1185
+ * The bias size corresponds to the number of rows of the matrix D.
1186
+ * The reduction happens over the GEMM's "k" dimension.
1187
+ *
1188
+ * Stores Bias gradient in the auxiliary output
1189
+ * (see CUBLASLT_MATMUL_DESC_BIAS_POINTER).
1190
+ */
1191
+ CUBLASLT_EPILOGUE_BGRADA = 256,
1192
+
1193
+ /** Bias gradient based on the input matrix B.
1194
+ *
1195
+ * The bias size corresponds to the number of columns of the matrix D.
1196
+ * The reduction happens over the GEMM's "k" dimension.
1197
+ *
1198
+ * Stores Bias gradient in the auxiliary output
1199
+ * (see CUBLASLT_MATMUL_DESC_BIAS_POINTER).
1200
+ */
1201
+ CUBLASLT_EPILOGUE_BGRADB = 512,
1202
+ } cublasLtEpilogue_t;
1203
+
1204
+ /** Matmul heuristic search mode
1205
+ */
1206
+ typedef enum {
1207
+ /** ask heuristics for best algo for given usecase
1208
+ */
1209
+ CUBLASLT_SEARCH_BEST_FIT = 0,
1210
+ /** only try to find best config for preconfigured algo id
1211
+ */
1212
+ CUBLASLT_SEARCH_LIMITED_BY_ALGO_ID = 1,
1213
+ /** reserved for future use
1214
+ */
1215
+ CUBLASLT_SEARCH_RESERVED_02 = 2,
1216
+ /** reserved for future use
1217
+ */
1218
+ CUBLASLT_SEARCH_RESERVED_03 = 3,
1219
+ /** reserved for future use
1220
+ */
1221
+ CUBLASLT_SEARCH_RESERVED_04 = 4,
1222
+ /** reserved for future use
1223
+ */
1224
+ CUBLASLT_SEARCH_RESERVED_05 = 5,
1225
+ } cublasLtMatmulSearch_t;
1226
+
1227
+ /** Algo search preference to fine tune the heuristic function. */
1228
+ typedef enum {
1229
+ /** Search mode, see cublasLtMatmulSearch_t.
1230
+ *
1231
+ * uint32_t, default: CUBLASLT_SEARCH_BEST_FIT
1232
+ */
1233
+ CUBLASLT_MATMUL_PREF_SEARCH_MODE = 0,
1234
+
1235
+ /** Maximum allowed workspace size in bytes.
1236
+ *
1237
+ * uint64_t, default: 0 - no workspace allowed
1238
+ */
1239
+ CUBLASLT_MATMUL_PREF_MAX_WORKSPACE_BYTES = 1,
1240
+
1241
+ /** Math mode mask, see cublasMath_t.
1242
+ *
1243
+ * Only algorithms with CUBLASLT_ALGO_CAP_MATHMODE_IMPL that is not masked out by this attribute are allowed.
1244
+ *
1245
+ * uint32_t, default: 1 (allows both default and tensor op math)
1246
+ * DEPRECATED, will be removed in a future release, see cublasLtNumericalImplFlags_t for replacement
1247
+ */
1248
+ CUBLASLT_MATMUL_PREF_MATH_MODE_MASK = 2,
1249
+
1250
+ /** Reduction scheme mask, see cublasLtReductionScheme_t. Filters heuristic result to only include algo configs that
1251
+ * use one of the required modes.
1252
+ *
1253
+ * E.g. mask value of 0x03 will allow only INPLACE and COMPUTE_TYPE reduction schemes.
1254
+ *
1255
+ * uint32_t, default: CUBLASLT_REDUCTION_SCHEME_MASK (allows all reduction schemes)
1256
+ */
1257
+ CUBLASLT_MATMUL_PREF_REDUCTION_SCHEME_MASK = 3,
1258
+
1259
+ /** Gaussian mode mask, see cublasLt3mMode_t.
1260
+ *
1261
+ * Only algorithms with CUBLASLT_ALGO_CAP_GAUSSIAN_IMPL that is not masked out by this attribute are allowed.
1262
+ *
1263
+ * uint32_t, default: CUBLASLT_3M_MODE_ALLOWED (allows both gaussian and non-gaussian algorithms)
1264
+ * DEPRECATED, will be removed in a future release, see cublasLtNumericalImplFlags_t for replacement
1265
+ */
1266
+ CUBLASLT_MATMUL_PREF_GAUSSIAN_MODE_MASK = 4,
1267
+
1268
+ /** Minimum buffer alignment for matrix A (in bytes).
1269
+ *
1270
+ * Selecting a smaller value will exclude algorithms that can not work with matrix A that is not as strictly aligned
1271
+ * as they need.
1272
+ *
1273
+ * uint32_t, default: 256
1274
+ */
1275
+ CUBLASLT_MATMUL_PREF_MIN_ALIGNMENT_A_BYTES = 5,
1276
+
1277
+ /** Minimum buffer alignment for matrix B (in bytes).
1278
+ *
1279
+ * Selecting a smaller value will exclude algorithms that can not work with matrix B that is not as strictly aligned
1280
+ * as they need.
1281
+ *
1282
+ * uint32_t, default: 256
1283
+ */
1284
+ CUBLASLT_MATMUL_PREF_MIN_ALIGNMENT_B_BYTES = 6,
1285
+
1286
+ /** Minimum buffer alignment for matrix C (in bytes).
1287
+ *
1288
+ * Selecting a smaller value will exclude algorithms that can not work with matrix C that is not as strictly aligned
1289
+ * as they need.
1290
+ *
1291
+ * uint32_t, default: 256
1292
+ */
1293
+ CUBLASLT_MATMUL_PREF_MIN_ALIGNMENT_C_BYTES = 7,
1294
+
1295
+ /** Minimum buffer alignment for matrix D (in bytes).
1296
+ *
1297
+ * Selecting a smaller value will exclude algorithms that can not work with matrix D that is not as strictly aligned
1298
+ * as they need.
1299
+ *
1300
+ * uint32_t, default: 256
1301
+ */
1302
+ CUBLASLT_MATMUL_PREF_MIN_ALIGNMENT_D_BYTES = 8,
1303
+
1304
+ /** Maximum wave count.
1305
+ *
1306
+ * See cublasLtMatmulHeuristicResult_t::wavesCount.
1307
+ *
1308
+ * Selecting a non-zero value will exclude algorithms that report device utilization higher than specified.
1309
+ *
1310
+ * float, default: 0.0f
1311
+ */
1312
+ CUBLASLT_MATMUL_PREF_MAX_WAVES_COUNT = 9,
1313
+
1314
+ /** Pointer mode mask, see cublasLtPointerModeMask_t. Filters heuristic result to only include algorithms that support
1315
+ * all required modes.
1316
+ *
1317
+ * uint32_t, default: (CUBLASLT_POINTER_MODE_MASK_HOST | CUBLASLT_POINTER_MODE_MASK_DEVICE) (only allows algorithms
1318
+ * that support both regular host and device pointers)
1319
+ */
1320
+ CUBLASLT_MATMUL_PREF_POINTER_MODE_MASK = 10,
1321
+
1322
+ /** Epilogue selector mask, see cublasLtEpilogue_t. Filters heuristic result to only include algorithms that support
1323
+ * all required operations.
1324
+ *
1325
+ * uint32_t, default: CUBLASLT_EPILOGUE_DEFAULT (only allows algorithms that support default epilogue)
1326
+ */
1327
+ CUBLASLT_MATMUL_PREF_EPILOGUE_MASK = 11,
1328
+
1329
+ /** Numerical implementation details mask, see cublasLtNumericalImplFlags_t. Filters heuristic result to only include
1330
+ * algorithms that use the allowed implementations.
1331
+ *
1332
+ * uint64_t, default: uint64_t(-1) (allow everything)
1333
+ */
1334
+ CUBLASLT_MATMUL_PREF_IMPL_MASK = 12,
1335
+
1336
+ /** Number of SMs to target for parallel execution. Optimizes heuristics for execution on a different number of SMs
1337
+ * when user expects a concurrent stream to be using some of the device resources.
1338
+ *
1339
+ * Overrides the SM count target set in the matrix multiplication descriptor (see cublasLtMatmulDescAttributes_t).
1340
+ *
1341
+ * int32_t, default: 0 - use the number reported by the device.
1342
+ * DEPRECATED, will be removed in a future release, see cublasLtMatmulDescAttributes_t for replacement
1343
+ */
1344
+ CUBLASLT_MATMUL_PREF_SM_COUNT_TARGET = 13,
1345
+ } cublasLtMatmulPreferenceAttributes_t;
1346
+
1347
+ /** Internal. Do not use directly.
1348
+ */
1349
+ cublasStatus_t CUBLASWINAPI cublasLtMatmulPreferenceInit_internal(cublasLtMatmulPreference_t pref, size_t size);
1350
+
1351
+ /** Initialize matmul heuristic search preference descriptor in pre-allocated space.
1352
+ *
1353
+ * \retval CUBLAS_STATUS_ALLOC_FAILED if size of the pre-allocated space is insufficient
1354
+ * \retval CUBLAS_STATUS_SUCCESS if desciptor was created successfully
1355
+ */
1356
+ static inline cublasStatus_t cublasLtMatmulPreferenceInit(cublasLtMatmulPreference_t pref) {
1357
+ return cublasLtMatmulPreferenceInit_internal(pref, sizeof(*pref));
1358
+ }
1359
+
1360
+ /** Create new matmul heuristic search preference descriptor.
1361
+ *
1362
+ * \retval CUBLAS_STATUS_ALLOC_FAILED if memory could not be allocated
1363
+ * \retval CUBLAS_STATUS_SUCCESS if desciptor was created successfully
1364
+ */
1365
+ cublasStatus_t CUBLASWINAPI cublasLtMatmulPreferenceCreate(cublasLtMatmulPreference_t* pref);
1366
+
1367
+ /** Destroy matmul heuristic search preference descriptor.
1368
+ *
1369
+ * \retval CUBLAS_STATUS_SUCCESS if operation was successful
1370
+ */
1371
+ cublasStatus_t CUBLASWINAPI cublasLtMatmulPreferenceDestroy(cublasLtMatmulPreference_t pref);
1372
+
1373
+ /** Set matmul heuristic search preference descriptor attribute.
1374
+ *
1375
+ * \param[in] pref The descriptor
1376
+ * \param[in] attr The attribute
1377
+ * \param[in] buf memory address containing the new value
1378
+ * \param[in] sizeInBytes size of buf buffer for verification (in bytes)
1379
+ *
1380
+ * \retval CUBLAS_STATUS_INVALID_VALUE if buf is NULL or sizeInBytes doesn't match size of internal storage for
1381
+ * selected attribute
1382
+ * \retval CUBLAS_STATUS_SUCCESS if attribute was set successfully
1383
+ */
1384
+ cublasStatus_t CUBLASWINAPI cublasLtMatmulPreferenceSetAttribute( //
1385
+ cublasLtMatmulPreference_t pref,
1386
+ cublasLtMatmulPreferenceAttributes_t attr,
1387
+ const void* buf,
1388
+ size_t sizeInBytes);
1389
+
1390
+ /** Get matmul heuristic search preference descriptor attribute.
1391
+ *
1392
+ * \param[in] pref The descriptor
1393
+ * \param[in] attr The attribute
1394
+ * \param[out] buf memory address containing the new value
1395
+ * \param[in] sizeInBytes size of buf buffer for verification (in bytes)
1396
+ * \param[out] sizeWritten only valid when return value is CUBLAS_STATUS_SUCCESS. If sizeInBytes is non-zero: number of
1397
+ * bytes actually written, if sizeInBytes is 0: number of bytes needed to write full contents
1398
+ *
1399
+ * \retval CUBLAS_STATUS_INVALID_VALUE if sizeInBytes is 0 and sizeWritten is NULL, or if sizeInBytes is non-zero
1400
+ * and buf is NULL or sizeInBytes doesn't match size of internal storage for
1401
+ * selected attribute
1402
+ * \retval CUBLAS_STATUS_SUCCESS if attribute's value was successfully written to user memory
1403
+ */
1404
+ cublasStatus_t CUBLASWINAPI cublasLtMatmulPreferenceGetAttribute( //
1405
+ cublasLtMatmulPreference_t pref,
1406
+ cublasLtMatmulPreferenceAttributes_t attr,
1407
+ void* buf,
1408
+ size_t sizeInBytes,
1409
+ size_t* sizeWritten);
1410
+
1411
+ /** Results structure used by cublasLtMatmulGetAlgo.
1412
+ *
1413
+ * Holds returned configured algo descriptor and its runtime properties.
1414
+ */
1415
+ typedef struct {
1416
+ /** Matmul algorithm descriptor.
1417
+ *
1418
+ * Must be initialized with cublasLtMatmulAlgoInit() if preferences' CUBLASLT_MATMUL_PERF_SEARCH_MODE is set to
1419
+ * CUBLASLT_SEARCH_LIMITED_BY_ALGO_ID
1420
+ */
1421
+ cublasLtMatmulAlgo_t algo;
1422
+
1423
+ /** Actual size of workspace memory required.
1424
+ */
1425
+ size_t workspaceSize;
1426
+
1427
+ /** Result status, other fields are only valid if after call to cublasLtMatmulAlgoGetHeuristic() this member is set to
1428
+ * CUBLAS_STATUS_SUCCESS.
1429
+ */
1430
+ cublasStatus_t state;
1431
+
1432
+ /** Waves count - a device utilization metric.
1433
+ *
1434
+ * wavesCount value of 1.0f suggests that when kernel is launched it will fully occupy the GPU.
1435
+ */
1436
+ float wavesCount;
1437
+
1438
+ int reserved[4];
1439
+ } cublasLtMatmulHeuristicResult_t;
1440
+
1441
+ /** Query cublasLt heuristic for algorithm appropriate for given use case.
1442
+ *
1443
+ * \param[in] lightHandle Pointer to the allocated cuBLASLt handle for the cuBLASLt
1444
+ * context. See cublasLtHandle_t.
1445
+ * \param[in] operationDesc Handle to the matrix multiplication descriptor.
1446
+ * \param[in] Adesc Handle to the layout descriptors for matrix A.
1447
+ * \param[in] Bdesc Handle to the layout descriptors for matrix B.
1448
+ * \param[in] Cdesc Handle to the layout descriptors for matrix C.
1449
+ * \param[in] Ddesc Handle to the layout descriptors for matrix D.
1450
+ * \param[in] preference Pointer to the structure holding the heuristic search
1451
+ * preferences descriptor. See cublasLtMatrixLayout_t.
1452
+ * \param[in] requestedAlgoCount Size of heuristicResultsArray (in elements) and requested
1453
+ * maximum number of algorithms to return.
1454
+ * \param[in, out] heuristicResultsArray Output algorithms and associated runtime characteristics,
1455
+ * ordered in increasing estimated compute time.
1456
+ * \param[out] returnAlgoCount The number of heuristicResultsArray elements written.
1457
+ *
1458
+ * \retval CUBLAS_STATUS_INVALID_VALUE if requestedAlgoCount is less or equal to zero
1459
+ * \retval CUBLAS_STATUS_NOT_SUPPORTED if no heuristic function available for current configuration
1460
+ * \retval CUBLAS_STATUS_SUCCESS if query was successful, inspect
1461
+ * heuristicResultsArray[0 to (returnAlgoCount - 1)].state
1462
+ * for detail status of results
1463
+ */
1464
+ cublasStatus_t CUBLASWINAPI cublasLtMatmulAlgoGetHeuristic(cublasLtHandle_t lightHandle,
1465
+ cublasLtMatmulDesc_t operationDesc,
1466
+ cublasLtMatrixLayout_t Adesc,
1467
+ cublasLtMatrixLayout_t Bdesc,
1468
+ cublasLtMatrixLayout_t Cdesc,
1469
+ cublasLtMatrixLayout_t Ddesc,
1470
+ cublasLtMatmulPreference_t preference,
1471
+ int requestedAlgoCount,
1472
+ cublasLtMatmulHeuristicResult_t heuristicResultsArray[],
1473
+ int* returnAlgoCount);
1474
+
1475
+ /* ---------------------------------------------------------------------------------------*/
1476
+ /* Lower level API to be able to implement own Heuristic and Find routines */
1477
+ /* ---------------------------------------------------------------------------------------*/
1478
+
1479
+ /** Routine to get all algo IDs that can potentially run
1480
+ *
1481
+ * \param[in] int requestedAlgoCount requested number of algos (must be less or equal to size of algoIdsA
1482
+ * (in elements)) \param[out] algoIdsA array to write algoIds to \param[out] returnAlgoCount number of algoIds
1483
+ * actually written
1484
+ *
1485
+ * \retval CUBLAS_STATUS_INVALID_VALUE if requestedAlgoCount is less or equal to zero
1486
+ * \retval CUBLAS_STATUS_SUCCESS if query was successful, inspect returnAlgoCount to get actual number of IDs
1487
+ * available
1488
+ */
1489
+ cublasStatus_t CUBLASWINAPI cublasLtMatmulAlgoGetIds(cublasLtHandle_t lightHandle,
1490
+ cublasComputeType_t computeType,
1491
+ cudaDataType_t scaleType,
1492
+ cudaDataType_t Atype,
1493
+ cudaDataType_t Btype,
1494
+ cudaDataType_t Ctype,
1495
+ cudaDataType_t Dtype,
1496
+ int requestedAlgoCount,
1497
+ int algoIdsArray[],
1498
+ int* returnAlgoCount);
1499
+
1500
+ /** Initialize algo structure
1501
+ *
1502
+ * \retval CUBLAS_STATUS_INVALID_VALUE if algo is NULL or algoId is outside of recognized range
1503
+ * \retval CUBLAS_STATUS_NOT_SUPPORTED if algoId is not supported for given combination of data types
1504
+ * \retval CUBLAS_STATUS_SUCCESS if the structure was successfully initialized
1505
+ */
1506
+ cublasStatus_t CUBLASWINAPI cublasLtMatmulAlgoInit(cublasLtHandle_t lightHandle,
1507
+ cublasComputeType_t computeType,
1508
+ cudaDataType_t scaleType,
1509
+ cudaDataType_t Atype,
1510
+ cudaDataType_t Btype,
1511
+ cudaDataType_t Ctype,
1512
+ cudaDataType_t Dtype,
1513
+ int algoId,
1514
+ cublasLtMatmulAlgo_t* algo);
1515
+
1516
+ /** Check configured algo descriptor for correctness and support on current device.
1517
+ *
1518
+ * Result includes required workspace size and calculated wave count.
1519
+ *
1520
+ * CUBLAS_STATUS_SUCCESS doesn't fully guarantee algo will run (will fail if e.g. buffers are not correctly aligned);
1521
+ * but if cublasLtMatmulAlgoCheck fails, the algo will not run.
1522
+ *
1523
+ * \param[in] algo algo configuration to check
1524
+ * \param[out] result result structure to report algo runtime characteristics; algo field is never updated
1525
+ *
1526
+ * \retval CUBLAS_STATUS_INVALID_VALUE if matrix layout descriptors or operation descriptor don't match algo
1527
+ * descriptor
1528
+ * \retval CUBLAS_STATUS_NOT_SUPPORTED if algo configuration or data type combination is not currently supported on
1529
+ * given device
1530
+ * \retval CUBLAS_STATUS_ARCH_MISMATCH if algo configuration cannot be run using the selected device
1531
+ * \retval CUBLAS_STATUS_SUCCESS if check was successful
1532
+ */
1533
+ cublasStatus_t CUBLASWINAPI cublasLtMatmulAlgoCheck( //
1534
+ cublasLtHandle_t lightHandle,
1535
+ cublasLtMatmulDesc_t operationDesc,
1536
+ cublasLtMatrixLayout_t Adesc,
1537
+ cublasLtMatrixLayout_t Bdesc,
1538
+ cublasLtMatrixLayout_t Cdesc,
1539
+ cublasLtMatrixLayout_t Ddesc,
1540
+ const cublasLtMatmulAlgo_t* algo, ///< may point to result->algo
1541
+ cublasLtMatmulHeuristicResult_t* result);
1542
+
1543
+ /** Capabilities Attributes that can be retrieved from an initialized Algo structure
1544
+ */
1545
+ typedef enum {
1546
+ /** support for split K, see CUBLASLT_ALGO_CONFIG_SPLITK_NUM
1547
+ *
1548
+ * int32_t, 0 means no support, supported otherwise
1549
+ */
1550
+ CUBLASLT_ALGO_CAP_SPLITK_SUPPORT = 0,
1551
+ /** reduction scheme mask, see cublasLtReductionScheme_t; shows supported reduction schemes, if reduction scheme is
1552
+ * not masked out it is supported.
1553
+ *
1554
+ * e.g. int isReductionSchemeComputeTypeSupported ? (reductionSchemeMask & CUBLASLT_REDUCTION_SCHEME_COMPUTE_TYPE) ==
1555
+ * CUBLASLT_REDUCTION_SCHEME_COMPUTE_TYPE ? 1 : 0;
1556
+ *
1557
+ * uint32_t
1558
+ */
1559
+ CUBLASLT_ALGO_CAP_REDUCTION_SCHEME_MASK = 1,
1560
+ /** support for cta swizzling, see CUBLASLT_ALGO_CONFIG_CTA_SWIZZLING
1561
+ *
1562
+ * uint32_t, 0 means no support, 1 means supported value of 1, other values are reserved
1563
+ */
1564
+ CUBLASLT_ALGO_CAP_CTA_SWIZZLING_SUPPORT = 2,
1565
+ /** support strided batch
1566
+ *
1567
+ * int32_t, 0 means no support, supported otherwise
1568
+ */
1569
+ CUBLASLT_ALGO_CAP_STRIDED_BATCH_SUPPORT = 3,
1570
+ /** support results out of place (D != C in D = alpha.A.B + beta.C)
1571
+ *
1572
+ * int32_t, 0 means no support, supported otherwise
1573
+ */
1574
+ CUBLASLT_ALGO_CAP_OUT_OF_PLACE_RESULT_SUPPORT = 4,
1575
+ /** syrk/herk support (on top of regular gemm)
1576
+ *
1577
+ * int32_t, 0 means no support, supported otherwise
1578
+ */
1579
+ CUBLASLT_ALGO_CAP_UPLO_SUPPORT = 5,
1580
+ /** tile ids possible to use, see cublasLtMatmulTile_t; if no tile ids are supported use
1581
+ * CUBLASLT_MATMUL_TILE_UNDEFINED
1582
+ *
1583
+ * use cublasLtMatmulAlgoCapGetAttribute() with sizeInBytes=0 to query actual count
1584
+ *
1585
+ * array of uint32_t
1586
+ */
1587
+ CUBLASLT_ALGO_CAP_TILE_IDS = 6,
1588
+ /** custom option range is from 0 to CUBLASLT_ALGO_CAP_CUSTOM_OPTION_MAX (inclusive), see
1589
+ * CUBLASLT_ALGO_CONFIG_CUSTOM_OPTION
1590
+ *
1591
+ * int32_t
1592
+ */
1593
+ CUBLASLT_ALGO_CAP_CUSTOM_OPTION_MAX = 7,
1594
+ /** whether algorithm is using regular compute or tensor operations
1595
+ *
1596
+ * int32_t 0 means regular compute, 1 means tensor operations;
1597
+ * DEPRECATED
1598
+ */
1599
+ CUBLASLT_ALGO_CAP_MATHMODE_IMPL = 8,
1600
+ /** whether algorithm implements gaussian optimization of complex matrix multiplication, see cublasMath_t
1601
+ *
1602
+ * int32_t 0 means regular compute, 1 means gaussian;
1603
+ * DEPRECATED
1604
+ */
1605
+ CUBLASLT_ALGO_CAP_GAUSSIAN_IMPL = 9,
1606
+ /** whether algorithm supports custom (not COL or ROW memory order), see cublasLtOrder_t
1607
+ *
1608
+ * int32_t 0 means only COL and ROW memory order is allowed, non-zero means that algo might have different
1609
+ * requirements;
1610
+ */
1611
+ CUBLASLT_ALGO_CAP_CUSTOM_MEMORY_ORDER = 10,
1612
+
1613
+ /** bitmask enumerating pointer modes algorithm supports
1614
+ *
1615
+ * uint32_t, see cublasLtPointerModeMask_t
1616
+ */
1617
+ CUBLASLT_ALGO_CAP_POINTER_MODE_MASK = 11,
1618
+
1619
+ /** bitmask enumerating kinds of postprocessing algorithm supports in the epilogue
1620
+ *
1621
+ * uint32_t, see cublasLtEpilogue_t
1622
+ */
1623
+ CUBLASLT_ALGO_CAP_EPILOGUE_MASK = 12,
1624
+ /** stages ids possible to use, see cublasLtMatmulStages_t; if no stages ids are supported use
1625
+ * CUBLASLT_MATMUL_STAGES_UNDEFINED
1626
+ *
1627
+ * use cublasLtMatmulAlgoCapGetAttribute() with sizeInBytes=0 to query actual count
1628
+ *
1629
+ * array of uint32_t
1630
+ */
1631
+ CUBLASLT_ALGO_CAP_STAGES_IDS = 13,
1632
+ /** support for nagative ld for all of the matrices
1633
+ *
1634
+ * int32_t 0 means no support, supported otherwise
1635
+ */
1636
+ CUBLASLT_ALGO_CAP_LD_NEGATIVE = 14,
1637
+ /** details about algorithm's implementation that affect it's numerical behavior
1638
+ *
1639
+ * uint64_t, see cublasLtNumericalImplFlags_t
1640
+ */
1641
+ CUBLASLT_ALGO_CAP_NUMERICAL_IMPL_FLAGS = 15,
1642
+ /** minimum alignment required for A matrix in bytes
1643
+ * (required for buffer pointer, leading dimension, and possibly other strides defined for matrix memory order)
1644
+ *
1645
+ * uint32_t
1646
+ */
1647
+ CUBLASLT_ALGO_CAP_MIN_ALIGNMENT_A_BYTES = 16,
1648
+ /** minimum alignment required for B matrix in bytes
1649
+ * (required for buffer pointer, leading dimension, and possibly other strides defined for matrix memory order)
1650
+ *
1651
+ * uint32_t
1652
+ */
1653
+ CUBLASLT_ALGO_CAP_MIN_ALIGNMENT_B_BYTES = 17,
1654
+ /** minimum alignment required for C matrix in bytes
1655
+ * (required for buffer pointer, leading dimension, and possibly other strides defined for matrix memory order)
1656
+ *
1657
+ * uint32_t
1658
+ */
1659
+ CUBLASLT_ALGO_CAP_MIN_ALIGNMENT_C_BYTES = 18,
1660
+ /** minimum alignment required for D matrix in bytes
1661
+ * (required for buffer pointer, leading dimension, and possibly other strides defined for matrix memory order)
1662
+ *
1663
+ * uint32_t
1664
+ */
1665
+ CUBLASLT_ALGO_CAP_MIN_ALIGNMENT_D_BYTES = 19,
1666
+ } cublasLtMatmulAlgoCapAttributes_t;
1667
+
1668
+ /** Get algo capability attribute.
1669
+ *
1670
+ * E.g. to get list of supported Tile IDs:
1671
+ * cublasLtMatmulTile_t tiles[CUBLASLT_MATMUL_TILE_END];
1672
+ * size_t num_tiles, size_written;
1673
+ * if (cublasLtMatmulAlgoCapGetAttribute(algo, CUBLASLT_ALGO_CAP_TILE_IDS, tiles, sizeof(tiles), size_written) ==
1674
+ * CUBLAS_STATUS_SUCCESS) { num_tiles = size_written / sizeof(tiles[0]);
1675
+ * }
1676
+ *
1677
+ * \param[in] algo The algo descriptor
1678
+ * \param[in] attr The attribute
1679
+ * \param[out] buf memory address containing the new value
1680
+ * \param[in] sizeInBytes size of buf buffer for verification (in bytes)
1681
+ * \param[out] sizeWritten only valid when return value is CUBLAS_STATUS_SUCCESS. If sizeInBytes is non-zero: number of
1682
+ * bytes actually written, if sizeInBytes is 0: number of bytes needed to write full contents
1683
+ *
1684
+ * \retval CUBLAS_STATUS_INVALID_VALUE if sizeInBytes is 0 and sizeWritten is NULL, or if sizeInBytes is non-zero
1685
+ * and buf is NULL or sizeInBytes doesn't match size of internal storage for
1686
+ * selected attribute
1687
+ * \retval CUBLAS_STATUS_SUCCESS if attribute's value was successfully written to user memory
1688
+ */
1689
+ cublasStatus_t CUBLASWINAPI cublasLtMatmulAlgoCapGetAttribute(const cublasLtMatmulAlgo_t* algo,
1690
+ cublasLtMatmulAlgoCapAttributes_t attr,
1691
+ void* buf,
1692
+ size_t sizeInBytes,
1693
+ size_t* sizeWritten);
1694
+
1695
+ /** Algo Configuration Attributes that can be set according to the Algo capabilities
1696
+ */
1697
+ typedef enum {
1698
+ /** algorithm index, see cublasLtMatmulAlgoGetIds()
1699
+ *
1700
+ * readonly, set by cublasLtMatmulAlgoInit()
1701
+ * int32_t
1702
+ */
1703
+ CUBLASLT_ALGO_CONFIG_ID = 0,
1704
+ /** tile id, see cublasLtMatmulTile_t
1705
+ *
1706
+ * uint32_t, default: CUBLASLT_MATMUL_TILE_UNDEFINED
1707
+ */
1708
+ CUBLASLT_ALGO_CONFIG_TILE_ID = 1,
1709
+ /** Number of K splits. If the number of K splits is greater than one, SPLITK_NUM parts
1710
+ * of matrix multiplication will be computed in parallel. The results will be accumulated
1711
+ * according to CUBLASLT_ALGO_CONFIG_REDUCTION_SCHEME
1712
+ *
1713
+ * int32_t, default: 1
1714
+ */
1715
+ CUBLASLT_ALGO_CONFIG_SPLITK_NUM = 2,
1716
+ /** reduction scheme, see cublasLtReductionScheme_t
1717
+ *
1718
+ * uint32_t, default: CUBLASLT_REDUCTION_SCHEME_NONE
1719
+ */
1720
+ CUBLASLT_ALGO_CONFIG_REDUCTION_SCHEME = 3,
1721
+ /** cta swizzling, change mapping from CUDA grid coordinates to parts of the matrices
1722
+ *
1723
+ * possible values: 0, 1, other values reserved
1724
+ *
1725
+ * uint32_t, default: 0
1726
+ */
1727
+ CUBLASLT_ALGO_CONFIG_CTA_SWIZZLING = 4,
1728
+ /** custom option, each algorithm can support some custom options that don't fit description of the other config
1729
+ * attributes, see CUBLASLT_ALGO_CAP_CUSTOM_OPTION_MAX to get accepted range for any specific case
1730
+ *
1731
+ * uint32_t, default: 0
1732
+ */
1733
+ CUBLASLT_ALGO_CONFIG_CUSTOM_OPTION = 5,
1734
+ /** stages id, see cublasLtMatmulStages_t
1735
+ *
1736
+ * uint32_t, default: CUBLASLT_MATMUL_STAGES_UNDEFINED
1737
+ */
1738
+ CUBLASLT_ALGO_CONFIG_STAGES_ID = 6,
1739
+ /** inner shape id, see cublasLtMatmulInnerShape_t
1740
+ *
1741
+ * uint16_t, default: 0 (CUBLASLT_MATMUL_INNER_SHAPE_UNDEFINED)
1742
+ */
1743
+ CUBLASLT_ALGO_CONFIG_INNER_SHAPE_ID = 7,
1744
+ /** Thread Block Cluster shape id, see cublasLtClusterShape_t. Defines cluster size to use.
1745
+ *
1746
+ * uint16_t, default: 0 (CUBLASLT_CLUSTER_SHAPE_AUTO)
1747
+ */
1748
+ CUBLASLT_ALGO_CONFIG_CLUSTER_SHAPE_ID = 8,
1749
+ } cublasLtMatmulAlgoConfigAttributes_t;
1750
+
1751
+ /** Set algo configuration attribute.
1752
+ *
1753
+ * \param[in] algo The algo descriptor
1754
+ * \param[in] attr The attribute
1755
+ * \param[in] buf memory address containing the new value
1756
+ * \param[in] sizeInBytes size of buf buffer for verification (in bytes)
1757
+ *
1758
+ * \retval CUBLAS_STATUS_INVALID_VALUE if buf is NULL or sizeInBytes doesn't match size of internal storage for
1759
+ * selected attribute
1760
+ * \retval CUBLAS_STATUS_SUCCESS if attribute was set successfully
1761
+ */
1762
+ cublasStatus_t CUBLASWINAPI cublasLtMatmulAlgoConfigSetAttribute(cublasLtMatmulAlgo_t* algo,
1763
+ cublasLtMatmulAlgoConfigAttributes_t attr,
1764
+ const void* buf,
1765
+ size_t sizeInBytes);
1766
+
1767
+ /** Get algo configuration attribute.
1768
+ *
1769
+ * \param[in] algo The algo descriptor
1770
+ * \param[in] attr The attribute
1771
+ * \param[out] buf memory address containing the new value
1772
+ * \param[in] sizeInBytes size of buf buffer for verification (in bytes)
1773
+ * \param[out] sizeWritten only valid when return value is CUBLAS_STATUS_SUCCESS. If sizeInBytes is non-zero: number of
1774
+ * bytes actually written, if sizeInBytes is 0: number of bytes needed to write full contents
1775
+ *
1776
+ * \retval CUBLAS_STATUS_INVALID_VALUE if sizeInBytes is 0 and sizeWritten is NULL, or if sizeInBytes is non-zero
1777
+ * and buf is NULL or sizeInBytes doesn't match size of internal storage for
1778
+ * selected attribute
1779
+ * \retval CUBLAS_STATUS_SUCCESS if attribute's value was successfully written to user memory
1780
+ */
1781
+ cublasStatus_t CUBLASWINAPI cublasLtMatmulAlgoConfigGetAttribute(const cublasLtMatmulAlgo_t* algo,
1782
+ cublasLtMatmulAlgoConfigAttributes_t attr,
1783
+ void* buf,
1784
+ size_t sizeInBytes,
1785
+ size_t* sizeWritten);
1786
+
1787
+ /** Experimental: Logger callback type.
1788
+ */
1789
+ typedef void (*cublasLtLoggerCallback_t)(int logLevel, const char* functionName, const char* message);
1790
+
1791
+ /** Experimental: Logger callback setter.
1792
+ *
1793
+ * \param[in] callback a user defined callback function to be called by the logger
1794
+ *
1795
+ * \retval CUBLAS_STATUS_SUCCESS if callback was set successfully
1796
+ */
1797
+ cublasStatus_t CUBLASWINAPI cublasLtLoggerSetCallback(cublasLtLoggerCallback_t callback);
1798
+
1799
+ /** Experimental: Log file setter.
1800
+ *
1801
+ * \param[in] file an open file with write permissions
1802
+ *
1803
+ * \retval CUBLAS_STATUS_SUCCESS if log file was set successfully
1804
+ */
1805
+ cublasStatus_t CUBLASWINAPI cublasLtLoggerSetFile(FILE* file);
1806
+
1807
+ /** Experimental: Open log file.
1808
+ *
1809
+ * \param[in] logFile log file path. if the log file does not exist, it will be created
1810
+ *
1811
+ * \retval CUBLAS_STATUS_SUCCESS if log file was created successfully
1812
+ */
1813
+ cublasStatus_t CUBLASWINAPI cublasLtLoggerOpenFile(const char* logFile);
1814
+
1815
+ /** Experimental: Log level setter.
1816
+ *
1817
+ * \param[in] level log level, should be one of the following:
1818
+ * 0. Off
1819
+ * 1. Errors
1820
+ * 2. Performance Trace
1821
+ * 3. Performance Hints
1822
+ * 4. Heuristics Trace
1823
+ * 5. API Trace
1824
+ *
1825
+ * \retval CUBLAS_STATUS_INVALID_VALUE if log level is not one of the above levels
1826
+ *
1827
+ * \retval CUBLAS_STATUS_SUCCESS if log level was set successfully
1828
+ */
1829
+ cublasStatus_t CUBLASWINAPI cublasLtLoggerSetLevel(int level);
1830
+
1831
+ /** Experimental: Log mask setter.
1832
+ *
1833
+ * \param[in] mask log mask, should be a combination of the following masks:
1834
+ * 0. Off
1835
+ * 1. Errors
1836
+ * 2. Performance Trace
1837
+ * 4. Performance Hints
1838
+ * 8. Heuristics Trace
1839
+ * 16. API Trace
1840
+ *
1841
+ * \retval CUBLAS_STATUS_SUCCESS if log mask was set successfully
1842
+ */
1843
+ cublasStatus_t CUBLASWINAPI cublasLtLoggerSetMask(int mask);
1844
+
1845
+ /** Experimental: Disable logging for the entire session.
1846
+ *
1847
+ * \retval CUBLAS_STATUS_SUCCESS if disabled logging
1848
+ */
1849
+ cublasStatus_t CUBLASWINAPI cublasLtLoggerForceDisable();
1850
+
1851
+ #if defined(__cplusplus)
1852
+ }
1853
+ #endif /* __cplusplus */
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cublas/include/cublas_api.h ADDED
The diff for this file is too large to render. See raw diff
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cublas/include/cublas_v2.h ADDED
@@ -0,0 +1,273 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 1993-2019 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ /*
51
+ * This is the public header file for the new CUBLAS library API, it mapped the generic
52
+ * Cublas name functions to the actual _v2 implementations.
53
+ */
54
+
55
+ #if !defined(CUBLAS_V2_H_)
56
+ #define CUBLAS_V2_H_
57
+
58
+ #undef CUBLASAPI
59
+ #ifdef __CUDACC__
60
+ #define CUBLASAPI __host__ __device__
61
+ #else
62
+ #define CUBLASAPI
63
+ #endif
64
+
65
+ #include "cublas_api.h"
66
+
67
+ #define cublasCreate cublasCreate_v2
68
+ #define cublasDestroy cublasDestroy_v2
69
+ #define cublasGetVersion cublasGetVersion_v2
70
+ #define cublasSetWorkspace cublasSetWorkspace_v2
71
+ #define cublasSetStream cublasSetStream_v2
72
+ #define cublasGetStream cublasGetStream_v2
73
+ #define cublasGetPointerMode cublasGetPointerMode_v2
74
+ #define cublasSetPointerMode cublasSetPointerMode_v2
75
+
76
+ /* Blas3 Routines */
77
+
78
+ #define cublasSnrm2 cublasSnrm2_v2
79
+ #define cublasDnrm2 cublasDnrm2_v2
80
+ #define cublasScnrm2 cublasScnrm2_v2
81
+ #define cublasDznrm2 cublasDznrm2_v2
82
+
83
+ #define cublasSdot cublasSdot_v2
84
+ #define cublasDdot cublasDdot_v2
85
+ #define cublasCdotu cublasCdotu_v2
86
+ #define cublasCdotc cublasCdotc_v2
87
+ #define cublasZdotu cublasZdotu_v2
88
+ #define cublasZdotc cublasZdotc_v2
89
+
90
+ #define cublasSscal cublasSscal_v2
91
+ #define cublasDscal cublasDscal_v2
92
+ #define cublasCscal cublasCscal_v2
93
+ #define cublasCsscal cublasCsscal_v2
94
+ #define cublasZscal cublasZscal_v2
95
+ #define cublasZdscal cublasZdscal_v2
96
+
97
+ #define cublasSaxpy cublasSaxpy_v2
98
+ #define cublasDaxpy cublasDaxpy_v2
99
+ #define cublasCaxpy cublasCaxpy_v2
100
+ #define cublasZaxpy cublasZaxpy_v2
101
+
102
+ #define cublasScopy cublasScopy_v2
103
+ #define cublasDcopy cublasDcopy_v2
104
+ #define cublasCcopy cublasCcopy_v2
105
+ #define cublasZcopy cublasZcopy_v2
106
+
107
+ #define cublasSswap cublasSswap_v2
108
+ #define cublasDswap cublasDswap_v2
109
+ #define cublasCswap cublasCswap_v2
110
+ #define cublasZswap cublasZswap_v2
111
+
112
+ #define cublasIsamax cublasIsamax_v2
113
+ #define cublasIdamax cublasIdamax_v2
114
+ #define cublasIcamax cublasIcamax_v2
115
+ #define cublasIzamax cublasIzamax_v2
116
+
117
+ #define cublasIsamin cublasIsamin_v2
118
+ #define cublasIdamin cublasIdamin_v2
119
+ #define cublasIcamin cublasIcamin_v2
120
+ #define cublasIzamin cublasIzamin_v2
121
+
122
+ #define cublasSasum cublasSasum_v2
123
+ #define cublasDasum cublasDasum_v2
124
+ #define cublasScasum cublasScasum_v2
125
+ #define cublasDzasum cublasDzasum_v2
126
+
127
+ #define cublasSrot cublasSrot_v2
128
+ #define cublasDrot cublasDrot_v2
129
+ #define cublasCrot cublasCrot_v2
130
+ #define cublasCsrot cublasCsrot_v2
131
+ #define cublasZrot cublasZrot_v2
132
+ #define cublasZdrot cublasZdrot_v2
133
+
134
+ #define cublasSrotg cublasSrotg_v2
135
+ #define cublasDrotg cublasDrotg_v2
136
+ #define cublasCrotg cublasCrotg_v2
137
+ #define cublasZrotg cublasZrotg_v2
138
+
139
+ #define cublasSrotm cublasSrotm_v2
140
+ #define cublasDrotm cublasDrotm_v2
141
+
142
+ #define cublasSrotmg cublasSrotmg_v2
143
+ #define cublasDrotmg cublasDrotmg_v2
144
+
145
+ /* Blas2 Routines */
146
+
147
+ #define cublasSgemv cublasSgemv_v2
148
+ #define cublasDgemv cublasDgemv_v2
149
+ #define cublasCgemv cublasCgemv_v2
150
+ #define cublasZgemv cublasZgemv_v2
151
+
152
+ #define cublasSgbmv cublasSgbmv_v2
153
+ #define cublasDgbmv cublasDgbmv_v2
154
+ #define cublasCgbmv cublasCgbmv_v2
155
+ #define cublasZgbmv cublasZgbmv_v2
156
+
157
+ #define cublasStrmv cublasStrmv_v2
158
+ #define cublasDtrmv cublasDtrmv_v2
159
+ #define cublasCtrmv cublasCtrmv_v2
160
+ #define cublasZtrmv cublasZtrmv_v2
161
+
162
+ #define cublasStbmv cublasStbmv_v2
163
+ #define cublasDtbmv cublasDtbmv_v2
164
+ #define cublasCtbmv cublasCtbmv_v2
165
+ #define cublasZtbmv cublasZtbmv_v2
166
+
167
+ #define cublasStpmv cublasStpmv_v2
168
+ #define cublasDtpmv cublasDtpmv_v2
169
+ #define cublasCtpmv cublasCtpmv_v2
170
+ #define cublasZtpmv cublasZtpmv_v2
171
+
172
+ #define cublasStrsv cublasStrsv_v2
173
+ #define cublasDtrsv cublasDtrsv_v2
174
+ #define cublasCtrsv cublasCtrsv_v2
175
+ #define cublasZtrsv cublasZtrsv_v2
176
+
177
+ #define cublasStpsv cublasStpsv_v2
178
+ #define cublasDtpsv cublasDtpsv_v2
179
+ #define cublasCtpsv cublasCtpsv_v2
180
+ #define cublasZtpsv cublasZtpsv_v2
181
+
182
+ #define cublasStbsv cublasStbsv_v2
183
+ #define cublasDtbsv cublasDtbsv_v2
184
+ #define cublasCtbsv cublasCtbsv_v2
185
+ #define cublasZtbsv cublasZtbsv_v2
186
+
187
+ #define cublasSsymv cublasSsymv_v2
188
+ #define cublasDsymv cublasDsymv_v2
189
+ #define cublasCsymv cublasCsymv_v2
190
+ #define cublasZsymv cublasZsymv_v2
191
+ #define cublasChemv cublasChemv_v2
192
+ #define cublasZhemv cublasZhemv_v2
193
+
194
+ #define cublasSsbmv cublasSsbmv_v2
195
+ #define cublasDsbmv cublasDsbmv_v2
196
+ #define cublasChbmv cublasChbmv_v2
197
+ #define cublasZhbmv cublasZhbmv_v2
198
+
199
+ #define cublasSspmv cublasSspmv_v2
200
+ #define cublasDspmv cublasDspmv_v2
201
+ #define cublasChpmv cublasChpmv_v2
202
+ #define cublasZhpmv cublasZhpmv_v2
203
+
204
+ #define cublasSger cublasSger_v2
205
+ #define cublasDger cublasDger_v2
206
+ #define cublasCgeru cublasCgeru_v2
207
+ #define cublasCgerc cublasCgerc_v2
208
+ #define cublasZgeru cublasZgeru_v2
209
+ #define cublasZgerc cublasZgerc_v2
210
+
211
+ #define cublasSsyr cublasSsyr_v2
212
+ #define cublasDsyr cublasDsyr_v2
213
+ #define cublasCsyr cublasCsyr_v2
214
+ #define cublasZsyr cublasZsyr_v2
215
+ #define cublasCher cublasCher_v2
216
+ #define cublasZher cublasZher_v2
217
+
218
+ #define cublasSspr cublasSspr_v2
219
+ #define cublasDspr cublasDspr_v2
220
+ #define cublasChpr cublasChpr_v2
221
+ #define cublasZhpr cublasZhpr_v2
222
+
223
+ #define cublasSsyr2 cublasSsyr2_v2
224
+ #define cublasDsyr2 cublasDsyr2_v2
225
+ #define cublasCsyr2 cublasCsyr2_v2
226
+ #define cublasZsyr2 cublasZsyr2_v2
227
+ #define cublasCher2 cublasCher2_v2
228
+ #define cublasZher2 cublasZher2_v2
229
+
230
+ #define cublasSspr2 cublasSspr2_v2
231
+ #define cublasDspr2 cublasDspr2_v2
232
+ #define cublasChpr2 cublasChpr2_v2
233
+ #define cublasZhpr2 cublasZhpr2_v2
234
+
235
+ /* Blas3 Routines */
236
+
237
+ #define cublasSgemm cublasSgemm_v2
238
+ #define cublasDgemm cublasDgemm_v2
239
+ #define cublasCgemm cublasCgemm_v2
240
+ #define cublasZgemm cublasZgemm_v2
241
+
242
+ #define cublasSsyrk cublasSsyrk_v2
243
+ #define cublasDsyrk cublasDsyrk_v2
244
+ #define cublasCsyrk cublasCsyrk_v2
245
+ #define cublasZsyrk cublasZsyrk_v2
246
+ #define cublasCherk cublasCherk_v2
247
+ #define cublasZherk cublasZherk_v2
248
+
249
+ #define cublasSsyr2k cublasSsyr2k_v2
250
+ #define cublasDsyr2k cublasDsyr2k_v2
251
+ #define cublasCsyr2k cublasCsyr2k_v2
252
+ #define cublasZsyr2k cublasZsyr2k_v2
253
+ #define cublasCher2k cublasCher2k_v2
254
+ #define cublasZher2k cublasZher2k_v2
255
+
256
+ #define cublasSsymm cublasSsymm_v2
257
+ #define cublasDsymm cublasDsymm_v2
258
+ #define cublasCsymm cublasCsymm_v2
259
+ #define cublasZsymm cublasZsymm_v2
260
+ #define cublasChemm cublasChemm_v2
261
+ #define cublasZhemm cublasZhemm_v2
262
+
263
+ #define cublasStrsm cublasStrsm_v2
264
+ #define cublasDtrsm cublasDtrsm_v2
265
+ #define cublasCtrsm cublasCtrsm_v2
266
+ #define cublasZtrsm cublasZtrsm_v2
267
+
268
+ #define cublasStrmm cublasStrmm_v2
269
+ #define cublasDtrmm cublasDtrmm_v2
270
+ #define cublasCtrmm cublasCtrmm_v2
271
+ #define cublasZtrmm cublasZtrmm_v2
272
+
273
+ #endif /* !defined(CUBLAS_V2_H_) */
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cublas/lib/__init__.py ADDED
File without changes
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/__init__.py ADDED
File without changes
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_pcsampling.h ADDED
@@ -0,0 +1,923 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2020-2022 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #if !defined(_CUPTI_PCSAMPLING_H_)
51
+ #define _CUPTI_PCSAMPLING_H_
52
+
53
+ #include <cuda.h>
54
+ #include <stdint.h>
55
+ #include <stddef.h>
56
+ #include "cupti_result.h"
57
+
58
+ #ifndef CUPTIAPI
59
+ #ifdef _WIN32
60
+ #define CUPTIAPI __stdcall
61
+ #else
62
+ #define CUPTIAPI
63
+ #endif
64
+ #endif
65
+
66
+ #define ACTIVITY_RECORD_ALIGNMENT 8
67
+ #if defined(_WIN32) // Windows 32- and 64-bit
68
+ #define START_PACKED_ALIGNMENT __pragma(pack(push,1)) // exact fit - no padding
69
+ #define PACKED_ALIGNMENT __declspec(align(ACTIVITY_RECORD_ALIGNMENT))
70
+ #define END_PACKED_ALIGNMENT __pragma(pack(pop))
71
+ #elif defined(__GNUC__) // GCC
72
+ #define START_PACKED_ALIGNMENT
73
+ #define PACKED_ALIGNMENT __attribute__ ((__packed__)) __attribute__ ((aligned (ACTIVITY_RECORD_ALIGNMENT)))
74
+ #define END_PACKED_ALIGNMENT
75
+ #else // all other compilers
76
+ #define START_PACKED_ALIGNMENT
77
+ #define PACKED_ALIGNMENT
78
+ #define END_PACKED_ALIGNMENT
79
+ #endif
80
+
81
+ #if defined(__cplusplus)
82
+ extern "C" {
83
+ #endif
84
+
85
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
86
+ #pragma GCC visibility push(default)
87
+ #endif
88
+
89
+ /**
90
+ * \defgroup CUPTI_PCSAMPLING_API CUPTI PC Sampling API
91
+ * Functions, types, and enums that implement the CUPTI PC Sampling API.
92
+ * @{
93
+ */
94
+
95
+ #ifndef CUPTI_PCSAMPLING_STRUCT_SIZE
96
+ #define CUPTI_PCSAMPLING_STRUCT_SIZE(type_, lastfield_) (offsetof(type_, lastfield_) + sizeof(((type_*)0)->lastfield_))
97
+ #endif
98
+
99
+ #ifndef CUPTI_STALL_REASON_STRING_SIZE
100
+ #define CUPTI_STALL_REASON_STRING_SIZE 128
101
+ #endif
102
+
103
+ /**
104
+ * \brief PC Sampling collection mode
105
+ */
106
+ typedef enum
107
+ {
108
+ /**
109
+ * INVALID Value
110
+ */
111
+ CUPTI_PC_SAMPLING_COLLECTION_MODE_INVALID = 0,
112
+ /**
113
+ * Continuous mode. Kernels are not serialized in this mode.
114
+ */
115
+ CUPTI_PC_SAMPLING_COLLECTION_MODE_CONTINUOUS = 1,
116
+ /**
117
+ * Serialized mode. Kernels are serialized in this mode.
118
+ */
119
+ CUPTI_PC_SAMPLING_COLLECTION_MODE_KERNEL_SERIALIZED = 2,
120
+ } CUpti_PCSamplingCollectionMode;
121
+
122
+ /**
123
+ * \brief PC Sampling stall reasons
124
+ */
125
+ typedef struct PACKED_ALIGNMENT
126
+ {
127
+ /**
128
+ * [r] Collected stall reason index
129
+ */
130
+ uint32_t pcSamplingStallReasonIndex;
131
+ /**
132
+ * [r] Number of times the PC was sampled with the stallReason.
133
+ */
134
+ uint32_t samples;
135
+ } CUpti_PCSamplingStallReason;
136
+
137
+ /**
138
+ * \brief PC Sampling data
139
+ */
140
+ typedef struct PACKED_ALIGNMENT
141
+ {
142
+ /**
143
+ * [w] Size of the data structure.
144
+ * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
145
+ * available in the structure. Used to preserve backward compatibility.
146
+ */
147
+ size_t size;
148
+ /**
149
+ * [r] Unique cubin id
150
+ */
151
+ uint64_t cubinCrc;
152
+ /**
153
+ * [r] PC offset
154
+ */
155
+ uint64_t pcOffset;
156
+ /**
157
+ * The function's unique symbol index in the module.
158
+ */
159
+ uint32_t functionIndex;
160
+ /**
161
+ * Padding
162
+ */
163
+ uint32_t pad;
164
+ /**
165
+ * [r] The function name. This name string might be shared across all the records
166
+ * including records from activity APIs representing the same function, and so it should not be
167
+ * modified or freed until post processing of all the records is done. Once done, it is user’s responsibility to
168
+ * free the memory using free() function.
169
+ */
170
+ char* functionName;
171
+ /**
172
+ * [r] Collected stall reason count
173
+ */
174
+ size_t stallReasonCount;
175
+ /**
176
+ * [r] Stall reason id
177
+ * Total samples
178
+ */
179
+ CUpti_PCSamplingStallReason *stallReason;
180
+ } CUpti_PCSamplingPCData;
181
+
182
+ /**
183
+ * \brief PC Sampling output data format
184
+ */
185
+ typedef enum
186
+ {
187
+ CUPTI_PC_SAMPLING_OUTPUT_DATA_FORMAT_INVALID = 0,
188
+ /**
189
+ * HW buffer data will be parsed during collection of data
190
+ */
191
+ CUPTI_PC_SAMPLING_OUTPUT_DATA_FORMAT_PARSED = 1,
192
+ } CUpti_PCSamplingOutputDataFormat;
193
+
194
+ /**
195
+ * \brief Collected PC Sampling data
196
+ *
197
+ */
198
+ typedef struct PACKED_ALIGNMENT
199
+ {
200
+ /**
201
+ * [w] Size of the data structure.
202
+ * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
203
+ * available in the structure. Used to preserve backward compatibility.
204
+ */
205
+ size_t size;
206
+ /**
207
+ * [w] Number of PCs to be collected
208
+ */
209
+ size_t collectNumPcs;
210
+ /**
211
+ * [r] Number of samples collected across all PCs.
212
+ * It includes samples for user modules, samples for non-user kernels and dropped samples.
213
+ * It includes counts for all non selected stall reasons.
214
+ * CUPTI does not provide PC records for non-user kernels.
215
+ * CUPTI does not provide PC records for instructions for which all selected stall reason metrics counts are zero.
216
+ */
217
+ uint64_t totalSamples;
218
+ /**
219
+ * [r] Number of samples that were dropped by hardware due to backpressure/overflow.
220
+ */
221
+ uint64_t droppedSamples;
222
+ /**
223
+ * [r] Number of PCs collected
224
+ */
225
+ size_t totalNumPcs;
226
+ /**
227
+ * [r] Number of PCs available for collection
228
+ */
229
+ size_t remainingNumPcs;
230
+ /**
231
+ * [r] Unique identifier for each range.
232
+ * Data collected across multiple ranges in multiple buffers can be identified using range id.
233
+ */
234
+ uint64_t rangeId;
235
+ /**
236
+ * [r] Profiled PC data
237
+ * This data struct should have enough memory to collect number of PCs mentioned in \brief collectNumPcs
238
+ */
239
+ CUpti_PCSamplingPCData *pPcData;
240
+ /**
241
+ * [r] Number of samples collected across all non user kernels PCs.
242
+ * It includes samples for non-user kernels.
243
+ * It includes counts for all non selected stall reasons as well.
244
+ * CUPTI does not provide PC records for non-user kernels.
245
+ */
246
+ uint64_t nonUsrKernelsTotalSamples;
247
+ } CUpti_PCSamplingData;
248
+
249
+ /**
250
+ * \brief PC Sampling configuration attributes
251
+ *
252
+ * PC Sampling configuration attribute types. These attributes can be read
253
+ * using \ref cuptiPCSamplingGetConfigurationAttribute and can be written
254
+ * using \ref cuptiPCSamplingSetConfigurationAttribute. Attributes marked
255
+ * [r] can only be read using \ref cuptiPCSamplingGetConfigurationAttribute
256
+ * [w] can only be written using \ref cuptiPCSamplingSetConfigurationAttribute
257
+ * [rw] can be read using \ref cuptiPCSamplingGetConfigurationAttribute and
258
+ * written using \ref cuptiPCSamplingSetConfigurationAttribute
259
+ */
260
+ typedef enum
261
+ {
262
+ CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_INVALID = 0,
263
+ /**
264
+ * [rw] Sampling period for PC Sampling.
265
+ * DEFAULT - CUPTI defined value based on number of SMs
266
+ * Valid values for the sampling
267
+ * periods are between 5 to 31 both inclusive. This will set the
268
+ * sampling period to (2^samplingPeriod) cycles.
269
+ * For e.g. for sampling period = 5 to 31, cycles = 32, 64, 128,..., 2^31
270
+ * Value is a uint32_t
271
+ */
272
+ CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SAMPLING_PERIOD = 1,
273
+ /**
274
+ * [w] Number of stall reasons to collect.
275
+ * DEFAULT - All stall reasons will be collected
276
+ * Value is a size_t
277
+ * [w] Stall reasons to collect
278
+ * DEFAULT - All stall reasons will be collected
279
+ * Input value should be a pointer pointing to array of stall reason indexes
280
+ * containing all the stall reason indexes to collect.
281
+ */
282
+ CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_STALL_REASON = 2,
283
+ /**
284
+ * [rw] Size of SW buffer for raw PC counter data downloaded from HW buffer
285
+ * DEFAULT - 1 MB, which can accommodate approximately 5500 PCs
286
+ * with all stall reasons
287
+ * Approximately it takes 16 Bytes (and some fixed size memory)
288
+ * to accommodate one PC with one stall reason
289
+ * For e.g. 1 PC with 1 stall reason = 32 Bytes
290
+ * 1 PC with 2 stall reason = 48 Bytes
291
+ * 1 PC with 4 stall reason = 96 Bytes
292
+ * Value is a size_t
293
+ */
294
+ CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SCRATCH_BUFFER_SIZE = 3,
295
+ /**
296
+ * [rw] Size of HW buffer in bytes
297
+ * DEFAULT - 512 MB
298
+ * If sampling period is too less, HW buffer can overflow
299
+ * and drop PC data
300
+ * Value is a size_t
301
+ */
302
+ CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_HARDWARE_BUFFER_SIZE = 4,
303
+ /**
304
+ * [rw] PC Sampling collection mode
305
+ * DEFAULT - CUPTI_PC_SAMPLING_COLLECTION_MODE_CONTINUOUS
306
+ * Input value should be of type \ref CUpti_PCSamplingCollectionMode.
307
+ */
308
+ CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_COLLECTION_MODE = 5,
309
+ /**
310
+ * [rw] Control over PC Sampling data collection range
311
+ * Default - 0
312
+ * 1 - Allows user to start and stop PC Sampling using APIs -
313
+ * \ref cuptiPCSamplingStart() - Start PC Sampling
314
+ * \ref cuptiPCSamplingStop() - Stop PC Sampling
315
+ * Value is a uint32_t
316
+ */
317
+ CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL = 6,
318
+ /**
319
+ * [w] Value for output data format
320
+ * Default - CUPTI_PC_SAMPLING_OUTPUT_DATA_FORMAT_PARSED
321
+ * Input value should be of type \ref CUpti_PCSamplingOutputDataFormat.
322
+ */
323
+ CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_OUTPUT_DATA_FORMAT = 7,
324
+ /**
325
+ * [w] Data buffer to hold collected PC Sampling data PARSED_DATA
326
+ * Default - none.
327
+ * Buffer type is void * which can point to PARSED_DATA
328
+ * Refer \ref CUpti_PCSamplingData for buffer format for PARSED_DATA
329
+ */
330
+ CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SAMPLING_DATA_BUFFER = 8,
331
+ CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_FORCE_INT = 0x7fffffff,
332
+ } CUpti_PCSamplingConfigurationAttributeType;
333
+
334
+ /**
335
+ * \brief PC sampling configuration information structure
336
+ *
337
+ * This structure provides \ref CUpti_PCSamplingConfigurationAttributeType which can be configured
338
+ * or queried for PC sampling configuration
339
+ */
340
+ typedef struct
341
+ {
342
+ /**
343
+ * Refer \ref CUpti_PCSamplingConfigurationAttributeType for all supported attribute types
344
+ */
345
+ CUpti_PCSamplingConfigurationAttributeType attributeType;
346
+ /*
347
+ * Configure or query status for \p attributeType
348
+ * CUPTI_SUCCESS for valid \p attributeType and \p attributeData
349
+ * CUPTI_ERROR_INVALID_OPERATION if \p attributeData is not valid
350
+ * CUPTI_ERROR_INVALID_PARAMETER if \p attributeType is not valid
351
+ */
352
+ CUptiResult attributeStatus;
353
+ union
354
+ {
355
+ /**
356
+ * Invalid Value
357
+ */
358
+ struct
359
+ {
360
+ uint64_t data[3];
361
+ } invalidData;
362
+ /**
363
+ * Refer \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SAMPLING_PERIOD
364
+ */
365
+ struct
366
+ {
367
+ uint32_t samplingPeriod;
368
+ } samplingPeriodData;
369
+ /**
370
+ * Refer \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_STALL_REASON
371
+ */
372
+ struct
373
+ {
374
+ size_t stallReasonCount;
375
+ uint32_t *pStallReasonIndex;
376
+ } stallReasonData;
377
+ /**
378
+ * Refer \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SCRATCH_BUFFER_SIZE
379
+ */
380
+ struct
381
+ {
382
+ size_t scratchBufferSize;
383
+ } scratchBufferSizeData;
384
+ /**
385
+ * Refer \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_HARDWARE_BUFFER_SIZE
386
+ */
387
+ struct
388
+ {
389
+ size_t hardwareBufferSize;
390
+ } hardwareBufferSizeData;
391
+ /**
392
+ * Refer \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_COLLECTION_MODE
393
+ */
394
+ struct
395
+ {
396
+ CUpti_PCSamplingCollectionMode collectionMode;
397
+ } collectionModeData;
398
+ /**
399
+ * Refer \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL
400
+ */
401
+ struct
402
+ {
403
+ uint32_t enableStartStopControl;
404
+ } enableStartStopControlData;
405
+ /**
406
+ * Refer \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_OUTPUT_DATA_FORMAT
407
+ */
408
+ struct
409
+ {
410
+ CUpti_PCSamplingOutputDataFormat outputDataFormat;
411
+ } outputDataFormatData;
412
+ /**
413
+ * Refer \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SAMPLING_DATA_BUFFER
414
+ */
415
+ struct
416
+ {
417
+ void *samplingDataBuffer;
418
+ } samplingDataBufferData;
419
+ } attributeData;
420
+ } CUpti_PCSamplingConfigurationInfo;
421
+
422
+ /**
423
+ * \brief PC sampling configuration structure
424
+ *
425
+ * This structure configures PC sampling using \ref cuptiPCSamplingSetConfigurationAttribute
426
+ * and queries PC sampling default configuration using \ref cuptiPCSamplingGetConfigurationAttribute
427
+ */
428
+ typedef struct
429
+ {
430
+ /**
431
+ * [w] Size of the data structure i.e. CUpti_PCSamplingConfigurationInfoParamsSize
432
+ * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
433
+ * available in the structure. Used to preserve backward compatibility.
434
+ */
435
+ size_t size;
436
+ /**
437
+ * [w] Assign to NULL
438
+ */
439
+ void* pPriv;
440
+ /**
441
+ * [w] CUcontext
442
+ */
443
+ CUcontext ctx;
444
+ /**
445
+ * [w] Number of attributes to configure using \ref cuptiPCSamplingSetConfigurationAttribute or query
446
+ * using \ref cuptiPCSamplingGetConfigurationAttribute
447
+ */
448
+ size_t numAttributes;
449
+ /**
450
+ * Refer \ref CUpti_PCSamplingConfigurationInfo
451
+ */
452
+ CUpti_PCSamplingConfigurationInfo *pPCSamplingConfigurationInfo;
453
+ } CUpti_PCSamplingConfigurationInfoParams;
454
+ #define CUpti_PCSamplingConfigurationInfoParamsSize CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_PCSamplingConfigurationInfoParams,pPCSamplingConfigurationInfo)
455
+
456
+ /**
457
+ * \brief Write PC Sampling configuration attribute.
458
+ *
459
+ * \param pParams A pointer to \ref CUpti_PCSamplingConfigurationInfoParams
460
+ * containing PC sampling configuration.
461
+ *
462
+ * \retval CUPTI_SUCCESS
463
+ * \retval CUPTI_ERROR_INVALID_OPERATION if this API is called with
464
+ * some invalid \p attrib.
465
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if attribute \p value is not valid
466
+ * or any \p pParams is not valid
467
+ * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device
468
+ * does not support the API
469
+ */
470
+ CUptiResult CUPTIAPI cuptiPCSamplingSetConfigurationAttribute(CUpti_PCSamplingConfigurationInfoParams *pParams);
471
+
472
+ /**
473
+ * \brief Read PC Sampling configuration attribute.
474
+ *
475
+ * \param pParams A pointer to \ref CUpti_PCSamplingConfigurationInfoParams
476
+ * containing PC sampling configuration.
477
+ *
478
+ * \retval CUPTI_SUCCESS
479
+ * \retval CUPTI_ERROR_INVALID_OPERATION if this API is called with
480
+ * some invalid attribute.
481
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p attrib is not valid
482
+ * or any \p pParams is not valid
483
+ * \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT indicates that
484
+ * the \p value buffer is too small to hold the attribute value
485
+ * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device
486
+ * does not support the API
487
+ */
488
+ CUptiResult CUPTIAPI cuptiPCSamplingGetConfigurationAttribute(CUpti_PCSamplingConfigurationInfoParams *pParams);
489
+
490
+ /**
491
+ * \brief Params for cuptiPCSamplingEnable
492
+ */
493
+ typedef struct
494
+ {
495
+ /**
496
+ * [w] Size of the data structure i.e. CUpti_PCSamplingGetDataParamsSize
497
+ * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
498
+ * available in the structure. Used to preserve backward compatibility.
499
+ */
500
+ size_t size;
501
+ /**
502
+ * [w] Assign to NULL
503
+ */
504
+ void* pPriv;
505
+ /**
506
+ * [w] CUcontext
507
+ */
508
+ CUcontext ctx;
509
+ /**
510
+ * \param pcSamplingData Data buffer to hold collected PC Sampling data PARSED_DATA
511
+ * Buffer type is void * which can point to PARSED_DATA
512
+ * Refer \ref CUpti_PCSamplingData for buffer format for PARSED_DATA
513
+ */
514
+ void *pcSamplingData;
515
+ } CUpti_PCSamplingGetDataParams;
516
+ #define CUpti_PCSamplingGetDataParamsSize CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_PCSamplingGetDataParams, pcSamplingData)
517
+ /**
518
+ * \brief Flush GPU PC sampling data periodically.
519
+ *
520
+ * Flushing of GPU PC Sampling data is required at following point to maintain uniqueness of PCs:
521
+ * For \brief CUPTI_PC_SAMPLING_COLLECTION_MODE_CONTINUOUS, after every module load-unload-load
522
+ * For \brief CUPTI_PC_SAMPLING_COLLECTION_MODE_KERNEL_SERIALIZED, after every kernel ends
523
+ * If configuration option \brief CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL
524
+ * is enabled, then after every range end i.e. \brief cuptiPCSamplingStop()
525
+ *
526
+ * If application is profiled in \brief CUPTI_PC_SAMPLING_COLLECTION_MODE_CONTINUOUS, with disabled
527
+ * \brief CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL, and there is no module unload,
528
+ * user can collect data in two ways:
529
+ * Use \brief cuptiPCSamplingGetData() API periodically
530
+ * Use \brief cuptiPCSamplingDisable() on application exit and read GPU PC sampling data from sampling
531
+ * data buffer passed during configuration.
532
+ * Note: In case, \brief cuptiPCSamplingGetData() API is not called periodically, then sampling data buffer
533
+ * passed during configuration should be large enough to hold all PCs data.
534
+ * \brief cuptiPCSamplingGetData() API never does device synchronization.
535
+ * It is possible that when the API is called there is some unconsumed data from the HW buffer. In this case
536
+ * CUPTI provides only the data available with it at that moment.
537
+ *
538
+ * \param Refer \ref CUpti_PCSamplingGetDataParams
539
+ *
540
+ * \retval CUPTI_SUCCESS
541
+ * \retval CUPTI_ERROR_INVALID_OPERATION if this API is called without
542
+ * enabling PC sampling.
543
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
544
+ * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device
545
+ * does not support the API
546
+ */
547
+ CUptiResult CUPTIAPI cuptiPCSamplingGetData(CUpti_PCSamplingGetDataParams *pParams);
548
+
549
+ /**
550
+ * \brief Params for cuptiPCSamplingEnable
551
+ */
552
+ typedef struct
553
+ {
554
+ /**
555
+ * [w] Size of the data structure i.e. CUpti_PCSamplingEnableParamsSize
556
+ * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
557
+ * available in the structure. Used to preserve backward compatibility.
558
+ */
559
+ size_t size;
560
+ /**
561
+ * [w] Assign to NULL
562
+ */
563
+ void* pPriv;
564
+ /**
565
+ * [w] CUcontext
566
+ */
567
+ CUcontext ctx;
568
+ } CUpti_PCSamplingEnableParams;
569
+ #define CUpti_PCSamplingEnableParamsSize CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_PCSamplingEnableParams, ctx)
570
+
571
+ /**
572
+ * \brief Enable PC sampling.
573
+ *
574
+ * \param Refer \ref CUpti_PCSamplingEnableParams
575
+ *
576
+ * \retval CUPTI_SUCCESS
577
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
578
+ * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device
579
+ * does not support the API
580
+ */
581
+ CUptiResult CUPTIAPI cuptiPCSamplingEnable(CUpti_PCSamplingEnableParams *pParams);
582
+
583
+ /**
584
+ * \brief Params for cuptiPCSamplingDisable
585
+ */
586
+ typedef struct
587
+ {
588
+ /**
589
+ * [w] Size of the data structure i.e. CUpti_PCSamplingDisableParamsSize
590
+ * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
591
+ * available in the structure. Used to preserve backward compatibility.
592
+ */
593
+ size_t size;
594
+ /**
595
+ * [w] Assign to NULL
596
+ */
597
+ void* pPriv;
598
+ /**
599
+ * [w] CUcontext
600
+ */
601
+ CUcontext ctx;
602
+ } CUpti_PCSamplingDisableParams;
603
+ #define CUpti_PCSamplingDisableParamsSize CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_PCSamplingDisableParams, ctx)
604
+
605
+ /**
606
+ * \brief Disable PC sampling.
607
+ *
608
+ * For application which doesn't destroy the CUDA context explicitly,
609
+ * this API does the PC Sampling tear-down, joins threads and copies PC records in the buffer provided
610
+ * during the PC sampling configuration. PC records which can't be accommodated in the buffer are discarded.
611
+ *
612
+ * \param Refer \ref CUpti_PCSamplingDisableParams
613
+ *
614
+ * \retval CUPTI_SUCCESS
615
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
616
+ * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device
617
+ * does not support the API
618
+ */
619
+ CUptiResult CUPTIAPI cuptiPCSamplingDisable(CUpti_PCSamplingDisableParams *pParams);
620
+
621
+ /**
622
+ * \brief Params for cuptiPCSamplingStart
623
+ */
624
+ typedef struct
625
+ {
626
+ /**
627
+ * [w] Size of the data structure i.e. CUpti_PCSamplingStartParamsSize
628
+ * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
629
+ * available in the structure. Used to preserve backward compatibility.
630
+ */
631
+ size_t size;
632
+ /**
633
+ * [w] Assign to NULL
634
+ */
635
+ void* pPriv;
636
+ /**
637
+ * [w] CUcontext
638
+ */
639
+ CUcontext ctx;
640
+ } CUpti_PCSamplingStartParams;
641
+ #define CUpti_PCSamplingStartParamsSize CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_PCSamplingStartParams, ctx)
642
+
643
+ /**
644
+ * \brief Start PC sampling.
645
+ *
646
+ * User can collect PC Sampling data for user-defined range specified by Start/Stop APIs.
647
+ * This API can be used to mark starting of range. Set configuration option
648
+ * \brief CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL to use this API.
649
+ *
650
+ * \param Refer \ref CUpti_PCSamplingStartParams
651
+ *
652
+ * \retval CUPTI_SUCCESS
653
+ * \retval CUPTI_ERROR_INVALID_OPERATION if this API is called with
654
+ * incorrect PC Sampling configuration.
655
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
656
+ * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device
657
+ * does not support the API
658
+ */
659
+ CUptiResult CUPTIAPI cuptiPCSamplingStart(CUpti_PCSamplingStartParams *pParams);
660
+
661
+ /**
662
+ * \brief Params for cuptiPCSamplingStop
663
+ */
664
+ typedef struct
665
+ {
666
+ /**
667
+ * [w] Size of the data structure i.e. CUpti_PCSamplingStopParamsSize
668
+ * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
669
+ * available in the structure. Used to preserve backward compatibility.
670
+ */
671
+ size_t size;
672
+ /**
673
+ * [w] Assign to NULL
674
+ */
675
+ void* pPriv;
676
+ /**
677
+ * [w] CUcontext
678
+ */
679
+ CUcontext ctx;
680
+ } CUpti_PCSamplingStopParams;
681
+ #define CUpti_PCSamplingStopParamsSize CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_PCSamplingStopParams, ctx)
682
+
683
+ /**
684
+ * \brief Stop PC sampling.
685
+ *
686
+ * User can collect PC Sampling data for user-defined range specified by Start/Stop APIs.
687
+ * This API can be used to mark end of range. Set configuration option
688
+ * \brief CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL to use this API.
689
+ *
690
+ * \param Refer \ref CUpti_PCSamplingStopParams
691
+ *
692
+ * \retval CUPTI_SUCCESS
693
+ * \retval CUPTI_ERROR_INVALID_OPERATION if this API is called with
694
+ * incorrect PC Sampling configuration.
695
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
696
+ * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device
697
+ * does not support the API
698
+ */
699
+ CUptiResult CUPTIAPI cuptiPCSamplingStop(CUpti_PCSamplingStopParams *pParams);
700
+
701
+ /**
702
+ * \brief Params for cuptiPCSamplingGetNumStallReasons
703
+ */
704
+ typedef struct
705
+ {
706
+ /**
707
+ * [w] Size of the data structure i.e. CUpti_PCSamplingGetNumStallReasonsParamsSize
708
+ * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
709
+ * available in the structure. Used to preserve backward compatibility.
710
+ */
711
+ size_t size;
712
+ /**
713
+ * [w] Assign to NULL
714
+ */
715
+ void* pPriv;
716
+ /**
717
+ * [w] CUcontext
718
+ */
719
+ CUcontext ctx;
720
+ /**
721
+ * [r] Number of stall reasons
722
+ */
723
+ size_t *numStallReasons;
724
+ } CUpti_PCSamplingGetNumStallReasonsParams;
725
+ #define CUpti_PCSamplingGetNumStallReasonsParamsSize CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_PCSamplingGetNumStallReasonsParams, numStallReasons)
726
+
727
+ /**
728
+ * \brief Get PC sampling stall reason count.
729
+ *
730
+ * \param Refer \ref CUpti_PCSamplingGetNumStallReasonsParams
731
+ *
732
+ * \retval CUPTI_SUCCESS
733
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
734
+ * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device
735
+ * does not support the API
736
+ */
737
+ CUptiResult CUPTIAPI cuptiPCSamplingGetNumStallReasons(CUpti_PCSamplingGetNumStallReasonsParams *pParams);
738
+
739
+ /**
740
+ * \brief Params for cuptiPCSamplingGetStallReasons
741
+ */
742
+ typedef struct
743
+ {
744
+ /**
745
+ * [w] Size of the data structure i.e. CUpti_PCSamplingGetStallReasonsParamsSize
746
+ * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
747
+ * available in the structure. Used to preserve backward compatibility.
748
+ */
749
+ size_t size;
750
+ /**
751
+ * [w] Assign to NULL
752
+ */
753
+ void* pPriv;
754
+ /**
755
+ * [w] CUcontext
756
+ */
757
+ CUcontext ctx;
758
+ /**
759
+ * [w] Number of stall reasons
760
+ */
761
+ size_t numStallReasons;
762
+ /**
763
+ * [r] Stall reason index
764
+ */
765
+ uint32_t *stallReasonIndex;
766
+ /**
767
+ * [r] Stall reasons name
768
+ */
769
+ char **stallReasons;
770
+ } CUpti_PCSamplingGetStallReasonsParams;
771
+ #define CUpti_PCSamplingGetStallReasonsParamsSize CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_PCSamplingGetStallReasonsParams, stallReasons)
772
+
773
+ /**
774
+ * \brief Get PC sampling stall reasons.
775
+ *
776
+ * \param Refer \ref CUpti_PCSamplingGetStallReasonsParams
777
+ *
778
+ * \retval CUPTI_SUCCESS
779
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid
780
+ * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device
781
+ * does not support the API
782
+ */
783
+ CUptiResult CUPTIAPI cuptiPCSamplingGetStallReasons(CUpti_PCSamplingGetStallReasonsParams *pParams);
784
+
785
+ /**
786
+ * \brief Params for cuptiGetSassToSourceCorrelation
787
+ */
788
+ typedef struct {
789
+ /**
790
+ * [w] Size of the data structure i.e. CUpti_GetSassToSourceCorrelationParamsSize
791
+ * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
792
+ * available in the structure. Used to preserve backward compatibility.
793
+ */
794
+ size_t size;
795
+ /**
796
+ * [w] Pointer to cubin binary where function belongs.
797
+ */
798
+ const void* cubin;
799
+ /**
800
+ * [w] Function name to which PC belongs.
801
+ */
802
+ const char *functionName;
803
+ /**
804
+ * [w] Size of cubin binary.
805
+ */
806
+ size_t cubinSize;
807
+ /**
808
+ * [r] Line number in the source code.
809
+ */
810
+ uint32_t lineNumber;
811
+ /**
812
+ * [w] PC offset
813
+ */
814
+ uint64_t pcOffset;
815
+ /**
816
+ * [r] Path for the source file.
817
+ */
818
+ char *fileName;
819
+ /**
820
+ * [r] Path for the directory of source file.
821
+ */
822
+ char *dirName;
823
+ } CUpti_GetSassToSourceCorrelationParams;
824
+ #define CUpti_GetSassToSourceCorrelationParamsSize CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_GetSassToSourceCorrelationParams, dirName)
825
+
826
+ /**
827
+ * \brief SASS to Source correlation.
828
+ *
829
+ * \param Refer \ref CUpti_GetSassToSourceCorrelationParams
830
+ *
831
+ * It is expected from user to free allocated memory for fileName and dirName after use.
832
+ *
833
+ * \retval CUPTI_SUCCESS
834
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if either of the parameters cubin or functionName
835
+ * is NULL or cubinSize is zero or size field is not set correctly.
836
+ * \retval CUPTI_ERROR_INVALID_MODULE provided cubin is invalid.
837
+ * \retval CUPTI_ERROR_UNKNOWN an internal error occurred.
838
+ * This error code is also used for cases when the function is not present in the module.
839
+ * A better error code will be returned in the future release.
840
+ */
841
+ CUptiResult CUPTIAPI cuptiGetSassToSourceCorrelation(CUpti_GetSassToSourceCorrelationParams *pParams);
842
+
843
+ /**
844
+ * \brief Params for cuptiGetCubinCrc
845
+ */
846
+ typedef struct {
847
+ /**
848
+ * [w] Size of configuration structure.
849
+ * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
850
+ * available in the structure. Used to preserve backward compatibility.
851
+ */
852
+ size_t size;
853
+ /**
854
+ * [w] Size of cubin binary.
855
+ */
856
+ size_t cubinSize;
857
+ /**
858
+ * [w] Pointer to cubin binary
859
+ */
860
+ const void* cubin;
861
+ /**
862
+ * [r] Computed CRC will be stored in it.
863
+ */
864
+ uint64_t cubinCrc;
865
+ } CUpti_GetCubinCrcParams;
866
+ #define CUpti_GetCubinCrcParamsSize CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_GetCubinCrcParams, cubinCrc)
867
+
868
+ /**
869
+ * \brief Get the CRC of cubin.
870
+ *
871
+ * This function returns the CRC of provided cubin binary.
872
+ *
873
+ * \param Refer \ref CUpti_GetCubinCrcParams
874
+ *
875
+ * \retval CUPTI_SUCCESS
876
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if parameter cubin is NULL or
877
+ * provided cubinSize is zero or size field is not set.
878
+ */
879
+ CUptiResult CUPTIAPI cuptiGetCubinCrc(CUpti_GetCubinCrcParams *pParams);
880
+
881
+ /**
882
+ * \brief Function type for callback used by CUPTI to request crc of
883
+ * loaded module.
884
+ *
885
+ * This callback function ask for crc of provided module in function.
886
+ * The provided crc will be stored in PC sampling records i.e. in the field 'cubinCrc' of the PC sampling
887
+ * struct CUpti_PCSamplingPCData. The CRC is uses during the offline source correlation to uniquely identify the module.
888
+ *
889
+ * \param cubin The pointer to cubin binary
890
+ * \param cubinSize The size of cubin binary.
891
+ * \param cubinCrc Returns the computed crc of cubin.
892
+ */
893
+ typedef void (CUPTIAPI *CUpti_ComputeCrcCallbackFunc)(
894
+ const void* cubin,
895
+ size_t cubinSize,
896
+ uint64_t *cubinCrc);
897
+
898
+ /**
899
+ * \brief Register callback function with CUPTI to use
900
+ * your own algorithm to compute cubin crc.
901
+ *
902
+ * This function registers a callback function and it gets called
903
+ * from CUPTI when a CUDA module is loaded.
904
+ *
905
+ * \param funcComputeCubinCrc callback is invoked when a CUDA module
906
+ * is loaded.
907
+ *
908
+ * \retval CUPTI_SUCCESS
909
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p funcComputeCubinCrc is NULL.
910
+ */
911
+ CUptiResult CUPTIAPI cuptiRegisterComputeCrcCallback(CUpti_ComputeCrcCallbackFunc funcComputeCubinCrc);
912
+
913
+ /** @} */ /* END CUPTI_PCSAMPLING_API */
914
+
915
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
916
+ #pragma GCC visibility pop
917
+ #endif
918
+
919
+ #if defined(__cplusplus)
920
+ }
921
+ #endif
922
+
923
+ #endif /*_CUPTI_PCSAMPLING_H_*/
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_runtime/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (220 Bytes). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cudnn/include/cudnn_cnn_infer_v8.h ADDED
@@ -0,0 +1,571 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2017-2022 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ /*
51
+ * cudnn_cnn_infer : cuDNN's basic definitions and inference CNN functions.
52
+ */
53
+
54
+ #if !defined(CUDNN_CNN_INFER_H_)
55
+ #define CUDNN_CNN_INFER_H_
56
+
57
+ #pragma once
58
+ #include <cuda_runtime.h>
59
+ #include <stdint.h>
60
+
61
+ #include "cudnn_version.h"
62
+ #include "cudnn_ops_infer.h"
63
+
64
+ /* These version numbers are autogenerated, do not edit manually. */
65
+ #define CUDNN_CNN_INFER_MAJOR 8
66
+ #define CUDNN_CNN_INFER_MINOR 7
67
+ #define CUDNN_CNN_INFER_PATCH 0
68
+
69
+ #if (CUDNN_CNN_INFER_MAJOR != CUDNN_MAJOR) || (CUDNN_CNN_INFER_MINOR != CUDNN_MINOR) || \
70
+ (CUDNN_CNN_INFER_PATCH != CUDNN_PATCHLEVEL)
71
+ #error Version mismatch in cuDNN CNN INFER!!!
72
+ #endif
73
+
74
+ #if defined(__cplusplus)
75
+ extern "C" {
76
+ #endif
77
+
78
+ typedef struct cudnnConvolutionStruct *cudnnConvolutionDescriptor_t;
79
+
80
+ /*
81
+ * convolution mode
82
+ */
83
+ typedef enum { CUDNN_CONVOLUTION = 0, CUDNN_CROSS_CORRELATION = 1 } cudnnConvolutionMode_t;
84
+
85
+ /*
86
+ * CUDNN Reorder
87
+ */
88
+ typedef enum {
89
+ CUDNN_DEFAULT_REORDER = 0,
90
+ CUDNN_NO_REORDER = 1,
91
+ } cudnnReorderType_t;
92
+
93
+ typedef struct cudnnConvolutionFwdAlgoPerfStruct {
94
+ cudnnConvolutionFwdAlgo_t algo;
95
+ cudnnStatus_t status;
96
+ float time;
97
+ size_t memory;
98
+ cudnnDeterminism_t determinism;
99
+ cudnnMathType_t mathType;
100
+ int reserved[3];
101
+ } cudnnConvolutionFwdAlgoPerf_t;
102
+
103
+ /* Create an instance of convolution descriptor */
104
+ cudnnStatus_t CUDNNWINAPI
105
+ cudnnCreateConvolutionDescriptor(cudnnConvolutionDescriptor_t *convDesc);
106
+
107
+ /* Destroy an instance of convolution descriptor */
108
+ cudnnStatus_t CUDNNWINAPI
109
+ cudnnDestroyConvolutionDescriptor(cudnnConvolutionDescriptor_t convDesc);
110
+
111
+ cudnnStatus_t CUDNNWINAPI
112
+ cudnnSetConvolutionMathType(cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t mathType);
113
+
114
+ cudnnStatus_t CUDNNWINAPI
115
+ cudnnGetConvolutionMathType(cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t *mathType);
116
+
117
+ cudnnStatus_t CUDNNWINAPI
118
+ cudnnSetConvolutionGroupCount(cudnnConvolutionDescriptor_t convDesc, int groupCount);
119
+
120
+ cudnnStatus_t CUDNNWINAPI
121
+ cudnnGetConvolutionGroupCount(cudnnConvolutionDescriptor_t convDesc, int *groupCount);
122
+
123
+ cudnnStatus_t CUDNNWINAPI
124
+ cudnnSetConvolutionReorderType(cudnnConvolutionDescriptor_t convDesc, cudnnReorderType_t reorderType);
125
+
126
+ cudnnStatus_t CUDNNWINAPI
127
+ cudnnGetConvolutionReorderType(cudnnConvolutionDescriptor_t convDesc, cudnnReorderType_t *reorderType);
128
+
129
+ cudnnStatus_t CUDNNWINAPI
130
+ cudnnSetConvolution2dDescriptor(cudnnConvolutionDescriptor_t convDesc,
131
+ int pad_h, /* zero-padding height */
132
+ int pad_w, /* zero-padding width */
133
+ int u, /* vertical filter stride */
134
+ int v, /* horizontal filter stride */
135
+ int dilation_h, /* filter dilation in the vertical dimension */
136
+ int dilation_w, /* filter dilation in the horizontal dimension */
137
+ cudnnConvolutionMode_t mode,
138
+ cudnnDataType_t computeType);
139
+
140
+ cudnnStatus_t CUDNNWINAPI
141
+ cudnnGetConvolution2dDescriptor(const cudnnConvolutionDescriptor_t convDesc,
142
+ int *pad_h, /* zero-padding height */
143
+ int *pad_w, /* zero-padding width */
144
+ int *u, /* vertical filter stride */
145
+ int *v, /* horizontal filter stride */
146
+ int *dilation_h, /* filter dilation in the vertical dimension */
147
+ int *dilation_w, /* filter dilation in the horizontal dimension */
148
+ cudnnConvolutionMode_t *mode,
149
+ cudnnDataType_t *computeType);
150
+
151
+ cudnnStatus_t CUDNNWINAPI
152
+ cudnnSetConvolutionNdDescriptor(cudnnConvolutionDescriptor_t convDesc,
153
+ int arrayLength, /* nbDims-2 size */
154
+ const int padA[],
155
+ const int filterStrideA[],
156
+ const int dilationA[],
157
+ cudnnConvolutionMode_t mode,
158
+ cudnnDataType_t computeType); /* convolution data type */
159
+
160
+ /* Helper function to return the dimensions of the output tensor given a convolution descriptor */
161
+ cudnnStatus_t CUDNNWINAPI
162
+ cudnnGetConvolutionNdDescriptor(const cudnnConvolutionDescriptor_t convDesc,
163
+ int arrayLengthRequested,
164
+ int *arrayLength,
165
+ int padA[],
166
+ int strideA[],
167
+ int dilationA[],
168
+ cudnnConvolutionMode_t *mode,
169
+ cudnnDataType_t *computeType); /* convolution data type */
170
+
171
+ cudnnStatus_t CUDNNWINAPI
172
+ cudnnGetConvolution2dForwardOutputDim(const cudnnConvolutionDescriptor_t convDesc,
173
+ const cudnnTensorDescriptor_t inputTensorDesc,
174
+ const cudnnFilterDescriptor_t filterDesc,
175
+ int *n,
176
+ int *c,
177
+ int *h,
178
+ int *w);
179
+
180
+ /* Helper function to return the dimensions of the output tensor given a convolution descriptor */
181
+ cudnnStatus_t CUDNNWINAPI
182
+ cudnnGetConvolutionNdForwardOutputDim(const cudnnConvolutionDescriptor_t convDesc,
183
+ const cudnnTensorDescriptor_t inputTensorDesc,
184
+ const cudnnFilterDescriptor_t filterDesc,
185
+ int nbDims,
186
+ int tensorOuputDimA[]);
187
+
188
+ /* helper function to provide the convolution forward algo that fit best the requirement */
189
+ cudnnStatus_t CUDNNWINAPI
190
+ cudnnGetConvolutionForwardAlgorithmMaxCount(cudnnHandle_t handle, int *count);
191
+
192
+ cudnnStatus_t CUDNNWINAPI
193
+ cudnnGetConvolutionForwardAlgorithm_v7(cudnnHandle_t handle,
194
+ const cudnnTensorDescriptor_t srcDesc,
195
+ const cudnnFilterDescriptor_t filterDesc,
196
+ const cudnnConvolutionDescriptor_t convDesc,
197
+ const cudnnTensorDescriptor_t destDesc,
198
+ const int requestedAlgoCount,
199
+ int *returnedAlgoCount,
200
+ cudnnConvolutionFwdAlgoPerf_t *perfResults);
201
+
202
+ cudnnStatus_t CUDNNWINAPI
203
+ cudnnFindConvolutionForwardAlgorithm(cudnnHandle_t handle,
204
+ const cudnnTensorDescriptor_t xDesc,
205
+ const cudnnFilterDescriptor_t wDesc,
206
+ const cudnnConvolutionDescriptor_t convDesc,
207
+ const cudnnTensorDescriptor_t yDesc,
208
+ const int requestedAlgoCount,
209
+ int *returnedAlgoCount,
210
+ cudnnConvolutionFwdAlgoPerf_t *perfResults);
211
+
212
+ cudnnStatus_t CUDNNWINAPI
213
+ cudnnFindConvolutionForwardAlgorithmEx(cudnnHandle_t handle,
214
+ const cudnnTensorDescriptor_t xDesc,
215
+ const void *x,
216
+ const cudnnFilterDescriptor_t wDesc,
217
+ const void *w,
218
+ const cudnnConvolutionDescriptor_t convDesc,
219
+ const cudnnTensorDescriptor_t yDesc,
220
+ void *y,
221
+ const int requestedAlgoCount,
222
+ int *returnedAlgoCount,
223
+ cudnnConvolutionFwdAlgoPerf_t *perfResults,
224
+ void *workSpace,
225
+ size_t workSpaceSizeInBytes);
226
+
227
+ cudnnStatus_t CUDNNWINAPI
228
+ cudnnIm2Col(cudnnHandle_t handle,
229
+ const cudnnTensorDescriptor_t xDesc,
230
+ const void *x,
231
+ const cudnnFilterDescriptor_t wDesc,
232
+ const cudnnConvolutionDescriptor_t convDesc,
233
+ void *colBuffer);
234
+
235
+ cudnnStatus_t CUDNNWINAPI
236
+ cudnnReorderFilterAndBias(cudnnHandle_t handle,
237
+ const cudnnFilterDescriptor_t filterDesc,
238
+ cudnnReorderType_t reorderType,
239
+ const void *filterData,
240
+ void *reorderedFilterData,
241
+ int reorderBias,
242
+ const void *biasData,
243
+ void *reorderedBiasData);
244
+
245
+ /* Helper function to return the minimum size of the workspace to be passed to the convolution given an algo*/
246
+ cudnnStatus_t CUDNNWINAPI
247
+ cudnnGetConvolutionForwardWorkspaceSize(cudnnHandle_t handle,
248
+ const cudnnTensorDescriptor_t xDesc,
249
+ const cudnnFilterDescriptor_t wDesc,
250
+ const cudnnConvolutionDescriptor_t convDesc,
251
+ const cudnnTensorDescriptor_t yDesc,
252
+ cudnnConvolutionFwdAlgo_t algo,
253
+ size_t *sizeInBytes);
254
+
255
+ /* Convolution functions: All of the form "output = alpha * Op(inputs) + beta * output" */
256
+
257
+ /* Function to perform the forward pass for batch convolution */
258
+ cudnnStatus_t CUDNNWINAPI
259
+ cudnnConvolutionForward(cudnnHandle_t handle,
260
+ const void *alpha,
261
+ const cudnnTensorDescriptor_t xDesc,
262
+ const void *x,
263
+ const cudnnFilterDescriptor_t wDesc,
264
+ const void *w,
265
+ const cudnnConvolutionDescriptor_t convDesc,
266
+ cudnnConvolutionFwdAlgo_t algo,
267
+ void *workSpace,
268
+ size_t workSpaceSizeInBytes,
269
+ const void *beta,
270
+ const cudnnTensorDescriptor_t yDesc,
271
+ void *y);
272
+
273
+ /* Fused conv/bias/activation operation : y = Act( alpha1 * conv(x) + alpha2 * z + bias ) */
274
+ cudnnStatus_t CUDNNWINAPI
275
+ cudnnConvolutionBiasActivationForward(cudnnHandle_t handle,
276
+ const void *alpha1,
277
+ const cudnnTensorDescriptor_t xDesc,
278
+ const void *x,
279
+ const cudnnFilterDescriptor_t wDesc,
280
+ const void *w,
281
+ const cudnnConvolutionDescriptor_t convDesc,
282
+ cudnnConvolutionFwdAlgo_t algo,
283
+ void *workSpace,
284
+ size_t workSpaceSizeInBytes,
285
+ const void *alpha2,
286
+ const cudnnTensorDescriptor_t zDesc,
287
+ const void *z,
288
+ const cudnnTensorDescriptor_t biasDesc,
289
+ const void *bias,
290
+ const cudnnActivationDescriptor_t activationDesc,
291
+ const cudnnTensorDescriptor_t yDesc,
292
+ void *y);
293
+
294
+ /* helper function to provide the convolution backward data algo that fit best the requirement */
295
+
296
+ typedef struct cudnnConvolutionBwdDataAlgoPerfStruct {
297
+ cudnnConvolutionBwdDataAlgo_t algo;
298
+ cudnnStatus_t status;
299
+ float time;
300
+ size_t memory;
301
+ cudnnDeterminism_t determinism;
302
+ cudnnMathType_t mathType;
303
+ int reserved[3];
304
+ } cudnnConvolutionBwdDataAlgoPerf_t;
305
+
306
+ cudnnStatus_t CUDNNWINAPI
307
+ cudnnGetConvolutionBackwardDataAlgorithmMaxCount(cudnnHandle_t handle, int *count);
308
+
309
+ cudnnStatus_t CUDNNWINAPI
310
+ cudnnFindConvolutionBackwardDataAlgorithm(cudnnHandle_t handle,
311
+ const cudnnFilterDescriptor_t wDesc,
312
+ const cudnnTensorDescriptor_t dyDesc,
313
+ const cudnnConvolutionDescriptor_t convDesc,
314
+ const cudnnTensorDescriptor_t dxDesc,
315
+ const int requestedAlgoCount,
316
+ int *returnedAlgoCount,
317
+ cudnnConvolutionBwdDataAlgoPerf_t *perfResults);
318
+
319
+ cudnnStatus_t CUDNNWINAPI
320
+ cudnnFindConvolutionBackwardDataAlgorithmEx(cudnnHandle_t handle,
321
+ const cudnnFilterDescriptor_t wDesc,
322
+ const void *w,
323
+ const cudnnTensorDescriptor_t dyDesc,
324
+ const void *dy,
325
+ const cudnnConvolutionDescriptor_t convDesc,
326
+ const cudnnTensorDescriptor_t dxDesc,
327
+ void *dx,
328
+ const int requestedAlgoCount,
329
+ int *returnedAlgoCount,
330
+ cudnnConvolutionBwdDataAlgoPerf_t *perfResults,
331
+ void *workSpace,
332
+ size_t workSpaceSizeInBytes);
333
+
334
+ cudnnStatus_t CUDNNWINAPI
335
+ cudnnGetConvolutionBackwardDataAlgorithm_v7(cudnnHandle_t handle,
336
+ const cudnnFilterDescriptor_t filterDesc,
337
+ const cudnnTensorDescriptor_t diffDesc,
338
+ const cudnnConvolutionDescriptor_t convDesc,
339
+ const cudnnTensorDescriptor_t gradDesc,
340
+ const int requestedAlgoCount,
341
+ int *returnedAlgoCount,
342
+ cudnnConvolutionBwdDataAlgoPerf_t *perfResults);
343
+
344
+ /*
345
+ * convolution algorithm (which requires potentially some workspace)
346
+ */
347
+
348
+ /* Helper function to return the minimum size of the workspace to be passed to the convolution given an algo*/
349
+ cudnnStatus_t CUDNNWINAPI
350
+ cudnnGetConvolutionBackwardDataWorkspaceSize(cudnnHandle_t handle,
351
+ const cudnnFilterDescriptor_t wDesc,
352
+ const cudnnTensorDescriptor_t dyDesc,
353
+ const cudnnConvolutionDescriptor_t convDesc,
354
+ const cudnnTensorDescriptor_t dxDesc,
355
+ cudnnConvolutionBwdDataAlgo_t algo,
356
+ size_t *sizeInBytes);
357
+
358
+ cudnnStatus_t CUDNNWINAPI
359
+ cudnnConvolutionBackwardData(cudnnHandle_t handle,
360
+ const void *alpha,
361
+ const cudnnFilterDescriptor_t wDesc,
362
+ const void *w,
363
+ const cudnnTensorDescriptor_t dyDesc,
364
+ const void *dy,
365
+ const cudnnConvolutionDescriptor_t convDesc,
366
+ cudnnConvolutionBwdDataAlgo_t algo,
367
+ void *workSpace,
368
+ size_t workSpaceSizeInBytes,
369
+ const void *beta,
370
+ const cudnnTensorDescriptor_t dxDesc,
371
+ void *dx);
372
+
373
+ /* Helper function to calculate folding descriptors for dgrad */
374
+ cudnnStatus_t CUDNNWINAPI
375
+ cudnnGetFoldedConvBackwardDataDescriptors(const cudnnHandle_t handle,
376
+ const cudnnFilterDescriptor_t filterDesc,
377
+ const cudnnTensorDescriptor_t diffDesc,
378
+ const cudnnConvolutionDescriptor_t convDesc,
379
+ const cudnnTensorDescriptor_t gradDesc,
380
+ const cudnnTensorFormat_t transformFormat,
381
+ cudnnFilterDescriptor_t foldedFilterDesc,
382
+ cudnnTensorDescriptor_t paddedDiffDesc,
383
+ cudnnConvolutionDescriptor_t foldedConvDesc,
384
+ cudnnTensorDescriptor_t foldedGradDesc,
385
+ cudnnTensorTransformDescriptor_t filterFoldTransDesc,
386
+ cudnnTensorTransformDescriptor_t diffPadTransDesc,
387
+ cudnnTensorTransformDescriptor_t gradFoldTransDesc,
388
+ cudnnTensorTransformDescriptor_t gradUnfoldTransDesc);
389
+
390
+ /* cudnnFusedOps... */
391
+ struct cudnnFusedOpsConstParamStruct;
392
+ typedef struct cudnnFusedOpsConstParamStruct *cudnnFusedOpsConstParamPack_t;
393
+
394
+ struct cudnnFusedOpsVariantParamStruct;
395
+ typedef struct cudnnFusedOpsVariantParamStruct *cudnnFusedOpsVariantParamPack_t;
396
+
397
+ struct cudnnFusedOpsPlanStruct;
398
+ typedef struct cudnnFusedOpsPlanStruct *cudnnFusedOpsPlan_t;
399
+
400
+ typedef enum {
401
+ /* each op in [ ] can be disabled by passing NULL ptr */
402
+ /* [per channel scale], [per channel bias], [activation], convolution, [generate BN stats] */
403
+ CUDNN_FUSED_SCALE_BIAS_ACTIVATION_CONV_BNSTATS = 0,
404
+ /* [per channel scale], [per channel bias], [activation], convolutionBackwardWeights */
405
+ CUDNN_FUSED_SCALE_BIAS_ACTIVATION_WGRAD = 1,
406
+ /* utility for BN training in BN-conv fusion */
407
+ /* computes the equivalent scale and bias from ySum ySqSum and learned scale, bias */
408
+ /* optionally update running stats and generate saved stats */
409
+ CUDNN_FUSED_BN_FINALIZE_STATISTICS_TRAINING = 2,
410
+ /* utility for BN inference in BN-conv fusion */
411
+ /* computes the equivalent scale and bias from learned running stats and learned scale, bias */
412
+ CUDNN_FUSED_BN_FINALIZE_STATISTICS_INFERENCE = 3,
413
+ /* reserved for future use: convolution, [per channel scale], [per channel bias], [residual add], [activation] */
414
+ CUDNN_FUSED_CONV_SCALE_BIAS_ADD_ACTIVATION = 4,
415
+ /* reserved for future use: [per channel scale], [per channel bias], [residual add], activation, bitmask */
416
+ CUDNN_FUSED_SCALE_BIAS_ADD_ACTIVATION_GEN_BITMASK = 5,
417
+ /* reserved for future use */
418
+ CUDNN_FUSED_DACTIVATION_FORK_DBATCHNORM = 6,
419
+ } cudnnFusedOps_t;
420
+
421
+ typedef enum {
422
+ /* set XDESC: pass previously initialized cudnnTensorDescriptor_t */
423
+ /* get XDESC: pass previously created cudnnTensorDescriptor_t */
424
+ CUDNN_PARAM_XDESC = 0,
425
+ /* set/get XDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
426
+ CUDNN_PARAM_XDATA_PLACEHOLDER = 1,
427
+ /* set/get BN_MODE: pass cudnnBatchNormMode_t* */
428
+ CUDNN_PARAM_BN_MODE = 2,
429
+ /* set CUDNN_PARAM_BN_EQSCALEBIAS_DESC: pass previously initialized cudnnTensorDescriptor_t */
430
+ /* get CUDNN_PARAM_BN_EQSCALEBIAS_DESC: pass previously created cudnnTensorDescriptor_t */
431
+ CUDNN_PARAM_BN_EQSCALEBIAS_DESC = 3,
432
+ /* set/get BN_EQSCALE_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
433
+ CUDNN_PARAM_BN_EQSCALE_PLACEHOLDER = 4,
434
+ /* set/get BN_EQBIAS_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
435
+ CUDNN_PARAM_BN_EQBIAS_PLACEHOLDER = 5,
436
+ /* set ACTIVATION_DESC: pass previously initialized cudnnActivationDescriptor_t */
437
+ /* get ACTIVATION_DESC: pass previously created cudnnActivationDescriptor_t */
438
+ CUDNN_PARAM_ACTIVATION_DESC = 6,
439
+ /* set CONV_DESC: pass previously initialized cudnnConvolutionDescriptor_t */
440
+ /* get CONV_DESC: pass previously created cudnnConvolutionDescriptor_t */
441
+ CUDNN_PARAM_CONV_DESC = 7,
442
+ /* set WDESC: pass previously initialized cudnnFilterDescriptor_t */
443
+ /* get WDESC: pass previously created cudnnFilterDescriptor_t */
444
+ CUDNN_PARAM_WDESC = 8,
445
+ /* set/get WDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
446
+ CUDNN_PARAM_WDATA_PLACEHOLDER = 9,
447
+ /* set DWDESC: pass previously initialized cudnnFilterDescriptor_t */
448
+ /* get DWDESC: pass previously created cudnnFilterDescriptor_t */
449
+ CUDNN_PARAM_DWDESC = 10,
450
+ /* set/get DWDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
451
+ CUDNN_PARAM_DWDATA_PLACEHOLDER = 11,
452
+ /* set YDESC: pass previously initialized cudnnTensorDescriptor_t */
453
+ /* get YDESC: pass previously created cudnnTensorDescriptor_t */
454
+ CUDNN_PARAM_YDESC = 12,
455
+ /* set/get YDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
456
+ CUDNN_PARAM_YDATA_PLACEHOLDER = 13,
457
+ /* set DYDESC: pass previously initialized cudnnTensorDescriptor_t */
458
+ /* get DYDESC: pass previously created cudnnTensorDescriptor_t */
459
+ CUDNN_PARAM_DYDESC = 14,
460
+ /* set/get DYDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
461
+ CUDNN_PARAM_DYDATA_PLACEHOLDER = 15,
462
+ /* set YSTATS_DESC: pass previously initialized cudnnTensorDescriptor_t */
463
+ /* get YSTATS_DESC: pass previously created cudnnTensorDescriptor_t */
464
+ CUDNN_PARAM_YSTATS_DESC = 16,
465
+ /* set/get YSUM_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
466
+ CUDNN_PARAM_YSUM_PLACEHOLDER = 17,
467
+ /* set/get YSQSUM_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
468
+ CUDNN_PARAM_YSQSUM_PLACEHOLDER = 18,
469
+ /* set CUDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC: pass previously initialized cudnnTensorDescriptor_t */
470
+ /* get CUDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC: pass previously created cudnnTensorDescriptor_t */
471
+ CUDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC = 19,
472
+ /* set/get CUDNN_PARAM_BN_SCALE_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
473
+ CUDNN_PARAM_BN_SCALE_PLACEHOLDER = 20,
474
+ /* set/get CUDNN_PARAM_BN_BIAS_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
475
+ CUDNN_PARAM_BN_BIAS_PLACEHOLDER = 21,
476
+ /* set/get CUDNN_PARAM_BN_SAVED_MEAN_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
477
+ CUDNN_PARAM_BN_SAVED_MEAN_PLACEHOLDER = 22,
478
+ /* set/get CUDNN_PARAM_BN_SAVED_INVSTD_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
479
+ CUDNN_PARAM_BN_SAVED_INVSTD_PLACEHOLDER = 23,
480
+ /* set/get CUDNN_PARAM_BN_RUNNING_MEAN_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
481
+ CUDNN_PARAM_BN_RUNNING_MEAN_PLACEHOLDER = 24,
482
+ /* set/get CUDNN_PARAM_BN_RUNNING_VAR_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
483
+ CUDNN_PARAM_BN_RUNNING_VAR_PLACEHOLDER = 25,
484
+
485
+ /* set ZDESC: pass previously initialized cudnnTensorDescriptor_t */
486
+ /* get ZDESC: pass previously created cudnnTensorDescriptor_t */
487
+ CUDNN_PARAM_ZDESC = 26,
488
+ /* set/get ZDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
489
+ CUDNN_PARAM_ZDATA_PLACEHOLDER = 27,
490
+ /* set BN_Z_EQSCALEBIAS_DESC: pass previously initialized cudnnTensorDescriptor_t */
491
+ /* get BN_Z_EQSCALEBIAS_DESC: pass previously created cudnnTensorDescriptor_t */
492
+ CUDNN_PARAM_BN_Z_EQSCALEBIAS_DESC = 28,
493
+ /* set/get BN_Z_EQSCALE_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
494
+ CUDNN_PARAM_BN_Z_EQSCALE_PLACEHOLDER = 29,
495
+ /* set/get BN_Z_EQBIAS_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
496
+ CUDNN_PARAM_BN_Z_EQBIAS_PLACEHOLDER = 30,
497
+
498
+ /* set ACTIVATION_BITMASK_DESC: pass previously initialized cudnnTensorDescriptor_t */
499
+ /* get ACTIVATION_BITMASK_DESC: pass previously created cudnnTensorDescriptor_t */
500
+ CUDNN_PARAM_ACTIVATION_BITMASK_DESC = 31,
501
+ /* set/get ACTIVATION_BITMASK_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
502
+ CUDNN_PARAM_ACTIVATION_BITMASK_PLACEHOLDER = 32,
503
+
504
+ /* set DXDESC: pass previously initialized cudnnTensorDescriptor_t */
505
+ /* get DXDESC: pass previously created cudnnTensorDescriptor_t */
506
+ CUDNN_PARAM_DXDESC = 33,
507
+ /* set/get DXDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
508
+ CUDNN_PARAM_DXDATA_PLACEHOLDER = 34,
509
+ /* set DZDESC: pass previously initialized cudnnTensorDescriptor_t */
510
+ /* get DZDESC: pass previously created cudnnTensorDescriptor_t */
511
+ CUDNN_PARAM_DZDESC = 35,
512
+ /* set/get DZDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
513
+ CUDNN_PARAM_DZDATA_PLACEHOLDER = 36,
514
+ /* set/get CUDNN_PARAM_BN_DSCALE_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
515
+ CUDNN_PARAM_BN_DSCALE_PLACEHOLDER = 37,
516
+ /* set/get CUDNN_PARAM_BN_DBIAS_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
517
+ CUDNN_PARAM_BN_DBIAS_PLACEHOLDER = 38,
518
+ } cudnnFusedOpsConstParamLabel_t;
519
+
520
+ typedef enum {
521
+ CUDNN_PTR_NULL = 0,
522
+ CUDNN_PTR_ELEM_ALIGNED = 1,
523
+ CUDNN_PTR_16B_ALIGNED = 2,
524
+ } cudnnFusedOpsPointerPlaceHolder_t;
525
+
526
+ typedef enum {
527
+ /* set: pass void* pointing to dev memory */
528
+ /* get: pass void** pointing to host memory */
529
+ CUDNN_PTR_XDATA = 0,
530
+ CUDNN_PTR_BN_EQSCALE = 1,
531
+ CUDNN_PTR_BN_EQBIAS = 2,
532
+ CUDNN_PTR_WDATA = 3,
533
+ CUDNN_PTR_DWDATA = 4,
534
+ CUDNN_PTR_YDATA = 5,
535
+ CUDNN_PTR_DYDATA = 6,
536
+ CUDNN_PTR_YSUM = 7,
537
+ CUDNN_PTR_YSQSUM = 8,
538
+ CUDNN_PTR_WORKSPACE = 9,
539
+ CUDNN_PTR_BN_SCALE = 10,
540
+ CUDNN_PTR_BN_BIAS = 11,
541
+ CUDNN_PTR_BN_SAVED_MEAN = 12,
542
+ CUDNN_PTR_BN_SAVED_INVSTD = 13,
543
+ CUDNN_PTR_BN_RUNNING_MEAN = 14,
544
+ CUDNN_PTR_BN_RUNNING_VAR = 15,
545
+ CUDNN_PTR_ZDATA = 16,
546
+ CUDNN_PTR_BN_Z_EQSCALE = 17,
547
+ CUDNN_PTR_BN_Z_EQBIAS = 18,
548
+ CUDNN_PTR_ACTIVATION_BITMASK = 19,
549
+ CUDNN_PTR_DXDATA = 20,
550
+ CUDNN_PTR_DZDATA = 21,
551
+ CUDNN_PTR_BN_DSCALE = 22,
552
+ CUDNN_PTR_BN_DBIAS = 23,
553
+
554
+ /* set/get: pass size_t* pointing to host memory */
555
+ CUDNN_SCALAR_SIZE_T_WORKSPACE_SIZE_IN_BYTES = 100,
556
+ /* set/get: pass int64_t* pointing to host memory */
557
+ CUDNN_SCALAR_INT64_T_BN_ACCUMULATION_COUNT = 101,
558
+ /* set/get: pass double* pointing to host memory */
559
+ CUDNN_SCALAR_DOUBLE_BN_EXP_AVG_FACTOR = 102,
560
+ /* set/get: pass double* pointing to host memory */
561
+ CUDNN_SCALAR_DOUBLE_BN_EPSILON = 103,
562
+ } cudnnFusedOpsVariantParamLabel_t;
563
+
564
+ cudnnStatus_t CUDNNWINAPI
565
+ cudnnCnnInferVersionCheck(void);
566
+
567
+ #if defined(__cplusplus)
568
+ }
569
+ #endif
570
+
571
+ #endif /* CUDNN_CNN_INFER_H_ */
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cudnn/include/cudnn_version.h ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2017-2022 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ /**
51
+ * \file: The master cuDNN version file.
52
+ */
53
+
54
+ #ifndef CUDNN_VERSION_H_
55
+ #define CUDNN_VERSION_H_
56
+
57
+ #define CUDNN_MAJOR 8
58
+ #define CUDNN_MINOR 7
59
+ #define CUDNN_PATCHLEVEL 0
60
+
61
+ #define CUDNN_VERSION (CUDNN_MAJOR * 1000 + CUDNN_MINOR * 100 + CUDNN_PATCHLEVEL)
62
+
63
+ /* cannot use constexpr here since this is a C-only file */
64
+ /* Below is the max SM version this cuDNN library is aware of and supports natively */
65
+
66
+ #define CUDNN_MAX_SM_MAJOR_NUMBER 9
67
+ #define CUDNN_MAX_SM_MINOR_NUMBER 0
68
+ #define CUDNN_MAX_DEVICE_VERSION (CUDNN_MAX_SM_MAJOR_NUMBER * 100) + (CUDNN_MAX_SM_MINOR_NUMBER * 10)
69
+
70
+ #endif /* CUDNN_VERSION_H */
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cufft/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (213 Bytes). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cufft/include/__init__.py ADDED
File without changes
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cufft/include/cudalibxt.h ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* Copyright 2013,2014 NVIDIA Corporation. All rights reserved.
2
+ *
3
+ * NOTICE TO LICENSEE:
4
+ *
5
+ * The source code and/or documentation ("Licensed Deliverables") are
6
+ * subject to NVIDIA intellectual property rights under U.S. and
7
+ * international Copyright laws.
8
+ *
9
+ * The Licensed Deliverables contained herein are PROPRIETARY and
10
+ * CONFIDENTIAL to NVIDIA and are being provided under the terms and
11
+ * conditions of a form of NVIDIA software license agreement by and
12
+ * between NVIDIA and Licensee ("License Agreement") or electronically
13
+ * accepted by Licensee. Notwithstanding any terms or conditions to
14
+ * the contrary in the License Agreement, reproduction or disclosure
15
+ * of the Licensed Deliverables to any third party without the express
16
+ * written consent of NVIDIA is prohibited.
17
+ *
18
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
19
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
20
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. THEY ARE
21
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
22
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
23
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
24
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
25
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
26
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
27
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
28
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
29
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
30
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
31
+ * OF THESE LICENSED DELIVERABLES.
32
+ *
33
+ * U.S. Government End Users. These Licensed Deliverables are a
34
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
35
+ * 1995), consisting of "commercial computer software" and "commercial
36
+ * computer software documentation" as such terms are used in 48
37
+ * C.F.R. 12.212 (SEPT 1995) and are provided to the U.S. Government
38
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
39
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
40
+ * U.S. Government End Users acquire the Licensed Deliverables with
41
+ * only those rights set forth herein.
42
+ *
43
+ * Any use of the Licensed Deliverables in individual and commercial
44
+ * software must include, in the user documentation and internal
45
+ * comments to the code, the above Disclaimer and U.S. Government End
46
+ * Users Notice.
47
+ */
48
+
49
+ /*!
50
+ * \file cudalibxt.h
51
+ * \brief Public header file for the NVIDIA library multi-GPU support structures
52
+ */
53
+
54
+ #ifndef _CUDA_LIB_XT_H_
55
+ #define _CUDA_LIB_XT_H_
56
+ #include <cuda_runtime.h>
57
+
58
+ #define CUDA_XT_DESCRIPTOR_VERSION 0x01000000 // This is added to CUDART_VERSION
59
+
60
+ enum cudaXtCopyType_t {
61
+ LIB_XT_COPY_HOST_TO_DEVICE,
62
+ LIB_XT_COPY_DEVICE_TO_HOST,
63
+ LIB_XT_COPY_DEVICE_TO_DEVICE
64
+ } ;
65
+ typedef enum cudaXtCopyType_t cudaLibXtCopyType;
66
+
67
+ enum libFormat_t {
68
+ LIB_FORMAT_CUFFT = 0x0,
69
+ LIB_FORMAT_UNDEFINED = 0x1
70
+ };
71
+
72
+ typedef enum libFormat_t libFormat;
73
+
74
+ #define MAX_CUDA_DESCRIPTOR_GPUS 64
75
+
76
+ struct cudaXtDesc_t{
77
+ int version; //descriptor version
78
+ int nGPUs; //number of GPUs
79
+ int GPUs[MAX_CUDA_DESCRIPTOR_GPUS]; //array of device IDs
80
+ void *data[MAX_CUDA_DESCRIPTOR_GPUS]; //array of pointers to data, one per GPU
81
+ size_t size[MAX_CUDA_DESCRIPTOR_GPUS]; //array of data sizes, one per GPU
82
+ void *cudaXtState; //opaque CUDA utility structure
83
+ };
84
+ typedef struct cudaXtDesc_t cudaXtDesc;
85
+
86
+ struct cudaLibXtDesc_t{
87
+ int version; //descriptor version
88
+ cudaXtDesc *descriptor; //multi-GPU memory descriptor
89
+ libFormat library; //which library recognizes the format
90
+ int subFormat; //library specific enumerator of sub formats
91
+ void *libDescriptor; //library specific descriptor e.g. FFT transform plan object
92
+ };
93
+ typedef struct cudaLibXtDesc_t cudaLibXtDesc;
94
+
95
+
96
+ #endif
97
+
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cufft/include/cufftXt.h ADDED
@@ -0,0 +1,269 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ /* Copyright 2005-2021 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * The source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * The Licensed Deliverables contained herein are PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and are being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. THEY ARE
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and are provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ /*!
51
+ * \file cufftXt.h
52
+ * \brief Public header file for the NVIDIA CUDA FFT library (CUFFT)
53
+ */
54
+
55
+ #ifndef _CUFFTXT_H_
56
+ #define _CUFFTXT_H_
57
+ #include "cudalibxt.h"
58
+ #include "cufft.h"
59
+
60
+
61
+ #ifndef CUFFTAPI
62
+ #ifdef _WIN32
63
+ #define CUFFTAPI __stdcall
64
+ #else
65
+ #define CUFFTAPI
66
+ #endif
67
+ #endif
68
+
69
+ #ifdef __cplusplus
70
+ extern "C" {
71
+ #endif
72
+
73
+ //
74
+ // cufftXtSubFormat identifies the data layout of
75
+ // a memory descriptor owned by cufft.
76
+ // note that multi GPU cufft does not yet support out-of-place transforms
77
+ //
78
+
79
+ typedef enum cufftXtSubFormat_t {
80
+ CUFFT_XT_FORMAT_INPUT = 0x00, //by default input is in linear order across GPUs
81
+ CUFFT_XT_FORMAT_OUTPUT = 0x01, //by default output is in scrambled order depending on transform
82
+ CUFFT_XT_FORMAT_INPLACE = 0x02, //by default inplace is input order, which is linear across GPUs
83
+ CUFFT_XT_FORMAT_INPLACE_SHUFFLED = 0x03, //shuffled output order after execution of the transform
84
+ CUFFT_XT_FORMAT_1D_INPUT_SHUFFLED = 0x04, //shuffled input order prior to execution of 1D transforms
85
+ CUFFT_XT_FORMAT_DISTRIBUTED_INPUT = 0x05,
86
+ CUFFT_XT_FORMAT_DISTRIBUTED_OUTPUT = 0x06,
87
+ CUFFT_FORMAT_UNDEFINED = 0x07
88
+ } cufftXtSubFormat;
89
+
90
+ //
91
+ // cufftXtCopyType specifies the type of copy for cufftXtMemcpy
92
+ //
93
+ typedef enum cufftXtCopyType_t {
94
+ CUFFT_COPY_HOST_TO_DEVICE = 0x00,
95
+ CUFFT_COPY_DEVICE_TO_HOST = 0x01,
96
+ CUFFT_COPY_DEVICE_TO_DEVICE = 0x02,
97
+ CUFFT_COPY_UNDEFINED = 0x03
98
+ } cufftXtCopyType;
99
+
100
+ //
101
+ // cufftXtQueryType specifies the type of query for cufftXtQueryPlan
102
+ //
103
+ typedef enum cufftXtQueryType_t {
104
+ CUFFT_QUERY_1D_FACTORS = 0x00,
105
+ CUFFT_QUERY_UNDEFINED = 0x01
106
+ } cufftXtQueryType;
107
+
108
+ typedef struct cufftXt1dFactors_t {
109
+ long long int size;
110
+ long long int stringCount;
111
+ long long int stringLength;
112
+ long long int substringLength;
113
+ long long int factor1;
114
+ long long int factor2;
115
+ long long int stringMask;
116
+ long long int substringMask;
117
+ long long int factor1Mask;
118
+ long long int factor2Mask;
119
+ int stringShift;
120
+ int substringShift;
121
+ int factor1Shift;
122
+ int factor2Shift;
123
+ } cufftXt1dFactors;
124
+
125
+ //
126
+ // cufftXtWorkAreaPolicy specifies policy for cufftXtSetWorkAreaPolicy
127
+ //
128
+ typedef enum cufftXtWorkAreaPolicy_t {
129
+ CUFFT_WORKAREA_MINIMAL = 0, /* maximum reduction */
130
+ CUFFT_WORKAREA_USER = 1, /* use workSize parameter as limit */
131
+ CUFFT_WORKAREA_PERFORMANCE = 2, /* default - 1x overhead or more, maximum performance */
132
+ } cufftXtWorkAreaPolicy;
133
+
134
+ // multi-GPU routines
135
+ cufftResult CUFFTAPI cufftXtSetGPUs(cufftHandle handle, int nGPUs, int *whichGPUs);
136
+
137
+ cufftResult CUFFTAPI cufftXtMalloc(cufftHandle plan,
138
+ cudaLibXtDesc ** descriptor,
139
+ cufftXtSubFormat format);
140
+
141
+ cufftResult CUFFTAPI cufftXtMemcpy(cufftHandle plan,
142
+ void *dstPointer,
143
+ void *srcPointer,
144
+ cufftXtCopyType type);
145
+
146
+ cufftResult CUFFTAPI cufftXtFree(cudaLibXtDesc *descriptor);
147
+
148
+ cufftResult CUFFTAPI cufftXtSetWorkArea(cufftHandle plan, void **workArea);
149
+
150
+ cufftResult CUFFTAPI cufftXtExecDescriptorC2C(cufftHandle plan,
151
+ cudaLibXtDesc *input,
152
+ cudaLibXtDesc *output,
153
+ int direction);
154
+
155
+ cufftResult CUFFTAPI cufftXtExecDescriptorR2C(cufftHandle plan,
156
+ cudaLibXtDesc *input,
157
+ cudaLibXtDesc *output);
158
+
159
+ cufftResult CUFFTAPI cufftXtExecDescriptorC2R(cufftHandle plan,
160
+ cudaLibXtDesc *input,
161
+ cudaLibXtDesc *output);
162
+
163
+ cufftResult CUFFTAPI cufftXtExecDescriptorZ2Z(cufftHandle plan,
164
+ cudaLibXtDesc *input,
165
+ cudaLibXtDesc *output,
166
+ int direction);
167
+
168
+ cufftResult CUFFTAPI cufftXtExecDescriptorD2Z(cufftHandle plan,
169
+ cudaLibXtDesc *input,
170
+ cudaLibXtDesc *output);
171
+
172
+ cufftResult CUFFTAPI cufftXtExecDescriptorZ2D(cufftHandle plan,
173
+ cudaLibXtDesc *input,
174
+ cudaLibXtDesc *output);
175
+
176
+ // Utility functions
177
+
178
+ cufftResult CUFFTAPI cufftXtQueryPlan(cufftHandle plan, void *queryStruct, cufftXtQueryType queryType);
179
+
180
+
181
+ // callbacks
182
+
183
+
184
+ typedef enum cufftXtCallbackType_t {
185
+ CUFFT_CB_LD_COMPLEX = 0x0,
186
+ CUFFT_CB_LD_COMPLEX_DOUBLE = 0x1,
187
+ CUFFT_CB_LD_REAL = 0x2,
188
+ CUFFT_CB_LD_REAL_DOUBLE = 0x3,
189
+ CUFFT_CB_ST_COMPLEX = 0x4,
190
+ CUFFT_CB_ST_COMPLEX_DOUBLE = 0x5,
191
+ CUFFT_CB_ST_REAL = 0x6,
192
+ CUFFT_CB_ST_REAL_DOUBLE = 0x7,
193
+ CUFFT_CB_UNDEFINED = 0x8
194
+
195
+ } cufftXtCallbackType;
196
+
197
+ typedef cufftComplex (*cufftCallbackLoadC)(void *dataIn, size_t offset, void *callerInfo, void *sharedPointer);
198
+ typedef cufftDoubleComplex (*cufftCallbackLoadZ)(void *dataIn, size_t offset, void *callerInfo, void *sharedPointer);
199
+ typedef cufftReal (*cufftCallbackLoadR)(void *dataIn, size_t offset, void *callerInfo, void *sharedPointer);
200
+ typedef cufftDoubleReal(*cufftCallbackLoadD)(void *dataIn, size_t offset, void *callerInfo, void *sharedPointer);
201
+
202
+ typedef void (*cufftCallbackStoreC)(void *dataOut, size_t offset, cufftComplex element, void *callerInfo, void *sharedPointer);
203
+ typedef void (*cufftCallbackStoreZ)(void *dataOut, size_t offset, cufftDoubleComplex element, void *callerInfo, void *sharedPointer);
204
+ typedef void (*cufftCallbackStoreR)(void *dataOut, size_t offset, cufftReal element, void *callerInfo, void *sharedPointer);
205
+ typedef void (*cufftCallbackStoreD)(void *dataOut, size_t offset, cufftDoubleReal element, void *callerInfo, void *sharedPointer);
206
+
207
+
208
+ cufftResult CUFFTAPI cufftXtSetCallback(cufftHandle plan, void **callback_routine, cufftXtCallbackType cbType, void **caller_info);
209
+ cufftResult CUFFTAPI cufftXtClearCallback(cufftHandle plan, cufftXtCallbackType cbType);
210
+ cufftResult CUFFTAPI cufftXtSetCallbackSharedSize(cufftHandle plan, cufftXtCallbackType cbType, size_t sharedSize);
211
+
212
+ cufftResult CUFFTAPI cufftXtMakePlanMany(cufftHandle plan,
213
+ int rank,
214
+ long long int *n,
215
+ long long int *inembed,
216
+ long long int istride,
217
+ long long int idist,
218
+ cudaDataType inputtype,
219
+ long long int *onembed,
220
+ long long int ostride,
221
+ long long int odist,
222
+ cudaDataType outputtype,
223
+ long long int batch,
224
+ size_t *workSize,
225
+ cudaDataType executiontype);
226
+
227
+ cufftResult CUFFTAPI cufftXtGetSizeMany(cufftHandle plan,
228
+ int rank,
229
+ long long int *n,
230
+ long long int *inembed,
231
+ long long int istride,
232
+ long long int idist,
233
+ cudaDataType inputtype,
234
+ long long int *onembed,
235
+ long long int ostride,
236
+ long long int odist,
237
+ cudaDataType outputtype,
238
+ long long int batch,
239
+ size_t *workSize,
240
+ cudaDataType executiontype);
241
+
242
+
243
+ cufftResult CUFFTAPI cufftXtExec(cufftHandle plan,
244
+ void *input,
245
+ void *output,
246
+ int direction);
247
+
248
+ cufftResult CUFFTAPI cufftXtExecDescriptor(cufftHandle plan,
249
+ cudaLibXtDesc *input,
250
+ cudaLibXtDesc *output,
251
+ int direction);
252
+
253
+ cufftResult CUFFTAPI cufftXtSetWorkAreaPolicy(cufftHandle plan, cufftXtWorkAreaPolicy policy, size_t *workSize);
254
+
255
+ typedef struct cufftBox3d_t {
256
+ size_t lower[3];
257
+ size_t upper[3];
258
+ size_t strides[3];
259
+ } cufftBox3d;
260
+
261
+ cufftResult CUFFTAPI cufftXtSetDistribution(cufftHandle plan,
262
+ const cufftBox3d *box_in,
263
+ const cufftBox3d *box_out);
264
+
265
+ #ifdef __cplusplus
266
+ }
267
+ #endif
268
+
269
+ #endif
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cufft/lib/__init__.py ADDED
File without changes
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nccl/__init__.py ADDED
File without changes
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nccl/include/__init__.py ADDED
File without changes
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia_nvtx_cu11-11.8.86.dist-info/METADATA ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Metadata-Version: 2.1
2
+ Name: nvidia-nvtx-cu11
3
+ Version: 11.8.86
4
+ Summary: NVIDIA Tools Extension
5
+ Home-page: https://developer.nvidia.com/cuda-zone
6
+ Author: Nvidia CUDA Installer Team
7
+ Author-email: cuda_installer@nvidia.com
8
+ License: NVIDIA Proprietary Software
9
+ Keywords: cuda,nvidia,runtime,machine learning,deep learning
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Intended Audience :: Education
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: License :: Other/Proprietary License
15
+ Classifier: Natural Language :: English
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.5
18
+ Classifier: Programming Language :: Python :: 3.6
19
+ Classifier: Programming Language :: Python :: 3.7
20
+ Classifier: Programming Language :: Python :: 3.8
21
+ Classifier: Programming Language :: Python :: 3.9
22
+ Classifier: Programming Language :: Python :: 3.10
23
+ Classifier: Programming Language :: Python :: 3.11
24
+ Classifier: Programming Language :: Python :: 3 :: Only
25
+ Classifier: Topic :: Scientific/Engineering
26
+ Classifier: Topic :: Scientific/Engineering :: Mathematics
27
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
28
+ Classifier: Topic :: Software Development
29
+ Classifier: Topic :: Software Development :: Libraries
30
+ Classifier: Operating System :: Microsoft :: Windows
31
+ Classifier: Operating System :: POSIX :: Linux
32
+ Requires-Python: >=3
33
+ License-File: License.txt
34
+
35
+ A C-based API for annotating events, code ranges, and resources in your applications. Applications which integrate NVTX can use the Visual Profiler to capture and visualize these events and ranges.
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia_nvtx_cu11-11.8.86.dist-info/WHEEL ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ Wheel-Version: 1.0
2
+ Generator: bdist_wheel (0.37.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-manylinux1_x86_64
5
+
tuning-competition-baseline/.venv/lib/python3.11/site-packages/packaging/__pycache__/_elffile.cpython-311.pyc ADDED
Binary file (5.53 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/packaging/_parser.py ADDED
@@ -0,0 +1,354 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Handwritten parser of dependency specifiers.
2
+
3
+ The docstring for each __parse_* function contains EBNF-inspired grammar representing
4
+ the implementation.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import ast
10
+ from typing import NamedTuple, Sequence, Tuple, Union
11
+
12
+ from ._tokenizer import DEFAULT_RULES, Tokenizer
13
+
14
+
15
+ class Node:
16
+ def __init__(self, value: str) -> None:
17
+ self.value = value
18
+
19
+ def __str__(self) -> str:
20
+ return self.value
21
+
22
+ def __repr__(self) -> str:
23
+ return f"<{self.__class__.__name__}('{self}')>"
24
+
25
+ def serialize(self) -> str:
26
+ raise NotImplementedError
27
+
28
+
29
+ class Variable(Node):
30
+ def serialize(self) -> str:
31
+ return str(self)
32
+
33
+
34
+ class Value(Node):
35
+ def serialize(self) -> str:
36
+ return f'"{self}"'
37
+
38
+
39
+ class Op(Node):
40
+ def serialize(self) -> str:
41
+ return str(self)
42
+
43
+
44
+ MarkerVar = Union[Variable, Value]
45
+ MarkerItem = Tuple[MarkerVar, Op, MarkerVar]
46
+ MarkerAtom = Union[MarkerItem, Sequence["MarkerAtom"]]
47
+ MarkerList = Sequence[Union["MarkerList", MarkerAtom, str]]
48
+
49
+
50
+ class ParsedRequirement(NamedTuple):
51
+ name: str
52
+ url: str
53
+ extras: list[str]
54
+ specifier: str
55
+ marker: MarkerList | None
56
+
57
+
58
+ # --------------------------------------------------------------------------------------
59
+ # Recursive descent parser for dependency specifier
60
+ # --------------------------------------------------------------------------------------
61
+ def parse_requirement(source: str) -> ParsedRequirement:
62
+ return _parse_requirement(Tokenizer(source, rules=DEFAULT_RULES))
63
+
64
+
65
+ def _parse_requirement(tokenizer: Tokenizer) -> ParsedRequirement:
66
+ """
67
+ requirement = WS? IDENTIFIER WS? extras WS? requirement_details
68
+ """
69
+ tokenizer.consume("WS")
70
+
71
+ name_token = tokenizer.expect(
72
+ "IDENTIFIER", expected="package name at the start of dependency specifier"
73
+ )
74
+ name = name_token.text
75
+ tokenizer.consume("WS")
76
+
77
+ extras = _parse_extras(tokenizer)
78
+ tokenizer.consume("WS")
79
+
80
+ url, specifier, marker = _parse_requirement_details(tokenizer)
81
+ tokenizer.expect("END", expected="end of dependency specifier")
82
+
83
+ return ParsedRequirement(name, url, extras, specifier, marker)
84
+
85
+
86
+ def _parse_requirement_details(
87
+ tokenizer: Tokenizer,
88
+ ) -> tuple[str, str, MarkerList | None]:
89
+ """
90
+ requirement_details = AT URL (WS requirement_marker?)?
91
+ | specifier WS? (requirement_marker)?
92
+ """
93
+
94
+ specifier = ""
95
+ url = ""
96
+ marker = None
97
+
98
+ if tokenizer.check("AT"):
99
+ tokenizer.read()
100
+ tokenizer.consume("WS")
101
+
102
+ url_start = tokenizer.position
103
+ url = tokenizer.expect("URL", expected="URL after @").text
104
+ if tokenizer.check("END", peek=True):
105
+ return (url, specifier, marker)
106
+
107
+ tokenizer.expect("WS", expected="whitespace after URL")
108
+
109
+ # The input might end after whitespace.
110
+ if tokenizer.check("END", peek=True):
111
+ return (url, specifier, marker)
112
+
113
+ marker = _parse_requirement_marker(
114
+ tokenizer, span_start=url_start, after="URL and whitespace"
115
+ )
116
+ else:
117
+ specifier_start = tokenizer.position
118
+ specifier = _parse_specifier(tokenizer)
119
+ tokenizer.consume("WS")
120
+
121
+ if tokenizer.check("END", peek=True):
122
+ return (url, specifier, marker)
123
+
124
+ marker = _parse_requirement_marker(
125
+ tokenizer,
126
+ span_start=specifier_start,
127
+ after=(
128
+ "version specifier"
129
+ if specifier
130
+ else "name and no valid version specifier"
131
+ ),
132
+ )
133
+
134
+ return (url, specifier, marker)
135
+
136
+
137
+ def _parse_requirement_marker(
138
+ tokenizer: Tokenizer, *, span_start: int, after: str
139
+ ) -> MarkerList:
140
+ """
141
+ requirement_marker = SEMICOLON marker WS?
142
+ """
143
+
144
+ if not tokenizer.check("SEMICOLON"):
145
+ tokenizer.raise_syntax_error(
146
+ f"Expected end or semicolon (after {after})",
147
+ span_start=span_start,
148
+ )
149
+ tokenizer.read()
150
+
151
+ marker = _parse_marker(tokenizer)
152
+ tokenizer.consume("WS")
153
+
154
+ return marker
155
+
156
+
157
+ def _parse_extras(tokenizer: Tokenizer) -> list[str]:
158
+ """
159
+ extras = (LEFT_BRACKET wsp* extras_list? wsp* RIGHT_BRACKET)?
160
+ """
161
+ if not tokenizer.check("LEFT_BRACKET", peek=True):
162
+ return []
163
+
164
+ with tokenizer.enclosing_tokens(
165
+ "LEFT_BRACKET",
166
+ "RIGHT_BRACKET",
167
+ around="extras",
168
+ ):
169
+ tokenizer.consume("WS")
170
+ extras = _parse_extras_list(tokenizer)
171
+ tokenizer.consume("WS")
172
+
173
+ return extras
174
+
175
+
176
+ def _parse_extras_list(tokenizer: Tokenizer) -> list[str]:
177
+ """
178
+ extras_list = identifier (wsp* ',' wsp* identifier)*
179
+ """
180
+ extras: list[str] = []
181
+
182
+ if not tokenizer.check("IDENTIFIER"):
183
+ return extras
184
+
185
+ extras.append(tokenizer.read().text)
186
+
187
+ while True:
188
+ tokenizer.consume("WS")
189
+ if tokenizer.check("IDENTIFIER", peek=True):
190
+ tokenizer.raise_syntax_error("Expected comma between extra names")
191
+ elif not tokenizer.check("COMMA"):
192
+ break
193
+
194
+ tokenizer.read()
195
+ tokenizer.consume("WS")
196
+
197
+ extra_token = tokenizer.expect("IDENTIFIER", expected="extra name after comma")
198
+ extras.append(extra_token.text)
199
+
200
+ return extras
201
+
202
+
203
+ def _parse_specifier(tokenizer: Tokenizer) -> str:
204
+ """
205
+ specifier = LEFT_PARENTHESIS WS? version_many WS? RIGHT_PARENTHESIS
206
+ | WS? version_many WS?
207
+ """
208
+ with tokenizer.enclosing_tokens(
209
+ "LEFT_PARENTHESIS",
210
+ "RIGHT_PARENTHESIS",
211
+ around="version specifier",
212
+ ):
213
+ tokenizer.consume("WS")
214
+ parsed_specifiers = _parse_version_many(tokenizer)
215
+ tokenizer.consume("WS")
216
+
217
+ return parsed_specifiers
218
+
219
+
220
+ def _parse_version_many(tokenizer: Tokenizer) -> str:
221
+ """
222
+ version_many = (SPECIFIER (WS? COMMA WS? SPECIFIER)*)?
223
+ """
224
+ parsed_specifiers = ""
225
+ while tokenizer.check("SPECIFIER"):
226
+ span_start = tokenizer.position
227
+ parsed_specifiers += tokenizer.read().text
228
+ if tokenizer.check("VERSION_PREFIX_TRAIL", peek=True):
229
+ tokenizer.raise_syntax_error(
230
+ ".* suffix can only be used with `==` or `!=` operators",
231
+ span_start=span_start,
232
+ span_end=tokenizer.position + 1,
233
+ )
234
+ if tokenizer.check("VERSION_LOCAL_LABEL_TRAIL", peek=True):
235
+ tokenizer.raise_syntax_error(
236
+ "Local version label can only be used with `==` or `!=` operators",
237
+ span_start=span_start,
238
+ span_end=tokenizer.position,
239
+ )
240
+ tokenizer.consume("WS")
241
+ if not tokenizer.check("COMMA"):
242
+ break
243
+ parsed_specifiers += tokenizer.read().text
244
+ tokenizer.consume("WS")
245
+
246
+ return parsed_specifiers
247
+
248
+
249
+ # --------------------------------------------------------------------------------------
250
+ # Recursive descent parser for marker expression
251
+ # --------------------------------------------------------------------------------------
252
+ def parse_marker(source: str) -> MarkerList:
253
+ return _parse_full_marker(Tokenizer(source, rules=DEFAULT_RULES))
254
+
255
+
256
+ def _parse_full_marker(tokenizer: Tokenizer) -> MarkerList:
257
+ retval = _parse_marker(tokenizer)
258
+ tokenizer.expect("END", expected="end of marker expression")
259
+ return retval
260
+
261
+
262
+ def _parse_marker(tokenizer: Tokenizer) -> MarkerList:
263
+ """
264
+ marker = marker_atom (BOOLOP marker_atom)+
265
+ """
266
+ expression = [_parse_marker_atom(tokenizer)]
267
+ while tokenizer.check("BOOLOP"):
268
+ token = tokenizer.read()
269
+ expr_right = _parse_marker_atom(tokenizer)
270
+ expression.extend((token.text, expr_right))
271
+ return expression
272
+
273
+
274
+ def _parse_marker_atom(tokenizer: Tokenizer) -> MarkerAtom:
275
+ """
276
+ marker_atom = WS? LEFT_PARENTHESIS WS? marker WS? RIGHT_PARENTHESIS WS?
277
+ | WS? marker_item WS?
278
+ """
279
+
280
+ tokenizer.consume("WS")
281
+ if tokenizer.check("LEFT_PARENTHESIS", peek=True):
282
+ with tokenizer.enclosing_tokens(
283
+ "LEFT_PARENTHESIS",
284
+ "RIGHT_PARENTHESIS",
285
+ around="marker expression",
286
+ ):
287
+ tokenizer.consume("WS")
288
+ marker: MarkerAtom = _parse_marker(tokenizer)
289
+ tokenizer.consume("WS")
290
+ else:
291
+ marker = _parse_marker_item(tokenizer)
292
+ tokenizer.consume("WS")
293
+ return marker
294
+
295
+
296
+ def _parse_marker_item(tokenizer: Tokenizer) -> MarkerItem:
297
+ """
298
+ marker_item = WS? marker_var WS? marker_op WS? marker_var WS?
299
+ """
300
+ tokenizer.consume("WS")
301
+ marker_var_left = _parse_marker_var(tokenizer)
302
+ tokenizer.consume("WS")
303
+ marker_op = _parse_marker_op(tokenizer)
304
+ tokenizer.consume("WS")
305
+ marker_var_right = _parse_marker_var(tokenizer)
306
+ tokenizer.consume("WS")
307
+ return (marker_var_left, marker_op, marker_var_right)
308
+
309
+
310
+ def _parse_marker_var(tokenizer: Tokenizer) -> MarkerVar:
311
+ """
312
+ marker_var = VARIABLE | QUOTED_STRING
313
+ """
314
+ if tokenizer.check("VARIABLE"):
315
+ return process_env_var(tokenizer.read().text.replace(".", "_"))
316
+ elif tokenizer.check("QUOTED_STRING"):
317
+ return process_python_str(tokenizer.read().text)
318
+ else:
319
+ tokenizer.raise_syntax_error(
320
+ message="Expected a marker variable or quoted string"
321
+ )
322
+
323
+
324
+ def process_env_var(env_var: str) -> Variable:
325
+ if env_var in ("platform_python_implementation", "python_implementation"):
326
+ return Variable("platform_python_implementation")
327
+ else:
328
+ return Variable(env_var)
329
+
330
+
331
+ def process_python_str(python_str: str) -> Value:
332
+ value = ast.literal_eval(python_str)
333
+ return Value(str(value))
334
+
335
+
336
+ def _parse_marker_op(tokenizer: Tokenizer) -> Op:
337
+ """
338
+ marker_op = IN | NOT IN | OP
339
+ """
340
+ if tokenizer.check("IN"):
341
+ tokenizer.read()
342
+ return Op("in")
343
+ elif tokenizer.check("NOT"):
344
+ tokenizer.read()
345
+ tokenizer.expect("WS", expected="whitespace after 'not'")
346
+ tokenizer.expect("IN", expected="'in' after 'not'")
347
+ return Op("not in")
348
+ elif tokenizer.check("OP"):
349
+ return Op(tokenizer.read().text)
350
+ else:
351
+ return tokenizer.raise_syntax_error(
352
+ "Expected marker operator, one of "
353
+ "<=, <, !=, ==, >=, >, ~=, ===, in, not in"
354
+ )
tuning-competition-baseline/.venv/lib/python3.11/site-packages/packaging/markers.py ADDED
@@ -0,0 +1,331 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This file is dual licensed under the terms of the Apache License, Version
2
+ # 2.0, and the BSD License. See the LICENSE file in the root of this repository
3
+ # for complete details.
4
+
5
+ from __future__ import annotations
6
+
7
+ import operator
8
+ import os
9
+ import platform
10
+ import sys
11
+ from typing import Any, Callable, TypedDict, cast
12
+
13
+ from ._parser import MarkerAtom, MarkerList, Op, Value, Variable
14
+ from ._parser import parse_marker as _parse_marker
15
+ from ._tokenizer import ParserSyntaxError
16
+ from .specifiers import InvalidSpecifier, Specifier
17
+ from .utils import canonicalize_name
18
+
19
# Public API of this module.
__all__ = [
    "InvalidMarker",
    "Marker",
    "UndefinedComparison",
    "UndefinedEnvironmentName",
    "default_environment",
]

# Signature shared by every comparison callable in the _operators table below.
Operator = Callable[[str, str], bool]
28
+
29
+
30
class InvalidMarker(ValueError):
    """
    An invalid marker was found; users should refer to PEP 508.

    Raised by :class:`Marker` when the marker string cannot be parsed.
    """
34
+
35
+
36
class UndefinedComparison(ValueError):
    """
    An invalid operation was attempted on a value that doesn't support it.

    Raised during marker evaluation when an operator is neither a valid
    version specifier nor one of the plain string operators.
    """
40
+
41
+
42
class UndefinedEnvironmentName(ValueError):
    """
    A name was attempted to be used that does not exist inside of the
    environment.
    """
    # NOTE(review): not raised anywhere in this module; presumably retained
    # for backwards API compatibility — confirm before removing.
47
+
48
+
49
class Environment(TypedDict):
    """The set of environment marker variables, one key per marker name.

    ``default_environment()`` below returns an instance populated from the
    running interpreter.
    """

    implementation_name: str
    """The implementation's identifier, e.g. ``'cpython'``."""

    implementation_version: str
    """
    The implementation's version, e.g. ``'3.13.0a2'`` for CPython 3.13.0a2, or
    ``'7.3.13'`` for PyPy3.10 v7.3.13.
    """

    os_name: str
    """
    The value of :py:data:`os.name`. The name of the operating system dependent module
    imported, e.g. ``'posix'``.
    """

    platform_machine: str
    """
    Returns the machine type, e.g. ``'i386'``.

    An empty string if the value cannot be determined.
    """

    platform_release: str
    """
    The system's release, e.g. ``'2.2.0'`` or ``'NT'``.

    An empty string if the value cannot be determined.
    """

    platform_system: str
    """
    The system/OS name, e.g. ``'Linux'``, ``'Windows'`` or ``'Java'``.

    An empty string if the value cannot be determined.
    """

    platform_version: str
    """
    The system's release version, e.g. ``'#3 on degas'``.

    An empty string if the value cannot be determined.
    """

    python_full_version: str
    """
    The Python version as string ``'major.minor.patchlevel'``.

    Note that unlike the Python :py:data:`sys.version`, this value will always include
    the patchlevel (it defaults to 0).
    """

    platform_python_implementation: str
    """
    A string identifying the Python implementation, e.g. ``'CPython'``.
    """

    python_version: str
    """The Python version as string ``'major.minor'``."""

    sys_platform: str
    """
    This string contains a platform identifier that can be used to append
    platform-specific components to :py:data:`sys.path`, for instance.

    For Unix systems, except on Linux and AIX, this is the lowercased OS name as
    returned by ``uname -s`` with the first part of the version as returned by
    ``uname -r`` appended, e.g. ``'sunos5'`` or ``'freebsd8'``, at the time when Python
    was built.
    """
119
+
120
+
121
def _normalize_extra_values(results: Any) -> Any:
    """
    Normalize extra values.

    If the first parsed expression compares the ``extra`` variable against a
    literal, rewrite the literal to its canonicalized form so that marker
    serialization and equality are insensitive to the extra's spelling.
    Mutates and returns ``results``.
    """
    # Only the first item is inspected; nested groups are left untouched.
    if isinstance(results[0], tuple):
        lhs, op, rhs = results[0]
        if isinstance(lhs, Variable) and lhs.value == "extra":
            normalized_extra = canonicalize_name(rhs.value)
            rhs = Value(normalized_extra)
        elif isinstance(rhs, Variable) and rhs.value == "extra":
            normalized_extra = canonicalize_name(lhs.value)
            lhs = Value(normalized_extra)
        results[0] = lhs, op, rhs
    return results
135
+
136
+
137
+ def _format_marker(
138
+ marker: list[str] | MarkerAtom | str, first: bool | None = True
139
+ ) -> str:
140
+ assert isinstance(marker, (list, tuple, str))
141
+
142
+ # Sometimes we have a structure like [[...]] which is a single item list
143
+ # where the single item is itself it's own list. In that case we want skip
144
+ # the rest of this function so that we don't get extraneous () on the
145
+ # outside.
146
+ if (
147
+ isinstance(marker, list)
148
+ and len(marker) == 1
149
+ and isinstance(marker[0], (list, tuple))
150
+ ):
151
+ return _format_marker(marker[0])
152
+
153
+ if isinstance(marker, list):
154
+ inner = (_format_marker(m, first=False) for m in marker)
155
+ if first:
156
+ return " ".join(inner)
157
+ else:
158
+ return "(" + " ".join(inner) + ")"
159
+ elif isinstance(marker, tuple):
160
+ return " ".join([m.serialize() for m in marker])
161
+ else:
162
+ return marker
163
+
164
+
165
# Plain string-comparison fallbacks, keyed by the serialized operator.
# _eval_op tries PEP 440 version semantics (Specifier) first and only falls
# back to these when the operand is not a valid version specifier.
_operators: dict[str, Operator] = {
    "in": lambda lhs, rhs: lhs in rhs,
    "not in": lambda lhs, rhs: lhs not in rhs,
    "<": operator.lt,
    "<=": operator.le,
    "==": operator.eq,
    "!=": operator.ne,
    ">=": operator.ge,
    ">": operator.gt,
}
175
+
176
+
177
def _eval_op(lhs: str, op: Op, rhs: str) -> bool:
    """Evaluate a single ``lhs <op> rhs`` marker comparison.

    Version semantics are preferred: if ``op`` plus ``rhs`` form a valid
    PEP 440 specifier, ``lhs`` is tested against it (prereleases allowed).
    Otherwise the operator falls back to plain string comparison.

    Raises UndefinedComparison if the operator supports neither.
    """
    try:
        spec = Specifier("".join([op.serialize(), rhs]))
    except InvalidSpecifier:
        pass
    else:
        return spec.contains(lhs, prereleases=True)

    oper: Operator | None = _operators.get(op.serialize())
    if oper is None:
        raise UndefinedComparison(f"Undefined {op!r} on {lhs!r} and {rhs!r}.")

    return oper(lhs, rhs)
190
+
191
+
192
+ def _normalize(*values: str, key: str) -> tuple[str, ...]:
193
+ # PEP 685 – Comparison of extra names for optional distribution dependencies
194
+ # https://peps.python.org/pep-0685/
195
+ # > When comparing extra names, tools MUST normalize the names being
196
+ # > compared using the semantics outlined in PEP 503 for names
197
+ if key == "extra":
198
+ return tuple(canonicalize_name(v) for v in values)
199
+
200
+ # other environment markers don't have such standards
201
+ return values
202
+
203
+
204
def _evaluate_markers(markers: MarkerList, environment: dict[str, str]) -> bool:
    """Evaluate a parsed marker tree against *environment*.

    *markers* is a flat list alternating between atoms (comparison tuples or
    nested lists) and the connective strings "and"/"or".  Because "and"
    binds tighter than "or", atoms are accumulated into "and"-groups that
    are split at each "or"; the result is the disjunction of the groups'
    conjunctions.
    """
    groups: list[list[bool]] = [[]]

    for marker in markers:
        assert isinstance(marker, (list, tuple, str))

        if isinstance(marker, list):
            # Parenthesized sub-expression: evaluate recursively.
            groups[-1].append(_evaluate_markers(marker, environment))
        elif isinstance(marker, tuple):
            lhs, op, rhs = marker

            # Exactly one side is a Variable; resolve it from the environment
            # and remember which key it was so both operands can be
            # normalized consistently (matters for "extra").
            if isinstance(lhs, Variable):
                environment_key = lhs.value
                lhs_value = environment[environment_key]
                rhs_value = rhs.value
            else:
                lhs_value = lhs.value
                environment_key = rhs.value
                rhs_value = environment[environment_key]

            lhs_value, rhs_value = _normalize(lhs_value, rhs_value, key=environment_key)
            groups[-1].append(_eval_op(lhs_value, op, rhs_value))
        else:
            assert marker in ["and", "or"]
            # "and" extends the current group; "or" starts a fresh one.
            if marker == "or":
                groups.append([])

    return any(all(item) for item in groups)
232
+
233
+
234
def format_full_version(info: sys._version_info) -> str:
    """Render a ``sys.version_info``-like object as a full version string.

    Non-final builds get the release-level initial and serial appended,
    e.g. ``(3, 13, 0, 'alpha', 2)`` becomes ``'3.13.0a2'``.
    """
    pieces = [f"{info.major}.{info.minor}.{info.micro}"]
    if info.releaselevel != "final":
        pieces.append(f"{info.releaselevel[0]}{info.serial}")
    return "".join(pieces)
240
+
241
+
242
def default_environment() -> Environment:
    """Return the environment marker values for the running interpreter.

    Every value is computed at call time from ``sys``, ``os`` or
    ``platform``; keys match the :class:`Environment` TypedDict.
    """
    iver = format_full_version(sys.implementation.version)
    implementation_name = sys.implementation.name
    return {
        "implementation_name": implementation_name,
        "implementation_version": iver,
        "os_name": os.name,
        "platform_machine": platform.machine(),
        "platform_release": platform.release(),
        "platform_system": platform.system(),
        "platform_version": platform.version(),
        "python_full_version": platform.python_version(),
        "platform_python_implementation": platform.python_implementation(),
        "python_version": ".".join(platform.python_version_tuple()[:2]),
        "sys_platform": sys.platform,
    }
258
+
259
+
260
class Marker:
    """A parsed PEP 508 environment marker, e.g. ``python_version >= "3.8"``.

    Supports serialization back to a string (``__str__``), equality and
    hashing on the serialized form, and evaluation against an environment.
    """

    def __init__(self, marker: str) -> None:
        # Note: We create a Marker object without calling this constructor in
        # packaging.requirements.Requirement. If any additional logic is
        # added here, make sure to mirror/adapt Requirement.
        try:
            self._markers = _normalize_extra_values(_parse_marker(marker))
            # The attribute `_markers` can be described in terms of a recursive type:
            # MarkerList = List[Union[Tuple[Node, ...], str, MarkerList]]
            #
            # For example, the following expression:
            # python_version > "3.6" or (python_version == "3.6" and os_name == "unix")
            #
            # is parsed into:
            # [
            #     (<Variable('python_version')>, <Op('>')>, <Value('3.6')>),
            #     'or',
            #     [
            #         (<Variable('python_version')>, <Op('==')>, <Value('3.6')>),
            #         'and',
            #         (<Variable('os_name')>, <Op('==')>, <Value('unix')>)
            #     ]
            # ]
            except ParserSyntaxError as e:
            raise InvalidMarker(str(e)) from e

    def __str__(self) -> str:
        return _format_marker(self._markers)

    def __repr__(self) -> str:
        return f"<Marker('{self}')>"

    def __hash__(self) -> int:
        # Include the class name so a Marker never hash-collides by design
        # with a plain string of its serialization.
        return hash((self.__class__.__name__, str(self)))

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, Marker):
            return NotImplemented

        # Equality is defined on the canonical serialized form.
        return str(self) == str(other)

    def evaluate(self, environment: dict[str, str] | None = None) -> bool:
        """Evaluate a marker.

        Return the boolean from evaluating the given marker against the
        environment. environment is an optional argument to override all or
        part of the determined environment.

        The environment is determined from the current Python process.
        """
        current_environment = cast("dict[str, str]", default_environment())
        # "extra" always exists so markers referencing it never KeyError.
        current_environment["extra"] = ""
        if environment is not None:
            current_environment.update(environment)
            # The API used to allow setting extra to None. We need to handle this
            # case for backwards compatibility.
            if current_environment["extra"] is None:
                current_environment["extra"] = ""

        return _evaluate_markers(
            self._markers, _repair_python_full_version(current_environment)
        )
322
+
323
+
324
+ def _repair_python_full_version(env: dict[str, str]) -> dict[str, str]:
325
+ """
326
+ Work around platform.python_version() returning something that is not PEP 440
327
+ compliant for non-tagged Python builds.
328
+ """
329
+ if env["python_full_version"].endswith("+"):
330
+ env["python_full_version"] += "local"
331
+ return env
tuning-competition-baseline/.venv/lib/python3.11/site-packages/packaging/metadata.py ADDED
@@ -0,0 +1,863 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import email.feedparser
4
+ import email.header
5
+ import email.message
6
+ import email.parser
7
+ import email.policy
8
+ import pathlib
9
+ import sys
10
+ import typing
11
+ from typing import (
12
+ Any,
13
+ Callable,
14
+ Generic,
15
+ Literal,
16
+ TypedDict,
17
+ cast,
18
+ )
19
+
20
+ from . import licenses, requirements, specifiers, utils
21
+ from . import version as version_module
22
+ from .licenses import NormalizedLicenseExpression
23
+
24
+ T = typing.TypeVar("T")
25
+
26
+
27
if sys.version_info >= (3, 11):  # pragma: no cover
    # Re-export the builtin so this module's name resolves uniformly
    # regardless of Python version.
    ExceptionGroup = ExceptionGroup
else:  # pragma: no cover

    class ExceptionGroup(Exception):
        """A minimal implementation of :external:exc:`ExceptionGroup` from Python 3.11.

        If :external:exc:`ExceptionGroup` is already defined by Python itself,
        that version is used instead.
        """

        # The message describing the group.
        message: str
        # The exceptions collected into this group.
        exceptions: list[Exception]

        def __init__(self, message: str, exceptions: list[Exception]) -> None:
            self.message = message
            self.exceptions = exceptions

        def __repr__(self) -> str:
            return f"{self.__class__.__name__}({self.message!r}, {self.exceptions!r})"
47
+
48
+
49
class InvalidMetadata(ValueError):
    """A metadata field contains invalid data."""

    field: str
    """The name of the field that contains invalid data."""

    def __init__(self, field: str, message: str) -> None:
        self.field = field
        # Only the message becomes the ValueError text; the field name is
        # carried separately on the exception instance.
        super().__init__(message)
58
+
59
+
60
# The RawMetadata class attempts to make as few assumptions about the underlying
# serialization formats as possible. The idea is that as long as a serialization
# formats offer some very basic primitives in *some* way then we can support
# serializing to and from that format.
class RawMetadata(TypedDict, total=False):
    """A dictionary of raw core metadata.

    Each field in core metadata maps to a key of this dictionary (when data is
    provided). The key is lower-case and underscores are used instead of dashes
    compared to the equivalent core metadata field. Any core metadata field that
    can be specified multiple times or can hold multiple values in a single
    field have a key with a plural name. See :class:`Metadata` whose attributes
    match the keys of this dictionary.

    Core metadata fields that can be specified multiple times are stored as a
    list or dict depending on which is appropriate for the field. Any fields
    which hold multiple values in a single field are stored as a list.

    """

    # Metadata 1.0 - PEP 241
    metadata_version: str
    name: str
    version: str
    platforms: list[str]
    summary: str
    description: str
    keywords: list[str]
    home_page: str
    author: str
    author_email: str
    license: str

    # Metadata 1.1 - PEP 314
    supported_platforms: list[str]
    download_url: str
    classifiers: list[str]
    requires: list[str]
    provides: list[str]
    obsoletes: list[str]

    # Metadata 1.2 - PEP 345
    maintainer: str
    maintainer_email: str
    requires_dist: list[str]
    provides_dist: list[str]
    obsoletes_dist: list[str]
    requires_python: str
    requires_external: list[str]
    project_urls: dict[str, str]

    # Metadata 2.0
    # PEP 426 attempted to completely revamp the metadata format
    # but got stuck without ever being able to build consensus on
    # it and ultimately ended up withdrawn.
    #
    # However, a number of tools had started emitting METADATA with
    # `2.0` Metadata-Version, so for historical reasons, this version
    # was skipped.

    # Metadata 2.1 - PEP 566
    description_content_type: str
    provides_extra: list[str]

    # Metadata 2.2 - PEP 643
    dynamic: list[str]

    # Metadata 2.3 - PEP 685
    # No new fields were added in PEP 685, just some edge cases were
    # tightened up to provide better interoperability.

    # Metadata 2.4 - PEP 639
    license_expression: str
    license_files: list[str]
135
+
136
# RawMetadata keys whose raw representation is a single string.
_STRING_FIELDS = {
    "author",
    "author_email",
    "description",
    "description_content_type",
    "download_url",
    "home_page",
    "license",
    "license_expression",
    "maintainer",
    "maintainer_email",
    "metadata_version",
    "name",
    "requires_python",
    "summary",
    "version",
}

# RawMetadata keys that may appear multiple times and collect into a list.
_LIST_FIELDS = {
    "classifiers",
    "dynamic",
    "license_files",
    "obsoletes",
    "obsoletes_dist",
    "platforms",
    "provides",
    "provides_dist",
    "provides_extra",
    "requires",
    "requires_dist",
    "requires_external",
    "supported_platforms",
}

# RawMetadata keys stored as a mapping (label -> value).
_DICT_FIELDS = {
    "project_urls",
}
173
+
174
+
175
+ def _parse_keywords(data: str) -> list[str]:
176
+ """Split a string of comma-separated keywords into a list of keywords."""
177
+ return [k.strip() for k in data.split(",")]
178
+
179
+
180
+ def _parse_project_urls(data: list[str]) -> dict[str, str]:
181
+ """Parse a list of label/URL string pairings separated by a comma."""
182
+ urls = {}
183
+ for pair in data:
184
+ # Our logic is slightly tricky here as we want to try and do
185
+ # *something* reasonable with malformed data.
186
+ #
187
+ # The main thing that we have to worry about, is data that does
188
+ # not have a ',' at all to split the label from the Value. There
189
+ # isn't a singular right answer here, and we will fail validation
190
+ # later on (if the caller is validating) so it doesn't *really*
191
+ # matter, but since the missing value has to be an empty str
192
+ # and our return value is dict[str, str], if we let the key
193
+ # be the missing value, then they'd have multiple '' values that
194
+ # overwrite each other in a accumulating dict.
195
+ #
196
+ # The other potentional issue is that it's possible to have the
197
+ # same label multiple times in the metadata, with no solid "right"
198
+ # answer with what to do in that case. As such, we'll do the only
199
+ # thing we can, which is treat the field as unparseable and add it
200
+ # to our list of unparsed fields.
201
+ parts = [p.strip() for p in pair.split(",", 1)]
202
+ parts.extend([""] * (max(0, 2 - len(parts)))) # Ensure 2 items
203
+
204
+ # TODO: The spec doesn't say anything about if the keys should be
205
+ # considered case sensitive or not... logically they should
206
+ # be case-preserving and case-insensitive, but doing that
207
+ # would open up more cases where we might have duplicate
208
+ # entries.
209
+ label, url = parts
210
+ if label in urls:
211
+ # The label already exists in our set of urls, so this field
212
+ # is unparseable, and we can just add the whole thing to our
213
+ # unparseable data and stop processing it.
214
+ raise KeyError("duplicate labels in project urls")
215
+ urls[label] = url
216
+
217
+ return urls
218
+
219
+
220
def _get_payload(msg: email.message.Message, source: bytes | str) -> str:
    """Get the body of the message.

    Raises ValueError when *source* is bytes and the body is not valid UTF-8.
    """
    # If our source is a str, then our caller has managed encodings for us,
    # and we don't need to deal with it.
    if isinstance(source, str):
        payload = msg.get_payload()
        assert isinstance(payload, str)
        return payload
    # If our source is a bytes, then we're managing the encoding and we need
    # to deal with it.
    else:
        bpayload = msg.get_payload(decode=True)
        assert isinstance(bpayload, bytes)
        try:
            # Strict decoding: a mis-encoded body is surfaced to the caller
            # rather than silently mangled.
            return bpayload.decode("utf8", "strict")
        except UnicodeDecodeError as exc:
            raise ValueError("payload in an invalid encoding") from exc
237
+
238
+
239
# The various parse_FORMAT functions here are intended to be as lenient as
# possible in their parsing, while still returning a correctly typed
# RawMetadata.
#
# To aid in this, we also generally want to do as little touching of the
# data as possible, except where there are possibly some historic holdovers
# that make valid data awkward to work with.
#
# While this is a lower level, intermediate format than our ``Metadata``
# class, some light touch ups can make a massive difference in usability.

# Map METADATA fields to RawMetadata.
_EMAIL_TO_RAW_MAPPING = {
    "author": "author",
    "author-email": "author_email",
    "classifier": "classifiers",
    "description": "description",
    "description-content-type": "description_content_type",
    "download-url": "download_url",
    "dynamic": "dynamic",
    "home-page": "home_page",
    "keywords": "keywords",
    "license": "license",
    "license-expression": "license_expression",
    "license-file": "license_files",
    "maintainer": "maintainer",
    "maintainer-email": "maintainer_email",
    "metadata-version": "metadata_version",
    "name": "name",
    "obsoletes": "obsoletes",
    "obsoletes-dist": "obsoletes_dist",
    "platform": "platforms",
    "project-url": "project_urls",
    "provides": "provides",
    "provides-dist": "provides_dist",
    "provides-extra": "provides_extra",
    "requires": "requires",
    "requires-dist": "requires_dist",
    "requires-external": "requires_external",
    "requires-python": "requires_python",
    "summary": "summary",
    "supported-platform": "supported_platforms",
    "version": "version",
}
# Inverse mapping: RawMetadata key back to its email header name.
_RAW_TO_EMAIL_MAPPING = {raw: email for email, raw in _EMAIL_TO_RAW_MAPPING.items()}
284
+
285
+
286
def parse_email(data: bytes | str) -> tuple[RawMetadata, dict[str, list[str]]]:
    """Parse a distribution's metadata stored as email headers (e.g. from ``METADATA``).

    This function returns a two-item tuple of dicts. The first dict is of
    recognized fields from the core metadata specification. Fields that can be
    parsed and translated into Python's built-in types are converted
    appropriately. All other fields are left as-is. Fields that are allowed to
    appear multiple times are stored as lists.

    The second dict contains all other fields from the metadata. This includes
    any unrecognized fields. It also includes any fields which are expected to
    be parsed into a built-in type but were not formatted appropriately. Finally,
    any fields that are expected to appear only once but are repeated are
    included in this dict.

    """
    raw: dict[str, str | list[str] | dict[str, str]] = {}
    unparsed: dict[str, list[str]] = {}

    if isinstance(data, str):
        parsed = email.parser.Parser(policy=email.policy.compat32).parsestr(data)
    else:
        parsed = email.parser.BytesParser(policy=email.policy.compat32).parsebytes(data)

    # We have to wrap parsed.keys() in a set, because in the case of multiple
    # values for a key (a list), the key will appear multiple times in the
    # list of keys, but we're avoiding that by using get_all().
    for name in frozenset(parsed.keys()):
        # Header names in RFC are case insensitive, so we'll normalize to all
        # lower case to make comparisons easier.
        name = name.lower()

        # We use get_all() here, even for fields that aren't multiple use,
        # because otherwise someone could have e.g. two Name fields, and we
        # would just silently ignore it rather than doing something about it.
        headers = parsed.get_all(name) or []

        # The way the email module works when parsing bytes is that it
        # unconditionally decodes the bytes as ascii using the surrogateescape
        # handler. When you pull that data back out (such as with get_all() ),
        # it looks to see if the str has any surrogate escapes, and if it does
        # it wraps it in a Header object instead of returning the string.
        #
        # As such, we'll look for those Header objects, and fix up the encoding.
        value = []
        # Flag if we have run into any issues processing the headers, thus
        # signalling that the data belongs in 'unparsed'.
        valid_encoding = True
        for h in headers:
            # It's unclear if this can return more types than just a Header or
            # a str, so we'll just assert here to make sure.
            assert isinstance(h, (email.header.Header, str))

            # If it's a header object, we need to do our little dance to get
            # the real data out of it. In cases where there is invalid data
            # we're going to end up with mojibake, but there's no obvious, good
            # way around that without reimplementing parts of the Header object
            # ourselves.
            #
            # That should be fine since, if mojibake happens, this key is
            # going into the unparsed dict anyways.
            if isinstance(h, email.header.Header):
                # The Header object stores its data as chunks, and each chunk
                # can be independently encoded, so we'll need to check each
                # of them.
                chunks: list[tuple[bytes, str | None]] = []
                for bin, encoding in email.header.decode_header(h):
                    try:
                        bin.decode("utf8", "strict")
                    except UnicodeDecodeError:
                        # Enable mojibake.
                        encoding = "latin1"
                        valid_encoding = False
                    else:
                        encoding = "utf8"
                    chunks.append((bin, encoding))

                # Turn our chunks back into a Header object, then let that
                # Header object do the right thing to turn them into a
                # string for us.
                value.append(str(email.header.make_header(chunks)))
            # This is already a string, so just add it.
            else:
                value.append(h)

        # We've processed all of our values to get them into a list of str,
        # but we may have mojibake data, in which case this is an unparsed
        # field.
        if not valid_encoding:
            unparsed[name] = value
            continue

        raw_name = _EMAIL_TO_RAW_MAPPING.get(name)
        if raw_name is None:
            # This is a bit of a weird situation, we've encountered a key that
            # we don't know what it means, so we don't know whether it's meant
            # to be a list or not.
            #
            # Since we can't really tell one way or another, we'll just leave it
            # as a list, even though it may be a single item list, because that's
            # what makes the most sense for email headers.
            unparsed[name] = value
            continue

        # If this is one of our string fields, then we'll check to see if our
        # value is a list of a single item. If it is then we'll assume that
        # it was emitted as a single string, and unwrap the str from inside
        # the list.
        #
        # If it's any other kind of data, then we haven't the faintest clue
        # what we should parse it as, and we have to just add it to our list
        # of unparsed stuff.
        if raw_name in _STRING_FIELDS and len(value) == 1:
            raw[raw_name] = value[0]
        # If this is one of our list of string fields, then we can just assign
        # the value, since email *only* has strings, and our get_all() call
        # above ensures that this is a list.
        elif raw_name in _LIST_FIELDS:
            raw[raw_name] = value
        # Special Case: Keywords
        # The keywords field is implemented in the metadata spec as a str,
        # but it conceptually is a list of strings, and is serialized using
        # ", ".join(keywords), so we'll do some light data massaging to turn
        # this into what it logically is.
        elif raw_name == "keywords" and len(value) == 1:
            raw[raw_name] = _parse_keywords(value[0])
        # Special Case: Project-URL
        # The project urls is implemented in the metadata spec as a list of
        # specially-formatted strings that represent a key and a value, which
        # is fundamentally a mapping, however the email format doesn't support
        # mappings in a sane way, so it was crammed into a list of strings
        # instead.
        #
        # We will do a little light data massaging to turn this into a map as
        # it logically should be.
        elif raw_name == "project_urls":
            try:
                raw[raw_name] = _parse_project_urls(value)
            except KeyError:
                # Duplicate labels — treat the whole field as unparseable.
                unparsed[name] = value
        # Nothing that we've done has managed to parse this, so it'll just
        # throw it in our unparseable data and move on.
        else:
            unparsed[name] = value

    # We need to support getting the Description from the message payload in
    # addition to getting it from the headers. This does mean, though, there
    # is the possibility of it being set both ways, in which case we put both
    # in 'unparsed' since we don't know which is right.
    try:
        payload = _get_payload(parsed, data)
    except ValueError:
        unparsed.setdefault("description", []).append(
            parsed.get_payload(decode=isinstance(data, bytes))  # type: ignore[call-overload]
        )
    else:
        if payload:
            # Check to see if we've already got a description, if so then both
            # it, and this body move to unparseable.
            if "description" in raw:
                description_header = cast(str, raw.pop("description"))
                unparsed.setdefault("description", []).extend(
                    [description_header, payload]
                )
            elif "description" in unparsed:
                unparsed["description"].append(payload)
            else:
                raw["description"] = payload

    # We need to cast our `raw` to a metadata, because a TypedDict only support
    # literal key names, but we're computing our key names on purpose, but the
    # way this function is implemented, our `TypedDict` can only have valid key
    # names.
    return cast(RawMetadata, raw), unparsed
460
+
461
+
462
# Module-level sentinel; distinct from None so None stays a valid field value.
_NOT_FOUND = object()


# Keep the two values in sync.
_VALID_METADATA_VERSIONS = ["1.0", "1.1", "1.2", "2.1", "2.2", "2.3", "2.4"]
_MetadataVersion = Literal["1.0", "1.1", "1.2", "2.1", "2.2", "2.3", "2.4"]

# Attributes that must be present for metadata to be considered valid.
_REQUIRED_ATTRS = frozenset(["metadata_version", "name", "version"])
470
+
471
+
472
class _Validator(Generic[T]):
    """Validate a metadata field.

    All _process_*() methods correspond to a core metadata field. The method is
    called with the field's raw value. If the raw value is valid it is returned
    in its "enriched" form (e.g. ``version.Version`` for the ``Version`` field).
    If the raw value is invalid, :exc:`InvalidMetadata` is raised (with a cause
    as appropriate).
    """

    # Attribute name on the owning Metadata class (set by __set_name__).
    name: str
    # Equivalent email-header / core-metadata field name, used in errors.
    raw_name: str
    # Metadata version in which this field was introduced.
    added: _MetadataVersion

    def __init__(
        self,
        *,
        added: _MetadataVersion = "1.0",
    ) -> None:
        self.added = added

    def __set_name__(self, _owner: Metadata, name: str) -> None:
        # Record the attribute name and derive the raw (email-header) name.
        self.name = name
        self.raw_name = _RAW_TO_EMAIL_MAPPING[name]

    def __get__(self, instance: Metadata, _owner: type[Metadata]) -> T:
        """Enrich, cache, and return the field's value for *instance*.

        The enriched value is stored in the instance __dict__ and the raw
        entry is removed, so subsequent lookups bypass this descriptor.
        """
        # With Python 3.8, the caching can be replaced with functools.cached_property().
        # No need to check the cache as attribute lookup will resolve into the
        # instance's __dict__ before __get__ is called.
        cache = instance.__dict__
        value = instance._raw.get(self.name)

        # To make the _process_* methods easier, we'll check if the value is None
        # and if this field is NOT a required attribute, and if both of those
        # things are true, we'll skip the converter. This will mean that the
        # converters never have to deal with the None union.
        if self.name in _REQUIRED_ATTRS or value is not None:
            try:
                converter: Callable[[Any], T] = getattr(self, f"_process_{self.name}")
            except AttributeError:
                # No converter for this field; the raw value is used as-is.
                pass
            else:
                value = converter(value)

        # Cache the (possibly enriched) value, then drop the raw entry so the
        # instance attribute shadows this descriptor from now on.
        cache[self.name] = value
        try:
            del instance._raw[self.name]  # type: ignore[misc]
        except KeyError:
            pass

        return cast(T, value)

    def _invalid_metadata(
        self, msg: str, cause: Exception | None = None
    ) -> InvalidMetadata:
        """Build an InvalidMetadata for this field, expanding ``{field}`` in *msg*."""
        exc = InvalidMetadata(
            self.raw_name, msg.format_map({"field": repr(self.raw_name)})
        )
        exc.__cause__ = cause
        return exc

    def _process_metadata_version(self, value: str) -> _MetadataVersion:
        # Implicitly makes Metadata-Version required.
        if value not in _VALID_METADATA_VERSIONS:
            raise self._invalid_metadata(f"{value!r} is not a valid metadata version")
        return cast(_MetadataVersion, value)

    def _process_name(self, value: str) -> str:
        """Validate the project name; the original (non-normalized) name is kept."""
        if not value:
            raise self._invalid_metadata("{field} is a required field")
        # Validate the name as a side-effect.
        try:
            utils.canonicalize_name(value, validate=True)
        except utils.InvalidName as exc:
            raise self._invalid_metadata(
                f"{value!r} is invalid for {{field}}", cause=exc
            ) from exc
        else:
            return value

    def _process_version(self, value: str) -> version_module.Version:
        """Parse the version string into a Version object."""
        if not value:
            raise self._invalid_metadata("{field} is a required field")
        try:
            return version_module.parse(value)
        except version_module.InvalidVersion as exc:
            raise self._invalid_metadata(
                f"{value!r} is invalid for {{field}}", cause=exc
            ) from exc

    def _process_summary(self, value: str) -> str:
        """Check the field contains no newlines."""
        if "\n" in value:
            raise self._invalid_metadata("{field} must be a single line")
        return value

    def _process_description_content_type(self, value: str) -> str:
        """Validate the content type, charset, and Markdown variant parameters."""
        content_types = {"text/plain", "text/x-rst", "text/markdown"}
        # Parse the value through an EmailMessage to reuse its header parsing.
        message = email.message.EmailMessage()
        message["content-type"] = value

        content_type, parameters = (
            # Defaults to `text/plain` if parsing failed.
            message.get_content_type().lower(),
            message["content-type"].params,
        )
        # Check if content-type is valid or defaulted to `text/plain` and thus was
        # not parseable.
        if content_type not in content_types or content_type not in value.lower():
            raise self._invalid_metadata(
                f"{{field}} must be one of {list(content_types)}, not {value!r}"
            )

        charset = parameters.get("charset", "UTF-8")
        if charset != "UTF-8":
            # NOTE(review): list(charset) renders the charset as a list of
            # characters in the message — presumably {charset!r} was intended;
            # kept as-is since the message text is runtime behavior.
            raise self._invalid_metadata(
                f"{{field}} can only specify the UTF-8 charset, not {list(charset)}"
            )

        markdown_variants = {"GFM", "CommonMark"}
        variant = parameters.get("variant", "GFM")  # Use an acceptable default.
        if content_type == "text/markdown" and variant not in markdown_variants:
            raise self._invalid_metadata(
                f"valid Markdown variants for {{field}} are {list(markdown_variants)}, "
                f"not {variant!r}",
            )
        return value

    def _process_dynamic(self, value: list[str]) -> list[str]:
        """Validate the Dynamic field names and return them lowercased."""
        for dynamic_field in map(str.lower, value):
            # These fields can never be declared dynamic.
            if dynamic_field in {"name", "version", "metadata-version"}:
                raise self._invalid_metadata(
                    f"{dynamic_field!r} is not allowed as a dynamic field"
                )
            elif dynamic_field not in _EMAIL_TO_RAW_MAPPING:
                raise self._invalid_metadata(
                    f"{dynamic_field!r} is not a valid dynamic field"
                )
        return list(map(str.lower, value))

    def _process_provides_extra(
        self,
        value: list[str],
    ) -> list[utils.NormalizedName]:
        """Normalize extra names, failing on the first invalid one."""
        normalized_names = []
        try:
            for name in value:
                normalized_names.append(utils.canonicalize_name(name, validate=True))
        except utils.InvalidName as exc:
            # `name` is the entry that failed validation.
            raise self._invalid_metadata(
                f"{name!r} is invalid for {{field}}", cause=exc
            ) from exc
        else:
            return normalized_names

    def _process_requires_python(self, value: str) -> specifiers.SpecifierSet:
        """Parse Requires-Python into a SpecifierSet."""
        try:
            return specifiers.SpecifierSet(value)
        except specifiers.InvalidSpecifier as exc:
            raise self._invalid_metadata(
                f"{value!r} is invalid for {{field}}", cause=exc
            ) from exc

    def _process_requires_dist(
        self,
        value: list[str],
    ) -> list[requirements.Requirement]:
        """Parse each Requires-Dist entry, failing on the first invalid one."""
        reqs = []
        try:
            for req in value:
                reqs.append(requirements.Requirement(req))
        except requirements.InvalidRequirement as exc:
            # `req` is the entry that failed parsing.
            raise self._invalid_metadata(
                f"{req!r} is invalid for {{field}}", cause=exc
            ) from exc
        else:
            return reqs

    def _process_license_expression(
        self, value: str
    ) -> NormalizedLicenseExpression | None:
        """Canonicalize an SPDX license expression."""
        try:
            return licenses.canonicalize_license_expression(value)
        except ValueError as exc:
            raise self._invalid_metadata(
                f"{value!r} is invalid for {{field}}", cause=exc
            ) from exc

    def _process_license_files(self, value: list[str]) -> list[str]:
        """Validate License-File entries: relative, resolved, '/'-delimited paths."""
        paths = []
        for path in value:
            if ".." in path:
                raise self._invalid_metadata(
                    f"{path!r} is invalid for {{field}}, "
                    "parent directory indicators are not allowed"
                )
            if "*" in path:
                raise self._invalid_metadata(
                    f"{path!r} is invalid for {{field}}, paths must be resolved"
                )
            # Reject paths that are absolute on either POSIX or Windows.
            if (
                pathlib.PurePosixPath(path).is_absolute()
                or pathlib.PureWindowsPath(path).is_absolute()
            ):
                raise self._invalid_metadata(
                    f"{path!r} is invalid for {{field}}, paths must be relative"
                )
            # A round-trip through PureWindowsPath.as_posix() changes the
            # string iff it contained backslash separators.
            if pathlib.PureWindowsPath(path).as_posix() != path:
                raise self._invalid_metadata(
                    f"{path!r} is invalid for {{field}}, "
                    "paths must use '/' delimiter"
                )
            paths.append(path)
        return paths
686
+
687
+
688
class Metadata:
    """Representation of distribution metadata.

    Compared to :class:`RawMetadata`, this class provides objects representing
    metadata fields instead of only using built-in types. Any invalid metadata
    will cause :exc:`InvalidMetadata` to be raised (with a
    :py:attr:`~BaseException.__cause__` attribute as appropriate).
    """

    # Backing store of not-yet-enriched values; entries migrate into the
    # instance __dict__ as the _Validator descriptors are accessed.
    _raw: RawMetadata

    @classmethod
    def from_raw(cls, data: RawMetadata, *, validate: bool = True) -> Metadata:
        """Create an instance from :class:`RawMetadata`.

        If *validate* is true, all metadata will be validated. All exceptions
        related to validation will be gathered and raised as an :class:`ExceptionGroup`.
        """
        ins = cls()
        ins._raw = data.copy()  # Mutations occur due to caching enriched values.

        if validate:
            exceptions: list[Exception] = []
            try:
                metadata_version = ins.metadata_version
                # Position in _VALID_METADATA_VERSIONS orders versions by age.
                metadata_age = _VALID_METADATA_VERSIONS.index(metadata_version)
            except InvalidMetadata as metadata_version_exc:
                exceptions.append(metadata_version_exc)
                metadata_version = None

            # Make sure to check for the fields that are present, the required
            # fields (so their absence can be reported).
            fields_to_check = frozenset(ins._raw) | _REQUIRED_ATTRS
            # Remove fields that have already been checked.
            fields_to_check -= {"metadata_version"}

            for key in fields_to_check:
                try:
                    if metadata_version:
                        # Can't use getattr() as that triggers descriptor protocol which
                        # will fail due to no value for the instance argument.
                        try:
                            field_metadata_version = cls.__dict__[key].added
                        except KeyError:
                            exc = InvalidMetadata(key, f"unrecognized field: {key!r}")
                            exceptions.append(exc)
                            continue
                        field_age = _VALID_METADATA_VERSIONS.index(
                            field_metadata_version
                        )
                        # A field newer than the declared metadata version is
                        # invalid for this distribution.
                        if field_age > metadata_age:
                            field = _RAW_TO_EMAIL_MAPPING[key]
                            exc = InvalidMetadata(
                                field,
                                f"{field} introduced in metadata version "
                                f"{field_metadata_version}, not {metadata_version}",
                            )
                            exceptions.append(exc)
                            continue
                    # Attribute access runs the _Validator descriptor, which
                    # raises InvalidMetadata for bad values.
                    getattr(ins, key)
                except InvalidMetadata as exc:
                    exceptions.append(exc)

            if exceptions:
                raise ExceptionGroup("invalid metadata", exceptions)

        return ins

    @classmethod
    def from_email(cls, data: bytes | str, *, validate: bool = True) -> Metadata:
        """Parse metadata from email headers.

        If *validate* is true, the metadata will be validated. All exceptions
        related to validation will be gathered and raised as an :class:`ExceptionGroup`.
        """
        raw, unparsed = parse_email(data)

        if validate:
            exceptions: list[Exception] = []
            for unparsed_key in unparsed:
                # Distinguish known-but-unparsable fields from unknown ones.
                if unparsed_key in _EMAIL_TO_RAW_MAPPING:
                    message = f"{unparsed_key!r} has invalid data"
                else:
                    message = f"unrecognized field: {unparsed_key!r}"
                exceptions.append(InvalidMetadata(unparsed_key, message))

            if exceptions:
                raise ExceptionGroup("unparsed", exceptions)

        try:
            return cls.from_raw(raw, validate=validate)
        except ExceptionGroup as exc_group:
            # Re-raise under a combined banner, suppressing the chained context.
            raise ExceptionGroup(
                "invalid or unparsed metadata", exc_group.exceptions
            ) from None

    metadata_version: _Validator[_MetadataVersion] = _Validator()
    """:external:ref:`core-metadata-metadata-version`
    (required; validated to be a valid metadata version)"""
    # `name` is not normalized/typed to NormalizedName so as to provide access to
    # the original/raw name.
    name: _Validator[str] = _Validator()
    """:external:ref:`core-metadata-name`
    (required; validated using :func:`~packaging.utils.canonicalize_name` and its
    *validate* parameter)"""
    version: _Validator[version_module.Version] = _Validator()
    """:external:ref:`core-metadata-version` (required)"""
    dynamic: _Validator[list[str] | None] = _Validator(
        added="2.2",
    )
    """:external:ref:`core-metadata-dynamic`
    (validated against core metadata field names and lowercased)"""
    platforms: _Validator[list[str] | None] = _Validator()
    """:external:ref:`core-metadata-platform`"""
    supported_platforms: _Validator[list[str] | None] = _Validator(added="1.1")
    """:external:ref:`core-metadata-supported-platform`"""
    summary: _Validator[str | None] = _Validator()
    """:external:ref:`core-metadata-summary` (validated to contain no newlines)"""
    description: _Validator[str | None] = _Validator()  # TODO 2.1: can be in body
    """:external:ref:`core-metadata-description`"""
    description_content_type: _Validator[str | None] = _Validator(added="2.1")
    """:external:ref:`core-metadata-description-content-type` (validated)"""
    keywords: _Validator[list[str] | None] = _Validator()
    """:external:ref:`core-metadata-keywords`"""
    home_page: _Validator[str | None] = _Validator()
    """:external:ref:`core-metadata-home-page`"""
    download_url: _Validator[str | None] = _Validator(added="1.1")
    """:external:ref:`core-metadata-download-url`"""
    author: _Validator[str | None] = _Validator()
    """:external:ref:`core-metadata-author`"""
    author_email: _Validator[str | None] = _Validator()
    """:external:ref:`core-metadata-author-email`"""
    maintainer: _Validator[str | None] = _Validator(added="1.2")
    """:external:ref:`core-metadata-maintainer`"""
    maintainer_email: _Validator[str | None] = _Validator(added="1.2")
    """:external:ref:`core-metadata-maintainer-email`"""
    license: _Validator[str | None] = _Validator()
    """:external:ref:`core-metadata-license`"""
    license_expression: _Validator[NormalizedLicenseExpression | None] = _Validator(
        added="2.4"
    )
    """:external:ref:`core-metadata-license-expression`"""
    license_files: _Validator[list[str] | None] = _Validator(added="2.4")
    """:external:ref:`core-metadata-license-file`"""
    classifiers: _Validator[list[str] | None] = _Validator(added="1.1")
    """:external:ref:`core-metadata-classifier`"""
    requires_dist: _Validator[list[requirements.Requirement] | None] = _Validator(
        added="1.2"
    )
    """:external:ref:`core-metadata-requires-dist`"""
    requires_python: _Validator[specifiers.SpecifierSet | None] = _Validator(
        added="1.2"
    )
    """:external:ref:`core-metadata-requires-python`"""
    # Because `Requires-External` allows for non-PEP 440 version specifiers, we
    # don't do any processing on the values.
    requires_external: _Validator[list[str] | None] = _Validator(added="1.2")
    """:external:ref:`core-metadata-requires-external`"""
    project_urls: _Validator[dict[str, str] | None] = _Validator(added="1.2")
    """:external:ref:`core-metadata-project-url`"""
    # PEP 685 lets us raise an error if an extra doesn't pass `Name` validation
    # regardless of metadata version.
    provides_extra: _Validator[list[utils.NormalizedName] | None] = _Validator(
        added="2.1",
    )
    """:external:ref:`core-metadata-provides-extra`"""
    provides_dist: _Validator[list[str] | None] = _Validator(added="1.2")
    """:external:ref:`core-metadata-provides-dist`"""
    obsoletes_dist: _Validator[list[str] | None] = _Validator(added="1.2")
    """:external:ref:`core-metadata-obsoletes-dist`"""
    requires: _Validator[list[str] | None] = _Validator(added="1.1")
    """``Requires`` (deprecated)"""
    provides: _Validator[list[str] | None] = _Validator(added="1.1")
    """``Provides`` (deprecated)"""
    obsoletes: _Validator[list[str] | None] = _Validator(added="1.1")
    """``Obsoletes`` (deprecated)"""
tuning-competition-baseline/.venv/lib/python3.11/site-packages/packaging/py.typed ADDED
File without changes
tuning-competition-baseline/.venv/lib/python3.11/site-packages/packaging/specifiers.py ADDED
@@ -0,0 +1,1020 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This file is dual licensed under the terms of the Apache License, Version
2
+ # 2.0, and the BSD License. See the LICENSE file in the root of this repository
3
+ # for complete details.
4
+ """
5
+ .. testsetup::
6
+
7
+ from packaging.specifiers import Specifier, SpecifierSet, InvalidSpecifier
8
+ from packaging.version import Version
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import abc
14
+ import itertools
15
+ import re
16
+ from typing import Callable, Iterable, Iterator, TypeVar, Union
17
+
18
+ from .utils import canonicalize_version
19
+ from .version import Version
20
+
21
# A version supplied either already parsed or as its raw string form.
UnparsedVersion = Union[Version, str]
# TypeVar that preserves the caller's concrete element type through filter().
UnparsedVersionVar = TypeVar("UnparsedVersionVar", bound=UnparsedVersion)
# Signature shared by the Specifier._compare_* operator implementations.
CallableOperator = Callable[[Version, str], bool]
24
+
25
+
26
def _coerce_version(version: UnparsedVersion) -> Version:
    """Return *version* as a :class:`Version`, parsing it when given a string."""
    if isinstance(version, Version):
        return version
    return Version(version)
30
+
31
+
32
class InvalidSpecifier(ValueError):
    """
    Raised when attempting to create a :class:`Specifier` with a specifier
    string that is invalid.

    >>> Specifier("lolwat")
    Traceback (most recent call last):
        ...
    packaging.specifiers.InvalidSpecifier: Invalid specifier: 'lolwat'

    Subclasses :class:`ValueError`, so callers may catch either type.
    """
42
+
43
+
44
class BaseSpecifier(metaclass=abc.ABCMeta):
    """Abstract base class defining the Specifier-like interface."""

    @abc.abstractmethod
    def __str__(self) -> str:
        """
        Returns the str representation of this Specifier-like object. This
        should be representative of the Specifier itself.
        """

    @abc.abstractmethod
    def __hash__(self) -> int:
        """
        Returns a hash value for this Specifier-like object.
        """

    @abc.abstractmethod
    def __eq__(self, other: object) -> bool:
        """
        Returns a boolean representing whether or not the two Specifier-like
        objects are equal.

        :param other: The other object to check against.
        """

    @property
    @abc.abstractmethod
    def prereleases(self) -> bool | None:
        """Whether or not pre-releases as a whole are allowed.

        This can be set to either ``True`` or ``False`` to explicitly enable or disable
        prereleases or it can be set to ``None`` (the default) to use default semantics.
        """

    # Note: the setter is not abstract; only the getter must be overridden.
    @prereleases.setter
    def prereleases(self, value: bool) -> None:
        """Setter for :attr:`prereleases`.

        :param value: The value to set.
        """

    @abc.abstractmethod
    def contains(self, item: str, prereleases: bool | None = None) -> bool:
        """
        Determines if the given item is contained within this specifier.
        """

    @abc.abstractmethod
    def filter(
        self, iterable: Iterable[UnparsedVersionVar], prereleases: bool | None = None
    ) -> Iterator[UnparsedVersionVar]:
        """
        Takes an iterable of items and filters them so that only items which
        are contained within this specifier are allowed in it.
        """
97
+
98
+
99
+ class Specifier(BaseSpecifier):
100
+ """This class abstracts handling of version specifiers.
101
+
102
+ .. tip::
103
+
104
+ It is generally not required to instantiate this manually. You should instead
105
+ prefer to work with :class:`SpecifierSet` instead, which can parse
106
+ comma-separated version specifiers (which is what package metadata contains).
107
+ """
108
+
109
+ _operator_regex_str = r"""
110
+ (?P<operator>(~=|==|!=|<=|>=|<|>|===))
111
+ """
112
+ _version_regex_str = r"""
113
+ (?P<version>
114
+ (?:
115
+ # The identity operators allow for an escape hatch that will
116
+ # do an exact string match of the version you wish to install.
117
+ # This will not be parsed by PEP 440 and we cannot determine
118
+ # any semantic meaning from it. This operator is discouraged
119
+ # but included entirely as an escape hatch.
120
+ (?<====) # Only match for the identity operator
121
+ \s*
122
+ [^\s;)]* # The arbitrary version can be just about anything,
123
+ # we match everything except for whitespace, a
124
+ # semi-colon for marker support, and a closing paren
125
+ # since versions can be enclosed in them.
126
+ )
127
+ |
128
+ (?:
129
+ # The (non)equality operators allow for wild card and local
130
+ # versions to be specified so we have to define these two
131
+ # operators separately to enable that.
132
+ (?<===|!=) # Only match for equals and not equals
133
+
134
+ \s*
135
+ v?
136
+ (?:[0-9]+!)? # epoch
137
+ [0-9]+(?:\.[0-9]+)* # release
138
+
139
+ # You cannot use a wild card and a pre-release, post-release, a dev or
140
+ # local version together so group them with a | and make them optional.
141
+ (?:
142
+ \.\* # Wild card syntax of .*
143
+ |
144
+ (?: # pre release
145
+ [-_\.]?
146
+ (alpha|beta|preview|pre|a|b|c|rc)
147
+ [-_\.]?
148
+ [0-9]*
149
+ )?
150
+ (?: # post release
151
+ (?:-[0-9]+)|(?:[-_\.]?(post|rev|r)[-_\.]?[0-9]*)
152
+ )?
153
+ (?:[-_\.]?dev[-_\.]?[0-9]*)? # dev release
154
+ (?:\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*)? # local
155
+ )?
156
+ )
157
+ |
158
+ (?:
159
+ # The compatible operator requires at least two digits in the
160
+ # release segment.
161
+ (?<=~=) # Only match for the compatible operator
162
+
163
+ \s*
164
+ v?
165
+ (?:[0-9]+!)? # epoch
166
+ [0-9]+(?:\.[0-9]+)+ # release (We have a + instead of a *)
167
+ (?: # pre release
168
+ [-_\.]?
169
+ (alpha|beta|preview|pre|a|b|c|rc)
170
+ [-_\.]?
171
+ [0-9]*
172
+ )?
173
+ (?: # post release
174
+ (?:-[0-9]+)|(?:[-_\.]?(post|rev|r)[-_\.]?[0-9]*)
175
+ )?
176
+ (?:[-_\.]?dev[-_\.]?[0-9]*)? # dev release
177
+ )
178
+ |
179
+ (?:
180
+ # All other operators only allow a sub set of what the
181
+ # (non)equality operators do. Specifically they do not allow
182
+ # local versions to be specified nor do they allow the prefix
183
+ # matching wild cards.
184
+ (?<!==|!=|~=) # We have special cases for these
185
+ # operators so we want to make sure they
186
+ # don't match here.
187
+
188
+ \s*
189
+ v?
190
+ (?:[0-9]+!)? # epoch
191
+ [0-9]+(?:\.[0-9]+)* # release
192
+ (?: # pre release
193
+ [-_\.]?
194
+ (alpha|beta|preview|pre|a|b|c|rc)
195
+ [-_\.]?
196
+ [0-9]*
197
+ )?
198
+ (?: # post release
199
+ (?:-[0-9]+)|(?:[-_\.]?(post|rev|r)[-_\.]?[0-9]*)
200
+ )?
201
+ (?:[-_\.]?dev[-_\.]?[0-9]*)? # dev release
202
+ )
203
+ )
204
+ """
205
+
206
+ _regex = re.compile(
207
+ r"^\s*" + _operator_regex_str + _version_regex_str + r"\s*$",
208
+ re.VERBOSE | re.IGNORECASE,
209
+ )
210
+
211
+ _operators = {
212
+ "~=": "compatible",
213
+ "==": "equal",
214
+ "!=": "not_equal",
215
+ "<=": "less_than_equal",
216
+ ">=": "greater_than_equal",
217
+ "<": "less_than",
218
+ ">": "greater_than",
219
+ "===": "arbitrary",
220
+ }
221
+
222
+ def __init__(self, spec: str = "", prereleases: bool | None = None) -> None:
223
+ """Initialize a Specifier instance.
224
+
225
+ :param spec:
226
+ The string representation of a specifier which will be parsed and
227
+ normalized before use.
228
+ :param prereleases:
229
+ This tells the specifier if it should accept prerelease versions if
230
+ applicable or not. The default of ``None`` will autodetect it from the
231
+ given specifiers.
232
+ :raises InvalidSpecifier:
233
+ If the given specifier is invalid (i.e. bad syntax).
234
+ """
235
+ match = self._regex.search(spec)
236
+ if not match:
237
+ raise InvalidSpecifier(f"Invalid specifier: {spec!r}")
238
+
239
+ self._spec: tuple[str, str] = (
240
+ match.group("operator").strip(),
241
+ match.group("version").strip(),
242
+ )
243
+
244
+ # Store whether or not this Specifier should accept prereleases
245
+ self._prereleases = prereleases
246
+
247
+ # https://github.com/python/mypy/pull/13475#pullrequestreview-1079784515
248
+ @property # type: ignore[override]
249
+ def prereleases(self) -> bool:
250
+ # If there is an explicit prereleases set for this, then we'll just
251
+ # blindly use that.
252
+ if self._prereleases is not None:
253
+ return self._prereleases
254
+
255
+ # Look at all of our specifiers and determine if they are inclusive
256
+ # operators, and if they are if they are including an explicit
257
+ # prerelease.
258
+ operator, version = self._spec
259
+ if operator in ["==", ">=", "<=", "~=", "===", ">", "<"]:
260
+ # The == specifier can include a trailing .*, if it does we
261
+ # want to remove before parsing.
262
+ if operator == "==" and version.endswith(".*"):
263
+ version = version[:-2]
264
+
265
+ # Parse the version, and if it is a pre-release than this
266
+ # specifier allows pre-releases.
267
+ if Version(version).is_prerelease:
268
+ return True
269
+
270
+ return False
271
+
272
+ @prereleases.setter
273
+ def prereleases(self, value: bool) -> None:
274
+ self._prereleases = value
275
+
276
+ @property
277
+ def operator(self) -> str:
278
+ """The operator of this specifier.
279
+
280
+ >>> Specifier("==1.2.3").operator
281
+ '=='
282
+ """
283
+ return self._spec[0]
284
+
285
+ @property
286
+ def version(self) -> str:
287
+ """The version of this specifier.
288
+
289
+ >>> Specifier("==1.2.3").version
290
+ '1.2.3'
291
+ """
292
+ return self._spec[1]
293
+
294
+ def __repr__(self) -> str:
295
+ """A representation of the Specifier that shows all internal state.
296
+
297
+ >>> Specifier('>=1.0.0')
298
+ <Specifier('>=1.0.0')>
299
+ >>> Specifier('>=1.0.0', prereleases=False)
300
+ <Specifier('>=1.0.0', prereleases=False)>
301
+ >>> Specifier('>=1.0.0', prereleases=True)
302
+ <Specifier('>=1.0.0', prereleases=True)>
303
+ """
304
+ pre = (
305
+ f", prereleases={self.prereleases!r}"
306
+ if self._prereleases is not None
307
+ else ""
308
+ )
309
+
310
+ return f"<{self.__class__.__name__}({str(self)!r}{pre})>"
311
+
312
+ def __str__(self) -> str:
313
+ """A string representation of the Specifier that can be round-tripped.
314
+
315
+ >>> str(Specifier('>=1.0.0'))
316
+ '>=1.0.0'
317
+ >>> str(Specifier('>=1.0.0', prereleases=False))
318
+ '>=1.0.0'
319
+ """
320
+ return "{}{}".format(*self._spec)
321
+
322
+ @property
323
+ def _canonical_spec(self) -> tuple[str, str]:
324
+ canonical_version = canonicalize_version(
325
+ self._spec[1],
326
+ strip_trailing_zero=(self._spec[0] != "~="),
327
+ )
328
+ return self._spec[0], canonical_version
329
+
330
+ def __hash__(self) -> int:
331
+ return hash(self._canonical_spec)
332
+
333
+ def __eq__(self, other: object) -> bool:
334
+ """Whether or not the two Specifier-like objects are equal.
335
+
336
+ :param other: The other object to check against.
337
+
338
+ The value of :attr:`prereleases` is ignored.
339
+
340
+ >>> Specifier("==1.2.3") == Specifier("== 1.2.3.0")
341
+ True
342
+ >>> (Specifier("==1.2.3", prereleases=False) ==
343
+ ... Specifier("==1.2.3", prereleases=True))
344
+ True
345
+ >>> Specifier("==1.2.3") == "==1.2.3"
346
+ True
347
+ >>> Specifier("==1.2.3") == Specifier("==1.2.4")
348
+ False
349
+ >>> Specifier("==1.2.3") == Specifier("~=1.2.3")
350
+ False
351
+ """
352
+ if isinstance(other, str):
353
+ try:
354
+ other = self.__class__(str(other))
355
+ except InvalidSpecifier:
356
+ return NotImplemented
357
+ elif not isinstance(other, self.__class__):
358
+ return NotImplemented
359
+
360
+ return self._canonical_spec == other._canonical_spec
361
+
362
+ def _get_operator(self, op: str) -> CallableOperator:
363
+ operator_callable: CallableOperator = getattr(
364
+ self, f"_compare_{self._operators[op]}"
365
+ )
366
+ return operator_callable
367
+
368
+ def _compare_compatible(self, prospective: Version, spec: str) -> bool:
369
+ # Compatible releases have an equivalent combination of >= and ==. That
370
+ # is that ~=2.2 is equivalent to >=2.2,==2.*. This allows us to
371
+ # implement this in terms of the other specifiers instead of
372
+ # implementing it ourselves. The only thing we need to do is construct
373
+ # the other specifiers.
374
+
375
+ # We want everything but the last item in the version, but we want to
376
+ # ignore suffix segments.
377
+ prefix = _version_join(
378
+ list(itertools.takewhile(_is_not_suffix, _version_split(spec)))[:-1]
379
+ )
380
+
381
+ # Add the prefix notation to the end of our string
382
+ prefix += ".*"
383
+
384
+ return self._get_operator(">=")(prospective, spec) and self._get_operator("==")(
385
+ prospective, prefix
386
+ )
387
+
388
+ def _compare_equal(self, prospective: Version, spec: str) -> bool:
389
+ # We need special logic to handle prefix matching
390
+ if spec.endswith(".*"):
391
+ # In the case of prefix matching we want to ignore local segment.
392
+ normalized_prospective = canonicalize_version(
393
+ prospective.public, strip_trailing_zero=False
394
+ )
395
+ # Get the normalized version string ignoring the trailing .*
396
+ normalized_spec = canonicalize_version(spec[:-2], strip_trailing_zero=False)
397
+ # Split the spec out by bangs and dots, and pretend that there is
398
+ # an implicit dot in between a release segment and a pre-release segment.
399
+ split_spec = _version_split(normalized_spec)
400
+
401
+ # Split the prospective version out by bangs and dots, and pretend
402
+ # that there is an implicit dot in between a release segment and
403
+ # a pre-release segment.
404
+ split_prospective = _version_split(normalized_prospective)
405
+
406
+ # 0-pad the prospective version before shortening it to get the correct
407
+ # shortened version.
408
+ padded_prospective, _ = _pad_version(split_prospective, split_spec)
409
+
410
+ # Shorten the prospective version to be the same length as the spec
411
+ # so that we can determine if the specifier is a prefix of the
412
+ # prospective version or not.
413
+ shortened_prospective = padded_prospective[: len(split_spec)]
414
+
415
+ return shortened_prospective == split_spec
416
+ else:
417
+ # Convert our spec string into a Version
418
+ spec_version = Version(spec)
419
+
420
+ # If the specifier does not have a local segment, then we want to
421
+ # act as if the prospective version also does not have a local
422
+ # segment.
423
+ if not spec_version.local:
424
+ prospective = Version(prospective.public)
425
+
426
+ return prospective == spec_version
427
+
428
+ def _compare_not_equal(self, prospective: Version, spec: str) -> bool:
429
+ return not self._compare_equal(prospective, spec)
430
+
431
+ def _compare_less_than_equal(self, prospective: Version, spec: str) -> bool:
432
+ # NB: Local version identifiers are NOT permitted in the version
433
+ # specifier, so local version labels can be universally removed from
434
+ # the prospective version.
435
+ return Version(prospective.public) <= Version(spec)
436
+
437
+ def _compare_greater_than_equal(self, prospective: Version, spec: str) -> bool:
438
+ # NB: Local version identifiers are NOT permitted in the version
439
+ # specifier, so local version labels can be universally removed from
440
+ # the prospective version.
441
+ return Version(prospective.public) >= Version(spec)
442
+
443
    def _compare_less_than(self, prospective: Version, spec_str: str) -> bool:
        # Convert our spec to a Version instance, since we'll want to work with
        # it as a version.
        spec = Version(spec_str)

        # Check to see if the prospective version is less than the spec
        # version. If it's not we can short circuit and just return False now
        # instead of doing extra unneeded work.
        if not prospective < spec:
            return False

        # This special case is here so that, unless the specifier itself
        # includes a pre-release version, we do not accept pre-release
        # versions for the version mentioned in the specifier (e.g. <3.1 should
        # not match 3.1.dev0, but should match 3.0.dev0).
        if not spec.is_prerelease and prospective.is_prerelease:
            if Version(prospective.base_version) == Version(spec.base_version):
                return False

        # If we've gotten to here, it means that prospective version is both
        # less than the spec version *and* it's not a pre-release of the same
        # version in the spec.
        return True
466
+
467
    def _compare_greater_than(self, prospective: Version, spec_str: str) -> bool:
        # Convert our spec to a Version instance, since we'll want to work with
        # it as a version.
        spec = Version(spec_str)

        # Check to see if the prospective version is greater than the spec
        # version. If it's not we can short circuit and just return False now
        # instead of doing extra unneeded work.
        if not prospective > spec:
            return False

        # This special case is here so that, unless the specifier itself
        # includes a post-release version, we do not accept
        # post-release versions for the version mentioned in the specifier
        # (e.g. >3.1 should not match 3.0.post0, but should match 3.2.post0).
        if not spec.is_postrelease and prospective.is_postrelease:
            if Version(prospective.base_version) == Version(spec.base_version):
                return False

        # Ensure that we do not allow a local version of the version mentioned
        # in the specifier, which is technically greater than, to match.
        if prospective.local is not None:
            if Version(prospective.base_version) == Version(spec.base_version):
                return False

        # If we've gotten to here, it means that prospective version is both
        # greater than the spec version *and* it's not a pre-release of the
        # same version in the spec.
        return True
496
+
497
+ def _compare_arbitrary(self, prospective: Version, spec: str) -> bool:
498
+ return str(prospective).lower() == str(spec).lower()
499
+
500
    def __contains__(self, item: str | Version) -> bool:
        """Return whether or not the item is contained in this specifier.

        :param item: The item to check for.

        This is used for the ``in`` operator and behaves the same as
        :meth:`contains` with no ``prereleases`` argument passed.

        >>> "1.2.3" in Specifier(">=1.2.3")
        True
        >>> Version("1.2.3") in Specifier(">=1.2.3")
        True
        >>> "1.0.0" in Specifier(">=1.2.3")
        False
        >>> "1.3.0a1" in Specifier(">=1.2.3")
        False
        >>> "1.3.0a1" in Specifier(">=1.2.3", prereleases=True)
        True
        """
        return self.contains(item)
520
+
521
    def contains(self, item: UnparsedVersion, prereleases: bool | None = None) -> bool:
        """Return whether or not the item is contained in this specifier.

        :param item:
            The item to check for, which can be a version string or a
            :class:`Version` instance.
        :param prereleases:
            Whether or not to match prereleases with this Specifier. If set to
            ``None`` (the default), it uses :attr:`prereleases` to determine
            whether or not prereleases are allowed.

        >>> Specifier(">=1.2.3").contains("1.2.3")
        True
        >>> Specifier(">=1.2.3").contains(Version("1.2.3"))
        True
        >>> Specifier(">=1.2.3").contains("1.0.0")
        False
        >>> Specifier(">=1.2.3").contains("1.3.0a1")
        False
        >>> Specifier(">=1.2.3", prereleases=True).contains("1.3.0a1")
        True
        >>> Specifier(">=1.2.3").contains("1.3.0a1", prereleases=True)
        True
        """

        # Determine if prereleases are to be allowed or not.
        if prereleases is None:
            prereleases = self.prereleases

        # Normalize item to a Version, this allows us to have a shortcut for
        # "2.0" in Specifier(">=2")
        normalized_item = _coerce_version(item)

        # Determine if we should be supporting prereleases in this specifier
        # or not, if we do not support prereleases then we can short circuit
        # the logic if this version is a prerelease.
        if normalized_item.is_prerelease and not prereleases:
            return False

        # Actually do the comparison to determine if this item is contained
        # within this Specifier or not.
        operator_callable: CallableOperator = self._get_operator(self.operator)
        return operator_callable(normalized_item, self.version)
564
+
565
    def filter(
        self, iterable: Iterable[UnparsedVersionVar], prereleases: bool | None = None
    ) -> Iterator[UnparsedVersionVar]:
        """Filter items in the given iterable, that match the specifier.

        :param iterable:
            An iterable that can contain version strings and :class:`Version` instances.
            The items in the iterable will be filtered according to the specifier.
        :param prereleases:
            Whether or not to allow prereleases in the returned iterator. If set to
            ``None`` (the default), it will intelligently decide whether to allow
            prereleases or not (based on the :attr:`prereleases` attribute, and
            whether the only versions matching are prereleases).

        This method is smarter than just ``filter(Specifier().contains, [...])``
        because it implements the rule from :pep:`440` that a prerelease item
        SHOULD be accepted if no other versions match the given specifier.

        >>> list(Specifier(">=1.2.3").filter(["1.2", "1.3", "1.5a1"]))
        ['1.3']
        >>> list(Specifier(">=1.2.3").filter(["1.2", "1.2.3", "1.3", Version("1.4")]))
        ['1.2.3', '1.3', <Version('1.4')>]
        >>> list(Specifier(">=1.2.3").filter(["1.2", "1.5a1"]))
        ['1.5a1']
        >>> list(Specifier(">=1.2.3").filter(["1.3", "1.5a1"], prereleases=True))
        ['1.3', '1.5a1']
        >>> list(Specifier(">=1.2.3", prereleases=True).filter(["1.3", "1.5a1"]))
        ['1.3', '1.5a1']
        """

        yielded = False
        found_prereleases = []

        kw = {"prereleases": prereleases if prereleases is not None else True}

        # Attempt to iterate over all the values in the iterable and if any of
        # them match, yield them.
        for version in iterable:
            parsed_version = _coerce_version(version)

            if self.contains(parsed_version, **kw):
                # If our version is a prerelease, and we were not set to allow
                # prereleases, then we'll store it for later in case nothing
                # else matches this specifier.
                if parsed_version.is_prerelease and not (
                    prereleases or self.prereleases
                ):
                    found_prereleases.append(version)
                # Either this is not a prerelease, or we should have been
                # accepting prereleases from the beginning.
                else:
                    yielded = True
                    yield version

        # Now that we've iterated over everything, determine if we've yielded
        # any values, and if we have not and we have any prereleases stored up
        # then we will go ahead and yield the prereleases.
        if not yielded and found_prereleases:
            for version in found_prereleases:
                yield version
625
+
626
+
627
+ _prefix_regex = re.compile(r"^([0-9]+)((?:a|b|c|rc)[0-9]+)$")
628
+
629
+
630
+ def _version_split(version: str) -> list[str]:
631
+ """Split version into components.
632
+
633
+ The split components are intended for version comparison. The logic does
634
+ not attempt to retain the original version string, so joining the
635
+ components back with :func:`_version_join` may not produce the original
636
+ version string.
637
+ """
638
+ result: list[str] = []
639
+
640
+ epoch, _, rest = version.rpartition("!")
641
+ result.append(epoch or "0")
642
+
643
+ for item in rest.split("."):
644
+ match = _prefix_regex.search(item)
645
+ if match:
646
+ result.extend(match.groups())
647
+ else:
648
+ result.append(item)
649
+ return result
650
+
651
+
652
+ def _version_join(components: list[str]) -> str:
653
+ """Join split version components into a version string.
654
+
655
+ This function assumes the input came from :func:`_version_split`, where the
656
+ first component must be the epoch (either empty or numeric), and all other
657
+ components numeric.
658
+ """
659
+ epoch, *rest = components
660
+ return f"{epoch}!{'.'.join(rest)}"
661
+
662
+
663
+ def _is_not_suffix(segment: str) -> bool:
664
+ return not any(
665
+ segment.startswith(prefix) for prefix in ("dev", "a", "b", "rc", "post")
666
+ )
667
+
668
+
669
+ def _pad_version(left: list[str], right: list[str]) -> tuple[list[str], list[str]]:
670
+ left_split, right_split = [], []
671
+
672
+ # Get the release segment of our versions
673
+ left_split.append(list(itertools.takewhile(lambda x: x.isdigit(), left)))
674
+ right_split.append(list(itertools.takewhile(lambda x: x.isdigit(), right)))
675
+
676
+ # Get the rest of our versions
677
+ left_split.append(left[len(left_split[0]) :])
678
+ right_split.append(right[len(right_split[0]) :])
679
+
680
+ # Insert our padding
681
+ left_split.insert(1, ["0"] * max(0, len(right_split[0]) - len(left_split[0])))
682
+ right_split.insert(1, ["0"] * max(0, len(left_split[0]) - len(right_split[0])))
683
+
684
+ return (
685
+ list(itertools.chain.from_iterable(left_split)),
686
+ list(itertools.chain.from_iterable(right_split)),
687
+ )
688
+
689
+
690
class SpecifierSet(BaseSpecifier):
    """This class abstracts handling of a set of version specifiers.

    It can be passed a single specifier (``>=3.0``), a comma-separated list of
    specifiers (``>=3.0,!=3.1``), or no specifier at all.
    """

    def __init__(
        self,
        specifiers: str | Iterable[Specifier] = "",
        prereleases: bool | None = None,
    ) -> None:
        """Initialize a SpecifierSet instance.

        :param specifiers:
            The string representation of a specifier or a comma-separated list of
            specifiers which will be parsed and normalized before use.
            May also be an iterable of ``Specifier`` instances, which will be used
            as is.
        :param prereleases:
            This tells the SpecifierSet if it should accept prerelease versions if
            applicable or not. The default of ``None`` will autodetect it from the
            given specifiers.

        :raises InvalidSpecifier:
            If the given ``specifiers`` are not parseable, then this exception will
            be raised.
        """

        if isinstance(specifiers, str):
            # Split on `,` to break each individual specifier into its own item, and
            # strip each item to remove leading/trailing whitespace.
            split_specifiers = [s.strip() for s in specifiers.split(",") if s.strip()]

            # Make each individual specifier a Specifier and save in a frozen set
            # for later.
            self._specs = frozenset(map(Specifier, split_specifiers))
        else:
            # Save the supplied specifiers in a frozen set.
            self._specs = frozenset(specifiers)

        # Store our prereleases value so we can use it later to determine if
        # we accept prereleases or not.
        self._prereleases = prereleases

    @property
    def prereleases(self) -> bool | None:
        # If we have been given an explicit prerelease modifier, then we'll
        # pass that through here.
        if self._prereleases is not None:
            return self._prereleases

        # If we don't have any specifiers, and we don't have a forced value,
        # then we'll just return None since we don't know if this should have
        # pre-releases or not.
        if not self._specs:
            return None

        # Otherwise we'll see if any of the given specifiers accept
        # prereleases, if any of them do we'll return True, otherwise False.
        return any(s.prereleases for s in self._specs)

    @prereleases.setter
    def prereleases(self, value: bool) -> None:
        self._prereleases = value

    def __repr__(self) -> str:
        """A representation of the specifier set that shows all internal state.

        Note that the ordering of the individual specifiers within the set may not
        match the input string.

        >>> SpecifierSet('>=1.0.0,!=2.0.0')
        <SpecifierSet('!=2.0.0,>=1.0.0')>
        >>> SpecifierSet('>=1.0.0,!=2.0.0', prereleases=False)
        <SpecifierSet('!=2.0.0,>=1.0.0', prereleases=False)>
        >>> SpecifierSet('>=1.0.0,!=2.0.0', prereleases=True)
        <SpecifierSet('!=2.0.0,>=1.0.0', prereleases=True)>
        """
        pre = (
            f", prereleases={self.prereleases!r}"
            if self._prereleases is not None
            else ""
        )

        return f"<SpecifierSet({str(self)!r}{pre})>"

    def __str__(self) -> str:
        """A string representation of the specifier set that can be round-tripped.

        Note that the ordering of the individual specifiers within the set may not
        match the input string.

        >>> str(SpecifierSet(">=1.0.0,!=1.0.1"))
        '!=1.0.1,>=1.0.0'
        >>> str(SpecifierSet(">=1.0.0,!=1.0.1", prereleases=False))
        '!=1.0.1,>=1.0.0'
        """
        return ",".join(sorted(str(s) for s in self._specs))

    def __hash__(self) -> int:
        # Hash only the frozen specifier set; `_prereleases` is deliberately
        # excluded, matching __eq__ which also ignores it.
        return hash(self._specs)

    def __and__(self, other: SpecifierSet | str) -> SpecifierSet:
        """Return a SpecifierSet which is a combination of the two sets.

        :param other: The other object to combine with.

        >>> SpecifierSet(">=1.0.0,!=1.0.1") & '<=2.0.0,!=2.0.1'
        <SpecifierSet('!=1.0.1,!=2.0.1,<=2.0.0,>=1.0.0')>
        >>> SpecifierSet(">=1.0.0,!=1.0.1") & SpecifierSet('<=2.0.0,!=2.0.1')
        <SpecifierSet('!=1.0.1,!=2.0.1,<=2.0.0,>=1.0.0')>
        """
        if isinstance(other, str):
            other = SpecifierSet(other)
        elif not isinstance(other, SpecifierSet):
            return NotImplemented

        specifier = SpecifierSet()
        specifier._specs = frozenset(self._specs | other._specs)

        # A prerelease override may only survive the combination when the two
        # sides do not explicitly contradict each other.
        if self._prereleases is None and other._prereleases is not None:
            specifier._prereleases = other._prereleases
        elif self._prereleases is not None and other._prereleases is None:
            specifier._prereleases = self._prereleases
        elif self._prereleases == other._prereleases:
            specifier._prereleases = self._prereleases
        else:
            raise ValueError(
                "Cannot combine SpecifierSets with True and False prerelease "
                "overrides."
            )

        return specifier

    def __eq__(self, other: object) -> bool:
        """Whether or not the two SpecifierSet-like objects are equal.

        :param other: The other object to check against.

        The value of :attr:`prereleases` is ignored.

        >>> SpecifierSet(">=1.0.0,!=1.0.1") == SpecifierSet(">=1.0.0,!=1.0.1")
        True
        >>> (SpecifierSet(">=1.0.0,!=1.0.1", prereleases=False) ==
        ...  SpecifierSet(">=1.0.0,!=1.0.1", prereleases=True))
        True
        >>> SpecifierSet(">=1.0.0,!=1.0.1") == ">=1.0.0,!=1.0.1"
        True
        >>> SpecifierSet(">=1.0.0,!=1.0.1") == SpecifierSet(">=1.0.0")
        False
        >>> SpecifierSet(">=1.0.0,!=1.0.1") == SpecifierSet(">=1.0.0,!=1.0.2")
        False
        """
        if isinstance(other, (str, Specifier)):
            other = SpecifierSet(str(other))
        elif not isinstance(other, SpecifierSet):
            return NotImplemented

        return self._specs == other._specs

    def __len__(self) -> int:
        """Returns the number of specifiers in this specifier set."""
        return len(self._specs)

    def __iter__(self) -> Iterator[Specifier]:
        """
        Returns an iterator over all the underlying :class:`Specifier` instances
        in this specifier set.

        >>> sorted(SpecifierSet(">=1.0.0,!=1.0.1"), key=str)
        [<Specifier('!=1.0.1')>, <Specifier('>=1.0.0')>]
        """
        return iter(self._specs)

    def __contains__(self, item: UnparsedVersion) -> bool:
        """Return whether or not the item is contained in this specifier.

        :param item: The item to check for.

        This is used for the ``in`` operator and behaves the same as
        :meth:`contains` with no ``prereleases`` argument passed.

        >>> "1.2.3" in SpecifierSet(">=1.0.0,!=1.0.1")
        True
        >>> Version("1.2.3") in SpecifierSet(">=1.0.0,!=1.0.1")
        True
        >>> "1.0.1" in SpecifierSet(">=1.0.0,!=1.0.1")
        False
        >>> "1.3.0a1" in SpecifierSet(">=1.0.0,!=1.0.1")
        False
        >>> "1.3.0a1" in SpecifierSet(">=1.0.0,!=1.0.1", prereleases=True)
        True
        """
        return self.contains(item)

    def contains(
        self,
        item: UnparsedVersion,
        prereleases: bool | None = None,
        installed: bool | None = None,
    ) -> bool:
        """Return whether or not the item is contained in this SpecifierSet.

        :param item:
            The item to check for, which can be a version string or a
            :class:`Version` instance.
        :param prereleases:
            Whether or not to match prereleases with this SpecifierSet. If set to
            ``None`` (the default), it uses :attr:`prereleases` to determine
            whether or not prereleases are allowed.
        :param installed:
            If truthy, a prerelease ``item`` is compared by its base version,
            treating an installed prerelease as satisfying a final-release spec.

        >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.2.3")
        True
        >>> SpecifierSet(">=1.0.0,!=1.0.1").contains(Version("1.2.3"))
        True
        >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.0.1")
        False
        >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.3.0a1")
        False
        >>> SpecifierSet(">=1.0.0,!=1.0.1", prereleases=True).contains("1.3.0a1")
        True
        >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.3.0a1", prereleases=True)
        True
        """
        # Ensure that our item is a Version instance.
        if not isinstance(item, Version):
            item = Version(item)

        # Determine if we're forcing a prerelease or not, if we're not forcing
        # one for this particular filter call, then we'll use whatever the
        # SpecifierSet thinks for whether or not we should support prereleases.
        if prereleases is None:
            prereleases = self.prereleases

        # We can determine if we're going to allow pre-releases by looking to
        # see if any of the underlying items supports them. If none of them do
        # and this item is a pre-release then we do not allow it and we can
        # short circuit that here.
        # Note: This means that 1.0.dev1 would not be contained in something
        # like >=1.0.devabc however it would be in >=1.0.devabc,>0.0.dev0
        if not prereleases and item.is_prerelease:
            return False

        if installed and item.is_prerelease:
            item = Version(item.base_version)

        # We simply dispatch to the underlying specs here to make sure that the
        # given version is contained within all of them.
        # Note: This use of all() here means that an empty set of specifiers
        # will always return True, this is an explicit design decision.
        return all(s.contains(item, prereleases=prereleases) for s in self._specs)

    def filter(
        self, iterable: Iterable[UnparsedVersionVar], prereleases: bool | None = None
    ) -> Iterator[UnparsedVersionVar]:
        """Filter items in the given iterable, that match the specifiers in this set.

        :param iterable:
            An iterable that can contain version strings and :class:`Version` instances.
            The items in the iterable will be filtered according to the specifier.
        :param prereleases:
            Whether or not to allow prereleases in the returned iterator. If set to
            ``None`` (the default), it will intelligently decide whether to allow
            prereleases or not (based on the :attr:`prereleases` attribute, and
            whether the only versions matching are prereleases).

        This method is smarter than just ``filter(SpecifierSet(...).contains, [...])``
        because it implements the rule from :pep:`440` that a prerelease item
        SHOULD be accepted if no other versions match the given specifier.

        >>> list(SpecifierSet(">=1.2.3").filter(["1.2", "1.3", "1.5a1"]))
        ['1.3']
        >>> list(SpecifierSet(">=1.2.3").filter(["1.2", "1.3", Version("1.4")]))
        ['1.3', <Version('1.4')>]
        >>> list(SpecifierSet(">=1.2.3").filter(["1.2", "1.5a1"]))
        []
        >>> list(SpecifierSet(">=1.2.3").filter(["1.3", "1.5a1"], prereleases=True))
        ['1.3', '1.5a1']
        >>> list(SpecifierSet(">=1.2.3", prereleases=True).filter(["1.3", "1.5a1"]))
        ['1.3', '1.5a1']

        An "empty" SpecifierSet will filter items based on the presence of prerelease
        versions in the set.

        >>> list(SpecifierSet("").filter(["1.3", "1.5a1"]))
        ['1.3']
        >>> list(SpecifierSet("").filter(["1.5a1"]))
        ['1.5a1']
        >>> list(SpecifierSet("", prereleases=True).filter(["1.3", "1.5a1"]))
        ['1.3', '1.5a1']
        >>> list(SpecifierSet("").filter(["1.3", "1.5a1"], prereleases=True))
        ['1.3', '1.5a1']
        """
        # Determine if we're forcing a prerelease or not, if we're not forcing
        # one for this particular filter call, then we'll use whatever the
        # SpecifierSet thinks for whether or not we should support prereleases.
        if prereleases is None:
            prereleases = self.prereleases

        # If we have any specifiers, then we want to wrap our iterable in the
        # filter method for each one, this will act as a logical AND amongst
        # each specifier.
        if self._specs:
            for spec in self._specs:
                iterable = spec.filter(iterable, prereleases=bool(prereleases))
            return iter(iterable)
        # If we do not have any specifiers, then we need to have a rough filter
        # which will filter out any pre-releases, unless there are no final
        # releases.
        else:
            filtered: list[UnparsedVersionVar] = []
            found_prereleases: list[UnparsedVersionVar] = []

            for item in iterable:
                parsed_version = _coerce_version(item)

                # Store any item which is a pre-release for later unless we've
                # already found a final version or we are accepting prereleases
                if parsed_version.is_prerelease and not prereleases:
                    if not filtered:
                        found_prereleases.append(item)
                else:
                    filtered.append(item)

            # If we've found no items except for pre-releases, then we'll go
            # ahead and use the pre-releases
            if not filtered and found_prereleases and prereleases is None:
                return iter(found_prereleases)

            return iter(filtered)
tuning-competition-baseline/.venv/lib/python3.11/site-packages/packaging/tags.py ADDED
@@ -0,0 +1,617 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This file is dual licensed under the terms of the Apache License, Version
2
+ # 2.0, and the BSD License. See the LICENSE file in the root of this repository
3
+ # for complete details.
4
+
5
+ from __future__ import annotations
6
+
7
+ import logging
8
+ import platform
9
+ import re
10
+ import struct
11
+ import subprocess
12
+ import sys
13
+ import sysconfig
14
+ from importlib.machinery import EXTENSION_SUFFIXES
15
+ from typing import (
16
+ Iterable,
17
+ Iterator,
18
+ Sequence,
19
+ Tuple,
20
+ cast,
21
+ )
22
+
23
+ from . import _manylinux, _musllinux
24
+
25
logger = logging.getLogger(__name__)

# Type aliases used throughout this module.
PythonVersion = Sequence[int]
AppleVersion = Tuple[int, int]

# Map long interpreter implementation names to the short forms used in wheel
# tags (e.g. "cpython" -> "cp").
INTERPRETER_SHORT_NAMES: dict[str, str] = {
    "python": "py",  # Generic.
    "cpython": "cp",
    "pypy": "pp",
    "ironpython": "ip",
    "jython": "jy",
}


# True when the running interpreter uses 32-bit pointers.
_32_BIT_INTERPRETER = struct.calcsize("P") == 4
40
+
41
+
42
class Tag:
    """
    A representation of the tag triple for a wheel.

    Instances are considered immutable and thus are hashable. Equality checking
    is also supported.
    """

    __slots__ = ["_abi", "_hash", "_interpreter", "_platform"]

    def __init__(self, interpreter: str, abi: str, platform: str) -> None:
        # Tags compare case-insensitively, so normalize each component once.
        self._interpreter = interpreter.lower()
        self._abi = abi.lower()
        self._platform = platform.lower()
        # The __hash__ of every single element in a Set[Tag] will be evaluated each time
        # that a set calls its `.disjoint()` method, which may be called hundreds of
        # times when scanning a page of links for packages with tags matching that
        # Set[Tag]. Pre-computing the value here produces significant speedups for
        # downstream consumers.
        self._hash = hash((self._interpreter, self._abi, self._platform))

    @property
    def interpreter(self) -> str:
        return self._interpreter

    @property
    def abi(self) -> str:
        return self._abi

    @property
    def platform(self) -> str:
        return self._platform

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, Tag):
            return NotImplemented

        return (
            (self._hash == other._hash)  # Short-circuit ASAP for perf reasons.
            and (self._platform == other._platform)
            and (self._abi == other._abi)
            and (self._interpreter == other._interpreter)
        )

    def __hash__(self) -> int:
        return self._hash

    def __str__(self) -> str:
        return f"{self._interpreter}-{self._abi}-{self._platform}"

    def __repr__(self) -> str:
        return f"<{self} @ {id(self)}>"
94
+
95
+
96
def parse_tag(tag: str) -> frozenset[Tag]:
    """
    Parses the provided tag (e.g. `py3-none-any`) into a frozenset of Tag instances.

    Returning a set is required due to the possibility that the tag is a
    compressed tag set.
    """
    interpreters, abis, platforms = tag.split("-")
    # Each dash-separated field may itself be a dot-separated list; the full
    # tag set is the cross product of the three fields.
    return frozenset(
        Tag(interpreter, abi, platform_)
        for interpreter in interpreters.split(".")
        for abi in abis.split(".")
        for platform_ in platforms.split(".")
    )
110
+
111
+
112
+ def _get_config_var(name: str, warn: bool = False) -> int | str | None:
113
+ value: int | str | None = sysconfig.get_config_var(name)
114
+ if value is None and warn:
115
+ logger.debug(
116
+ "Config variable '%s' is unset, Python ABI tag may be incorrect", name
117
+ )
118
+ return value
119
+
120
+
121
+ def _normalize_string(string: str) -> str:
122
+ return string.replace(".", "_").replace("-", "_").replace(" ", "_")
123
+
124
+
125
+ def _is_threaded_cpython(abis: list[str]) -> bool:
126
+ """
127
+ Determine if the ABI corresponds to a threaded (`--disable-gil`) build.
128
+
129
+ The threaded builds are indicated by a "t" in the abiflags.
130
+ """
131
+ if len(abis) == 0:
132
+ return False
133
+ # expect e.g., cp313
134
+ m = re.match(r"cp\d+(.*)", abis[0])
135
+ if not m:
136
+ return False
137
+ abiflags = m.group(1)
138
+ return "t" in abiflags
139
+
140
+
141
+ def _abi3_applies(python_version: PythonVersion, threading: bool) -> bool:
142
+ """
143
+ Determine if the Python version supports abi3.
144
+
145
+ PEP 384 was first implemented in Python 3.2. The threaded (`--disable-gil`)
146
+ builds do not support abi3.
147
+ """
148
+ return len(python_version) > 1 and tuple(python_version) >= (3, 2) and not threading
149
+
150
+
151
def _cpython_abis(py_version: PythonVersion, warn: bool = False) -> list[str]:
    """Return the ABI tags for a CPython of *py_version*, most specific first."""
    py_version = tuple(py_version)  # To allow for version comparison.
    abis = []
    version = _version_nodot(py_version[:2])
    threading = debug = pymalloc = ucs4 = ""
    with_debug = _get_config_var("Py_DEBUG", warn)
    has_refcount = hasattr(sys, "gettotalrefcount")
    # Windows doesn't set Py_DEBUG, so checking for support of debug-compiled
    # extension modules is the best option.
    # https://github.com/pypa/pip/issues/3383#issuecomment-173267692
    has_ext = "_d.pyd" in EXTENSION_SUFFIXES
    if with_debug or (with_debug is None and (has_refcount or has_ext)):
        debug = "d"
    if py_version >= (3, 13) and _get_config_var("Py_GIL_DISABLED", warn):
        threading = "t"
    if py_version < (3, 8):
        # Older CPythons encode pymalloc ("m") and UCS-4 ("u") in the ABI flags.
        with_pymalloc = _get_config_var("WITH_PYMALLOC", warn)
        if with_pymalloc or with_pymalloc is None:
            pymalloc = "m"
        if py_version < (3, 3):
            unicode_size = _get_config_var("Py_UNICODE_SIZE", warn)
            if unicode_size == 4 or (
                unicode_size is None and sys.maxunicode == 0x10FFFF
            ):
                ucs4 = "u"
    elif debug:
        # Debug builds can also load "normal" extension modules.
        # We can also assume no UCS-4 or pymalloc requirement.
        abis.append(f"cp{version}{threading}")
    abis.insert(0, f"cp{version}{threading}{debug}{pymalloc}{ucs4}")
    return abis
182
+
183
+
184
def cpython_tags(
    python_version: PythonVersion | None = None,
    abis: Iterable[str] | None = None,
    platforms: Iterable[str] | None = None,
    *,
    warn: bool = False,
) -> Iterator[Tag]:
    """
    Yields the tags for a CPython interpreter.

    The tags consist of:
    - cp<python_version>-<abi>-<platform>
    - cp<python_version>-abi3-<platform>
    - cp<python_version>-none-<platform>
    - cp<less than python_version>-abi3-<platform>  # Older Python versions down to 3.2.

    If python_version only specifies a major version then user-provided ABIs and
    the 'none' ABI tag will be used.

    If 'abi3' or 'none' are specified in 'abis' then they will be yielded at
    their normal position and not at the beginning.
    """
    if not python_version:
        python_version = sys.version_info[:2]

    interpreter = f"cp{_version_nodot(python_version[:2])}"

    if abis is None:
        if len(python_version) > 1:
            abis = _cpython_abis(python_version, warn)
        else:
            abis = []
    abis = list(abis)
    # 'abi3' and 'none' are explicitly handled later.
    for explicit_abi in ("abi3", "none"):
        try:
            abis.remove(explicit_abi)
        except ValueError:
            pass

    platforms = list(platforms or platform_tags())
    for abi in abis:
        for platform_ in platforms:
            yield Tag(interpreter, abi, platform_)

    threading = _is_threaded_cpython(abis)
    use_abi3 = _abi3_applies(python_version, threading)
    if use_abi3:
        yield from (Tag(interpreter, "abi3", platform_) for platform_ in platforms)
    yield from (Tag(interpreter, "none", platform_) for platform_ in platforms)

    if use_abi3:
        # abi3 wheels built for any older minor version (down to 3.2) are also
        # usable by this interpreter.
        for minor_version in range(python_version[1] - 1, 1, -1):
            for platform_ in platforms:
                version = _version_nodot((python_version[0], minor_version))
                interpreter = f"cp{version}"
                yield Tag(interpreter, "abi3", platform_)
241
+
242
+
243
+ def _generic_abi() -> list[str]:
244
+ """
245
+ Return the ABI tag based on EXT_SUFFIX.
246
+ """
247
+ # The following are examples of `EXT_SUFFIX`.
248
+ # We want to keep the parts which are related to the ABI and remove the
249
+ # parts which are related to the platform:
250
+ # - linux: '.cpython-310-x86_64-linux-gnu.so' => cp310
251
+ # - mac: '.cpython-310-darwin.so' => cp310
252
+ # - win: '.cp310-win_amd64.pyd' => cp310
253
+ # - win: '.pyd' => cp37 (uses _cpython_abis())
254
+ # - pypy: '.pypy38-pp73-x86_64-linux-gnu.so' => pypy38_pp73
255
+ # - graalpy: '.graalpy-38-native-x86_64-darwin.dylib'
256
+ # => graalpy_38_native
257
+
258
+ ext_suffix = _get_config_var("EXT_SUFFIX", warn=True)
259
+ if not isinstance(ext_suffix, str) or ext_suffix[0] != ".":
260
+ raise SystemError("invalid sysconfig.get_config_var('EXT_SUFFIX')")
261
+ parts = ext_suffix.split(".")
262
+ if len(parts) < 3:
263
+ # CPython3.7 and earlier uses ".pyd" on Windows.
264
+ return _cpython_abis(sys.version_info[:2])
265
+ soabi = parts[1]
266
+ if soabi.startswith("cpython"):
267
+ # non-windows
268
+ abi = "cp" + soabi.split("-")[1]
269
+ elif soabi.startswith("cp"):
270
+ # windows
271
+ abi = soabi.split("-")[0]
272
+ elif soabi.startswith("pypy"):
273
+ abi = "-".join(soabi.split("-")[:2])
274
+ elif soabi.startswith("graalpy"):
275
+ abi = "-".join(soabi.split("-")[:3])
276
+ elif soabi:
277
+ # pyston, ironpython, others?
278
+ abi = soabi
279
+ else:
280
+ return []
281
+ return [_normalize_string(abi)]
282
+
283
+
284
+ def generic_tags(
285
+ interpreter: str | None = None,
286
+ abis: Iterable[str] | None = None,
287
+ platforms: Iterable[str] | None = None,
288
+ *,
289
+ warn: bool = False,
290
+ ) -> Iterator[Tag]:
291
+ """
292
+ Yields the tags for a generic interpreter.
293
+
294
+ The tags consist of:
295
+ - <interpreter>-<abi>-<platform>
296
+
297
+ The "none" ABI will be added if it was not explicitly provided.
298
+ """
299
+ if not interpreter:
300
+ interp_name = interpreter_name()
301
+ interp_version = interpreter_version(warn=warn)
302
+ interpreter = "".join([interp_name, interp_version])
303
+ if abis is None:
304
+ abis = _generic_abi()
305
+ else:
306
+ abis = list(abis)
307
+ platforms = list(platforms or platform_tags())
308
+ if "none" not in abis:
309
+ abis.append("none")
310
+ for abi in abis:
311
+ for platform_ in platforms:
312
+ yield Tag(interpreter, abi, platform_)
313
+
314
+
315
+ def _py_interpreter_range(py_version: PythonVersion) -> Iterator[str]:
316
+ """
317
+ Yields Python versions in descending order.
318
+
319
+ After the latest version, the major-only version will be yielded, and then
320
+ all previous versions of that major version.
321
+ """
322
+ if len(py_version) > 1:
323
+ yield f"py{_version_nodot(py_version[:2])}"
324
+ yield f"py{py_version[0]}"
325
+ if len(py_version) > 1:
326
+ for minor in range(py_version[1] - 1, -1, -1):
327
+ yield f"py{_version_nodot((py_version[0], minor))}"
328
+
329
+
330
+ def compatible_tags(
331
+ python_version: PythonVersion | None = None,
332
+ interpreter: str | None = None,
333
+ platforms: Iterable[str] | None = None,
334
+ ) -> Iterator[Tag]:
335
+ """
336
+ Yields the sequence of tags that are compatible with a specific version of Python.
337
+
338
+ The tags consist of:
339
+ - py*-none-<platform>
340
+ - <interpreter>-none-any # ... if `interpreter` is provided.
341
+ - py*-none-any
342
+ """
343
+ if not python_version:
344
+ python_version = sys.version_info[:2]
345
+ platforms = list(platforms or platform_tags())
346
+ for version in _py_interpreter_range(python_version):
347
+ for platform_ in platforms:
348
+ yield Tag(version, "none", platform_)
349
+ if interpreter:
350
+ yield Tag(interpreter, "none", "any")
351
+ for version in _py_interpreter_range(python_version):
352
+ yield Tag(version, "none", "any")
353
+
354
+
355
+ def _mac_arch(arch: str, is_32bit: bool = _32_BIT_INTERPRETER) -> str:
356
+ if not is_32bit:
357
+ return arch
358
+
359
+ if arch.startswith("ppc"):
360
+ return "ppc"
361
+
362
+ return "i386"
363
+
364
+
365
+ def _mac_binary_formats(version: AppleVersion, cpu_arch: str) -> list[str]:
366
+ formats = [cpu_arch]
367
+ if cpu_arch == "x86_64":
368
+ if version < (10, 4):
369
+ return []
370
+ formats.extend(["intel", "fat64", "fat32"])
371
+
372
+ elif cpu_arch == "i386":
373
+ if version < (10, 4):
374
+ return []
375
+ formats.extend(["intel", "fat32", "fat"])
376
+
377
+ elif cpu_arch == "ppc64":
378
+ # TODO: Need to care about 32-bit PPC for ppc64 through 10.2?
379
+ if version > (10, 5) or version < (10, 4):
380
+ return []
381
+ formats.append("fat64")
382
+
383
+ elif cpu_arch == "ppc":
384
+ if version > (10, 6):
385
+ return []
386
+ formats.extend(["fat32", "fat"])
387
+
388
+ if cpu_arch in {"arm64", "x86_64"}:
389
+ formats.append("universal2")
390
+
391
+ if cpu_arch in {"x86_64", "i386", "ppc64", "ppc", "intel"}:
392
+ formats.append("universal")
393
+
394
+ return formats
395
+
396
+
397
+ def mac_platforms(
398
+ version: AppleVersion | None = None, arch: str | None = None
399
+ ) -> Iterator[str]:
400
+ """
401
+ Yields the platform tags for a macOS system.
402
+
403
+ The `version` parameter is a two-item tuple specifying the macOS version to
404
+ generate platform tags for. The `arch` parameter is the CPU architecture to
405
+ generate platform tags for. Both parameters default to the appropriate value
406
+ for the current system.
407
+ """
408
+ version_str, _, cpu_arch = platform.mac_ver()
409
+ if version is None:
410
+ version = cast("AppleVersion", tuple(map(int, version_str.split(".")[:2])))
411
+ if version == (10, 16):
412
+ # When built against an older macOS SDK, Python will report macOS 10.16
413
+ # instead of the real version.
414
+ version_str = subprocess.run(
415
+ [
416
+ sys.executable,
417
+ "-sS",
418
+ "-c",
419
+ "import platform; print(platform.mac_ver()[0])",
420
+ ],
421
+ check=True,
422
+ env={"SYSTEM_VERSION_COMPAT": "0"},
423
+ stdout=subprocess.PIPE,
424
+ text=True,
425
+ ).stdout
426
+ version = cast("AppleVersion", tuple(map(int, version_str.split(".")[:2])))
427
+ else:
428
+ version = version
429
+ if arch is None:
430
+ arch = _mac_arch(cpu_arch)
431
+ else:
432
+ arch = arch
433
+
434
+ if (10, 0) <= version and version < (11, 0):
435
+ # Prior to Mac OS 11, each yearly release of Mac OS bumped the
436
+ # "minor" version number. The major version was always 10.
437
+ major_version = 10
438
+ for minor_version in range(version[1], -1, -1):
439
+ compat_version = major_version, minor_version
440
+ binary_formats = _mac_binary_formats(compat_version, arch)
441
+ for binary_format in binary_formats:
442
+ yield f"macosx_{major_version}_{minor_version}_{binary_format}"
443
+
444
+ if version >= (11, 0):
445
+ # Starting with Mac OS 11, each yearly release bumps the major version
446
+ # number. The minor versions are now the midyear updates.
447
+ minor_version = 0
448
+ for major_version in range(version[0], 10, -1):
449
+ compat_version = major_version, minor_version
450
+ binary_formats = _mac_binary_formats(compat_version, arch)
451
+ for binary_format in binary_formats:
452
+ yield f"macosx_{major_version}_{minor_version}_{binary_format}"
453
+
454
+ if version >= (11, 0):
455
+ # Mac OS 11 on x86_64 is compatible with binaries from previous releases.
456
+ # Arm64 support was introduced in 11.0, so no Arm binaries from previous
457
+ # releases exist.
458
+ #
459
+ # However, the "universal2" binary format can have a
460
+ # macOS version earlier than 11.0 when the x86_64 part of the binary supports
461
+ # that version of macOS.
462
+ major_version = 10
463
+ if arch == "x86_64":
464
+ for minor_version in range(16, 3, -1):
465
+ compat_version = major_version, minor_version
466
+ binary_formats = _mac_binary_formats(compat_version, arch)
467
+ for binary_format in binary_formats:
468
+ yield f"macosx_{major_version}_{minor_version}_{binary_format}"
469
+ else:
470
+ for minor_version in range(16, 3, -1):
471
+ compat_version = major_version, minor_version
472
+ binary_format = "universal2"
473
+ yield f"macosx_{major_version}_{minor_version}_{binary_format}"
474
+
475
+
476
+ def ios_platforms(
477
+ version: AppleVersion | None = None, multiarch: str | None = None
478
+ ) -> Iterator[str]:
479
+ """
480
+ Yields the platform tags for an iOS system.
481
+
482
+ :param version: A two-item tuple specifying the iOS version to generate
483
+ platform tags for. Defaults to the current iOS version.
484
+ :param multiarch: The CPU architecture+ABI to generate platform tags for -
485
+ (the value used by `sys.implementation._multiarch` e.g.,
486
+ `arm64_iphoneos` or `x84_64_iphonesimulator`). Defaults to the current
487
+ multiarch value.
488
+ """
489
+ if version is None:
490
+ # if iOS is the current platform, ios_ver *must* be defined. However,
491
+ # it won't exist for CPython versions before 3.13, which causes a mypy
492
+ # error.
493
+ _, release, _, _ = platform.ios_ver() # type: ignore[attr-defined, unused-ignore]
494
+ version = cast("AppleVersion", tuple(map(int, release.split(".")[:2])))
495
+
496
+ if multiarch is None:
497
+ multiarch = sys.implementation._multiarch
498
+ multiarch = multiarch.replace("-", "_")
499
+
500
+ ios_platform_template = "ios_{major}_{minor}_{multiarch}"
501
+
502
+ # Consider any iOS major.minor version from the version requested, down to
503
+ # 12.0. 12.0 is the first iOS version that is known to have enough features
504
+ # to support CPython. Consider every possible minor release up to X.9. There
505
+ # highest the minor has ever gone is 8 (14.8 and 15.8) but having some extra
506
+ # candidates that won't ever match doesn't really hurt, and it saves us from
507
+ # having to keep an explicit list of known iOS versions in the code. Return
508
+ # the results descending order of version number.
509
+
510
+ # If the requested major version is less than 12, there won't be any matches.
511
+ if version[0] < 12:
512
+ return
513
+
514
+ # Consider the actual X.Y version that was requested.
515
+ yield ios_platform_template.format(
516
+ major=version[0], minor=version[1], multiarch=multiarch
517
+ )
518
+
519
+ # Consider every minor version from X.0 to the minor version prior to the
520
+ # version requested by the platform.
521
+ for minor in range(version[1] - 1, -1, -1):
522
+ yield ios_platform_template.format(
523
+ major=version[0], minor=minor, multiarch=multiarch
524
+ )
525
+
526
+ for major in range(version[0] - 1, 11, -1):
527
+ for minor in range(9, -1, -1):
528
+ yield ios_platform_template.format(
529
+ major=major, minor=minor, multiarch=multiarch
530
+ )
531
+
532
+
533
+ def _linux_platforms(is_32bit: bool = _32_BIT_INTERPRETER) -> Iterator[str]:
534
+ linux = _normalize_string(sysconfig.get_platform())
535
+ if not linux.startswith("linux_"):
536
+ # we should never be here, just yield the sysconfig one and return
537
+ yield linux
538
+ return
539
+ if is_32bit:
540
+ if linux == "linux_x86_64":
541
+ linux = "linux_i686"
542
+ elif linux == "linux_aarch64":
543
+ linux = "linux_armv8l"
544
+ _, arch = linux.split("_", 1)
545
+ archs = {"armv8l": ["armv8l", "armv7l"]}.get(arch, [arch])
546
+ yield from _manylinux.platform_tags(archs)
547
+ yield from _musllinux.platform_tags(archs)
548
+ for arch in archs:
549
+ yield f"linux_{arch}"
550
+
551
+
552
+ def _generic_platforms() -> Iterator[str]:
553
+ yield _normalize_string(sysconfig.get_platform())
554
+
555
+
556
+ def platform_tags() -> Iterator[str]:
557
+ """
558
+ Provides the platform tags for this installation.
559
+ """
560
+ if platform.system() == "Darwin":
561
+ return mac_platforms()
562
+ elif platform.system() == "iOS":
563
+ return ios_platforms()
564
+ elif platform.system() == "Linux":
565
+ return _linux_platforms()
566
+ else:
567
+ return _generic_platforms()
568
+
569
+
570
+ def interpreter_name() -> str:
571
+ """
572
+ Returns the name of the running interpreter.
573
+
574
+ Some implementations have a reserved, two-letter abbreviation which will
575
+ be returned when appropriate.
576
+ """
577
+ name = sys.implementation.name
578
+ return INTERPRETER_SHORT_NAMES.get(name) or name
579
+
580
+
581
+ def interpreter_version(*, warn: bool = False) -> str:
582
+ """
583
+ Returns the version of the running interpreter.
584
+ """
585
+ version = _get_config_var("py_version_nodot", warn=warn)
586
+ if version:
587
+ version = str(version)
588
+ else:
589
+ version = _version_nodot(sys.version_info[:2])
590
+ return version
591
+
592
+
593
+ def _version_nodot(version: PythonVersion) -> str:
594
+ return "".join(map(str, version))
595
+
596
+
597
+ def sys_tags(*, warn: bool = False) -> Iterator[Tag]:
598
+ """
599
+ Returns the sequence of tag triples for the running interpreter.
600
+
601
+ The order of the sequence corresponds to priority order for the
602
+ interpreter, from most to least important.
603
+ """
604
+
605
+ interp_name = interpreter_name()
606
+ if interp_name == "cp":
607
+ yield from cpython_tags(warn=warn)
608
+ else:
609
+ yield from generic_tags()
610
+
611
+ if interp_name == "pp":
612
+ interp = "pp3"
613
+ elif interp_name == "cp":
614
+ interp = "cp" + interpreter_version(warn=warn)
615
+ else:
616
+ interp = None
617
+ yield from compatible_tags(interpreter=interp)
tuning-competition-baseline/.venv/lib/python3.11/site-packages/packaging/version.py ADDED
@@ -0,0 +1,582 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This file is dual licensed under the terms of the Apache License, Version
2
+ # 2.0, and the BSD License. See the LICENSE file in the root of this repository
3
+ # for complete details.
4
+ """
5
+ .. testsetup::
6
+
7
+ from packaging.version import parse, Version
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import itertools
13
+ import re
14
+ from typing import Any, Callable, NamedTuple, SupportsInt, Tuple, Union
15
+
16
+ from ._structures import Infinity, InfinityType, NegativeInfinity, NegativeInfinityType
17
+
18
+ __all__ = ["VERSION_PATTERN", "InvalidVersion", "Version", "parse"]
19
+
20
+ LocalType = Tuple[Union[int, str], ...]
21
+
22
+ CmpPrePostDevType = Union[InfinityType, NegativeInfinityType, Tuple[str, int]]
23
+ CmpLocalType = Union[
24
+ NegativeInfinityType,
25
+ Tuple[Union[Tuple[int, str], Tuple[NegativeInfinityType, Union[int, str]]], ...],
26
+ ]
27
+ CmpKey = Tuple[
28
+ int,
29
+ Tuple[int, ...],
30
+ CmpPrePostDevType,
31
+ CmpPrePostDevType,
32
+ CmpPrePostDevType,
33
+ CmpLocalType,
34
+ ]
35
+ VersionComparisonMethod = Callable[[CmpKey, CmpKey], bool]
36
+
37
+
38
+ class _Version(NamedTuple):
39
+ epoch: int
40
+ release: tuple[int, ...]
41
+ dev: tuple[str, int] | None
42
+ pre: tuple[str, int] | None
43
+ post: tuple[str, int] | None
44
+ local: LocalType | None
45
+
46
+
47
+ def parse(version: str) -> Version:
48
+ """Parse the given version string.
49
+
50
+ >>> parse('1.0.dev1')
51
+ <Version('1.0.dev1')>
52
+
53
+ :param version: The version string to parse.
54
+ :raises InvalidVersion: When the version string is not a valid version.
55
+ """
56
+ return Version(version)
57
+
58
+
59
+ class InvalidVersion(ValueError):
60
+ """Raised when a version string is not a valid version.
61
+
62
+ >>> Version("invalid")
63
+ Traceback (most recent call last):
64
+ ...
65
+ packaging.version.InvalidVersion: Invalid version: 'invalid'
66
+ """
67
+
68
+
69
+ class _BaseVersion:
70
+ _key: tuple[Any, ...]
71
+
72
+ def __hash__(self) -> int:
73
+ return hash(self._key)
74
+
75
+ # Please keep the duplicated `isinstance` check
76
+ # in the six comparisons hereunder
77
+ # unless you find a way to avoid adding overhead function calls.
78
+ def __lt__(self, other: _BaseVersion) -> bool:
79
+ if not isinstance(other, _BaseVersion):
80
+ return NotImplemented
81
+
82
+ return self._key < other._key
83
+
84
+ def __le__(self, other: _BaseVersion) -> bool:
85
+ if not isinstance(other, _BaseVersion):
86
+ return NotImplemented
87
+
88
+ return self._key <= other._key
89
+
90
+ def __eq__(self, other: object) -> bool:
91
+ if not isinstance(other, _BaseVersion):
92
+ return NotImplemented
93
+
94
+ return self._key == other._key
95
+
96
+ def __ge__(self, other: _BaseVersion) -> bool:
97
+ if not isinstance(other, _BaseVersion):
98
+ return NotImplemented
99
+
100
+ return self._key >= other._key
101
+
102
+ def __gt__(self, other: _BaseVersion) -> bool:
103
+ if not isinstance(other, _BaseVersion):
104
+ return NotImplemented
105
+
106
+ return self._key > other._key
107
+
108
+ def __ne__(self, other: object) -> bool:
109
+ if not isinstance(other, _BaseVersion):
110
+ return NotImplemented
111
+
112
+ return self._key != other._key
113
+
114
+
115
+ # Deliberately not anchored to the start and end of the string, to make it
116
+ # easier for 3rd party code to reuse
117
+ _VERSION_PATTERN = r"""
118
+ v?
119
+ (?:
120
+ (?:(?P<epoch>[0-9]+)!)? # epoch
121
+ (?P<release>[0-9]+(?:\.[0-9]+)*) # release segment
122
+ (?P<pre> # pre-release
123
+ [-_\.]?
124
+ (?P<pre_l>alpha|a|beta|b|preview|pre|c|rc)
125
+ [-_\.]?
126
+ (?P<pre_n>[0-9]+)?
127
+ )?
128
+ (?P<post> # post release
129
+ (?:-(?P<post_n1>[0-9]+))
130
+ |
131
+ (?:
132
+ [-_\.]?
133
+ (?P<post_l>post|rev|r)
134
+ [-_\.]?
135
+ (?P<post_n2>[0-9]+)?
136
+ )
137
+ )?
138
+ (?P<dev> # dev release
139
+ [-_\.]?
140
+ (?P<dev_l>dev)
141
+ [-_\.]?
142
+ (?P<dev_n>[0-9]+)?
143
+ )?
144
+ )
145
+ (?:\+(?P<local>[a-z0-9]+(?:[-_\.][a-z0-9]+)*))? # local version
146
+ """
147
+
148
+ VERSION_PATTERN = _VERSION_PATTERN
149
+ """
150
+ A string containing the regular expression used to match a valid version.
151
+
152
+ The pattern is not anchored at either end, and is intended for embedding in larger
153
+ expressions (for example, matching a version number as part of a file name). The
154
+ regular expression should be compiled with the ``re.VERBOSE`` and ``re.IGNORECASE``
155
+ flags set.
156
+
157
+ :meta hide-value:
158
+ """
159
+
160
+
161
+ class Version(_BaseVersion):
162
+ """This class abstracts handling of a project's versions.
163
+
164
+ A :class:`Version` instance is comparison aware and can be compared and
165
+ sorted using the standard Python interfaces.
166
+
167
+ >>> v1 = Version("1.0a5")
168
+ >>> v2 = Version("1.0")
169
+ >>> v1
170
+ <Version('1.0a5')>
171
+ >>> v2
172
+ <Version('1.0')>
173
+ >>> v1 < v2
174
+ True
175
+ >>> v1 == v2
176
+ False
177
+ >>> v1 > v2
178
+ False
179
+ >>> v1 >= v2
180
+ False
181
+ >>> v1 <= v2
182
+ True
183
+ """
184
+
185
+ _regex = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE)
186
+ _key: CmpKey
187
+
188
+ def __init__(self, version: str) -> None:
189
+ """Initialize a Version object.
190
+
191
+ :param version:
192
+ The string representation of a version which will be parsed and normalized
193
+ before use.
194
+ :raises InvalidVersion:
195
+ If the ``version`` does not conform to PEP 440 in any way then this
196
+ exception will be raised.
197
+ """
198
+
199
+ # Validate the version and parse it into pieces
200
+ match = self._regex.search(version)
201
+ if not match:
202
+ raise InvalidVersion(f"Invalid version: {version!r}")
203
+
204
+ # Store the parsed out pieces of the version
205
+ self._version = _Version(
206
+ epoch=int(match.group("epoch")) if match.group("epoch") else 0,
207
+ release=tuple(int(i) for i in match.group("release").split(".")),
208
+ pre=_parse_letter_version(match.group("pre_l"), match.group("pre_n")),
209
+ post=_parse_letter_version(
210
+ match.group("post_l"), match.group("post_n1") or match.group("post_n2")
211
+ ),
212
+ dev=_parse_letter_version(match.group("dev_l"), match.group("dev_n")),
213
+ local=_parse_local_version(match.group("local")),
214
+ )
215
+
216
+ # Generate a key which will be used for sorting
217
+ self._key = _cmpkey(
218
+ self._version.epoch,
219
+ self._version.release,
220
+ self._version.pre,
221
+ self._version.post,
222
+ self._version.dev,
223
+ self._version.local,
224
+ )
225
+
226
+ def __repr__(self) -> str:
227
+ """A representation of the Version that shows all internal state.
228
+
229
+ >>> Version('1.0.0')
230
+ <Version('1.0.0')>
231
+ """
232
+ return f"<Version('{self}')>"
233
+
234
+ def __str__(self) -> str:
235
+ """A string representation of the version that can be round-tripped.
236
+
237
+ >>> str(Version("1.0a5"))
238
+ '1.0a5'
239
+ """
240
+ parts = []
241
+
242
+ # Epoch
243
+ if self.epoch != 0:
244
+ parts.append(f"{self.epoch}!")
245
+
246
+ # Release segment
247
+ parts.append(".".join(str(x) for x in self.release))
248
+
249
+ # Pre-release
250
+ if self.pre is not None:
251
+ parts.append("".join(str(x) for x in self.pre))
252
+
253
+ # Post-release
254
+ if self.post is not None:
255
+ parts.append(f".post{self.post}")
256
+
257
+ # Development release
258
+ if self.dev is not None:
259
+ parts.append(f".dev{self.dev}")
260
+
261
+ # Local version segment
262
+ if self.local is not None:
263
+ parts.append(f"+{self.local}")
264
+
265
+ return "".join(parts)
266
+
267
+ @property
268
+ def epoch(self) -> int:
269
+ """The epoch of the version.
270
+
271
+ >>> Version("2.0.0").epoch
272
+ 0
273
+ >>> Version("1!2.0.0").epoch
274
+ 1
275
+ """
276
+ return self._version.epoch
277
+
278
+ @property
279
+ def release(self) -> tuple[int, ...]:
280
+ """The components of the "release" segment of the version.
281
+
282
+ >>> Version("1.2.3").release
283
+ (1, 2, 3)
284
+ >>> Version("2.0.0").release
285
+ (2, 0, 0)
286
+ >>> Version("1!2.0.0.post0").release
287
+ (2, 0, 0)
288
+
289
+ Includes trailing zeroes but not the epoch or any pre-release / development /
290
+ post-release suffixes.
291
+ """
292
+ return self._version.release
293
+
294
+ @property
295
+ def pre(self) -> tuple[str, int] | None:
296
+ """The pre-release segment of the version.
297
+
298
+ >>> print(Version("1.2.3").pre)
299
+ None
300
+ >>> Version("1.2.3a1").pre
301
+ ('a', 1)
302
+ >>> Version("1.2.3b1").pre
303
+ ('b', 1)
304
+ >>> Version("1.2.3rc1").pre
305
+ ('rc', 1)
306
+ """
307
+ return self._version.pre
308
+
309
+ @property
310
+ def post(self) -> int | None:
311
+ """The post-release number of the version.
312
+
313
+ >>> print(Version("1.2.3").post)
314
+ None
315
+ >>> Version("1.2.3.post1").post
316
+ 1
317
+ """
318
+ return self._version.post[1] if self._version.post else None
319
+
320
+ @property
321
+ def dev(self) -> int | None:
322
+ """The development number of the version.
323
+
324
+ >>> print(Version("1.2.3").dev)
325
+ None
326
+ >>> Version("1.2.3.dev1").dev
327
+ 1
328
+ """
329
+ return self._version.dev[1] if self._version.dev else None
330
+
331
+ @property
332
+ def local(self) -> str | None:
333
+ """The local version segment of the version.
334
+
335
+ >>> print(Version("1.2.3").local)
336
+ None
337
+ >>> Version("1.2.3+abc").local
338
+ 'abc'
339
+ """
340
+ if self._version.local:
341
+ return ".".join(str(x) for x in self._version.local)
342
+ else:
343
+ return None
344
+
345
+ @property
346
+ def public(self) -> str:
347
+ """The public portion of the version.
348
+
349
+ >>> Version("1.2.3").public
350
+ '1.2.3'
351
+ >>> Version("1.2.3+abc").public
352
+ '1.2.3'
353
+ >>> Version("1!1.2.3dev1+abc").public
354
+ '1!1.2.3.dev1'
355
+ """
356
+ return str(self).split("+", 1)[0]
357
+
358
+ @property
359
+ def base_version(self) -> str:
360
+ """The "base version" of the version.
361
+
362
+ >>> Version("1.2.3").base_version
363
+ '1.2.3'
364
+ >>> Version("1.2.3+abc").base_version
365
+ '1.2.3'
366
+ >>> Version("1!1.2.3dev1+abc").base_version
367
+ '1!1.2.3'
368
+
369
+ The "base version" is the public version of the project without any pre or post
370
+ release markers.
371
+ """
372
+ parts = []
373
+
374
+ # Epoch
375
+ if self.epoch != 0:
376
+ parts.append(f"{self.epoch}!")
377
+
378
+ # Release segment
379
+ parts.append(".".join(str(x) for x in self.release))
380
+
381
+ return "".join(parts)
382
+
383
+ @property
384
+ def is_prerelease(self) -> bool:
385
+ """Whether this version is a pre-release.
386
+
387
+ >>> Version("1.2.3").is_prerelease
388
+ False
389
+ >>> Version("1.2.3a1").is_prerelease
390
+ True
391
+ >>> Version("1.2.3b1").is_prerelease
392
+ True
393
+ >>> Version("1.2.3rc1").is_prerelease
394
+ True
395
+ >>> Version("1.2.3dev1").is_prerelease
396
+ True
397
+ """
398
+ return self.dev is not None or self.pre is not None
399
+
400
+ @property
401
+ def is_postrelease(self) -> bool:
402
+ """Whether this version is a post-release.
403
+
404
+ >>> Version("1.2.3").is_postrelease
405
+ False
406
+ >>> Version("1.2.3.post1").is_postrelease
407
+ True
408
+ """
409
+ return self.post is not None
410
+
411
+ @property
412
+ def is_devrelease(self) -> bool:
413
+ """Whether this version is a development release.
414
+
415
+ >>> Version("1.2.3").is_devrelease
416
+ False
417
+ >>> Version("1.2.3.dev1").is_devrelease
418
+ True
419
+ """
420
+ return self.dev is not None
421
+
422
+ @property
423
+ def major(self) -> int:
424
+ """The first item of :attr:`release` or ``0`` if unavailable.
425
+
426
+ >>> Version("1.2.3").major
427
+ 1
428
+ """
429
+ return self.release[0] if len(self.release) >= 1 else 0
430
+
431
+ @property
432
+ def minor(self) -> int:
433
+ """The second item of :attr:`release` or ``0`` if unavailable.
434
+
435
+ >>> Version("1.2.3").minor
436
+ 2
437
+ >>> Version("1").minor
438
+ 0
439
+ """
440
+ return self.release[1] if len(self.release) >= 2 else 0
441
+
442
+ @property
443
+ def micro(self) -> int:
444
+ """The third item of :attr:`release` or ``0`` if unavailable.
445
+
446
+ >>> Version("1.2.3").micro
447
+ 3
448
+ >>> Version("1").micro
449
+ 0
450
+ """
451
+ return self.release[2] if len(self.release) >= 3 else 0
452
+
453
+
454
+ class _TrimmedRelease(Version):
455
+ @property
456
+ def release(self) -> tuple[int, ...]:
457
+ """
458
+ Release segment without any trailing zeros.
459
+
460
+ >>> _TrimmedRelease('1.0.0').release
461
+ (1,)
462
+ >>> _TrimmedRelease('0.0').release
463
+ (0,)
464
+ """
465
+ rel = super().release
466
+ nonzeros = (index for index, val in enumerate(rel) if val)
467
+ last_nonzero = max(nonzeros, default=0)
468
+ return rel[: last_nonzero + 1]
469
+
470
+
471
+ def _parse_letter_version(
472
+ letter: str | None, number: str | bytes | SupportsInt | None
473
+ ) -> tuple[str, int] | None:
474
+ if letter:
475
+ # We consider there to be an implicit 0 in a pre-release if there is
476
+ # not a numeral associated with it.
477
+ if number is None:
478
+ number = 0
479
+
480
+ # We normalize any letters to their lower case form
481
+ letter = letter.lower()
482
+
483
+ # We consider some words to be alternate spellings of other words and
484
+ # in those cases we want to normalize the spellings to our preferred
485
+ # spelling.
486
+ if letter == "alpha":
487
+ letter = "a"
488
+ elif letter == "beta":
489
+ letter = "b"
490
+ elif letter in ["c", "pre", "preview"]:
491
+ letter = "rc"
492
+ elif letter in ["rev", "r"]:
493
+ letter = "post"
494
+
495
+ return letter, int(number)
496
+
497
+ assert not letter
498
+ if number:
499
+ # We assume if we are given a number, but we are not given a letter
500
+ # then this is using the implicit post release syntax (e.g. 1.0-1)
501
+ letter = "post"
502
+
503
+ return letter, int(number)
504
+
505
+ return None
506
+
507
+
508
+ _local_version_separators = re.compile(r"[\._-]")
509
+
510
+
511
+ def _parse_local_version(local: str | None) -> LocalType | None:
512
+ """
513
+ Takes a string like abc.1.twelve and turns it into ("abc", 1, "twelve").
514
+ """
515
+ if local is not None:
516
+ return tuple(
517
+ part.lower() if not part.isdigit() else int(part)
518
+ for part in _local_version_separators.split(local)
519
+ )
520
+ return None
521
+
522
+
523
+ def _cmpkey(
524
+ epoch: int,
525
+ release: tuple[int, ...],
526
+ pre: tuple[str, int] | None,
527
+ post: tuple[str, int] | None,
528
+ dev: tuple[str, int] | None,
529
+ local: LocalType | None,
530
+ ) -> CmpKey:
531
+ # When we compare a release version, we want to compare it with all of the
532
+ # trailing zeros removed. So we'll use a reverse the list, drop all the now
533
+ # leading zeros until we come to something non zero, then take the rest
534
+ # re-reverse it back into the correct order and make it a tuple and use
535
+ # that for our sorting key.
536
+ _release = tuple(
537
+ reversed(list(itertools.dropwhile(lambda x: x == 0, reversed(release))))
538
+ )
539
+
540
+ # We need to "trick" the sorting algorithm to put 1.0.dev0 before 1.0a0.
541
+ # We'll do this by abusing the pre segment, but we _only_ want to do this
542
+ # if there is not a pre or a post segment. If we have one of those then
543
+ # the normal sorting rules will handle this case correctly.
544
+ if pre is None and post is None and dev is not None:
545
+ _pre: CmpPrePostDevType = NegativeInfinity
546
+ # Versions without a pre-release (except as noted above) should sort after
547
+ # those with one.
548
+ elif pre is None:
549
+ _pre = Infinity
550
+ else:
551
+ _pre = pre
552
+
553
+ # Versions without a post segment should sort before those with one.
554
+ if post is None:
555
+ _post: CmpPrePostDevType = NegativeInfinity
556
+
557
+ else:
558
+ _post = post
559
+
560
+ # Versions without a development segment should sort after those with one.
561
+ if dev is None:
562
+ _dev: CmpPrePostDevType = Infinity
563
+
564
+ else:
565
+ _dev = dev
566
+
567
+ if local is None:
568
+ # Versions without a local segment should sort before those with one.
569
+ _local: CmpLocalType = NegativeInfinity
570
+ else:
571
+ # Versions with a local segment need that segment parsed to implement
572
+ # the sorting rules in PEP440.
573
+ # - Alpha numeric segments sort before numeric segments
574
+ # - Alpha numeric segments sort lexicographically
575
+ # - Numeric segments sort numerically
576
+ # - Shorter versions sort before longer versions when the prefixes
577
+ # match exactly
578
+ _local = tuple(
579
+ (i, "") if isinstance(i, int) else (NegativeInfinity, i) for i in local
580
+ )
581
+
582
+ return epoch, _release, _pre, _post, _dev, _local
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/cachecontrol/__init__.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SPDX-FileCopyrightText: 2015 Eric Larson
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ """CacheControl import Interface.
6
+
7
+ Make it easy to import from cachecontrol without long namespaces.
8
+ """
9
+ __author__ = "Eric Larson"
10
+ __email__ = "eric@ionrock.org"
11
+ __version__ = "0.14.0"
12
+
13
+ from pip._vendor.cachecontrol.adapter import CacheControlAdapter
14
+ from pip._vendor.cachecontrol.controller import CacheController
15
+ from pip._vendor.cachecontrol.wrapper import CacheControl
16
+
17
+ __all__ = [
18
+ "__author__",
19
+ "__email__",
20
+ "__version__",
21
+ "CacheControlAdapter",
22
+ "CacheController",
23
+ "CacheControl",
24
+ ]
25
+
26
+ import logging
27
+
28
+ logging.getLogger(__name__).addHandler(logging.NullHandler())
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/cachecontrol/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (999 Bytes). View file