koichi12 commited on
Commit
2e7ec00
·
verified ·
1 Parent(s): 13ae937

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. tuning-competition-baseline/.venv/lib/python3.11/site-packages/filelock/__pycache__/_api.cpython-311.pyc +0 -0
  2. tuning-competition-baseline/.venv/lib/python3.11/site-packages/filelock/__pycache__/_error.cpython-311.pyc +0 -0
  3. tuning-competition-baseline/.venv/lib/python3.11/site-packages/filelock/__pycache__/version.cpython-311.pyc +0 -0
  4. tuning-competition-baseline/.venv/lib/python3.11/site-packages/filelock/_api.py +323 -0
  5. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/config.cpython-311.pyc +0 -0
  6. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/conftest.cpython-311.pyc +0 -0
  7. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/dircache.cpython-311.pyc +0 -0
  8. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/exceptions.cpython-311.pyc +0 -0
  9. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/generic.cpython-311.pyc +0 -0
  10. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/parquet.cpython-311.pyc +0 -0
  11. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/spec.cpython-311.pyc +0 -0
  12. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/tests/abstract/__init__.py +287 -0
  13. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/tests/abstract/__pycache__/get.cpython-311.pyc +0 -0
  14. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/tests/abstract/common.py +175 -0
  15. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/tests/abstract/copy.py +557 -0
  16. tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/calculus/__pycache__/approximation.cpython-311.pyc +0 -0
  17. tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/calculus/__pycache__/differentiation.cpython-311.pyc +0 -0
  18. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/__pycache__/__init__.cpython-311.pyc +0 -0
  19. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/__init__.py +0 -0
  20. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti.h +123 -0
  21. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_callbacks.h +760 -0
  22. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_checkpoint.h +127 -0
  23. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_nvtx_cbid.h +111 -0
  24. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_profiler_target.h +588 -0
  25. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/generated_nvtx_meta.h +247 -0
  26. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/nvperf_common.h +273 -0
  27. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/nvperf_cuda_host.h +197 -0
  28. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/nvperf_host.h +1471 -0
  29. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_runtime/__init__.py +0 -0
  30. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cusolver/__pycache__/__init__.cpython-311.pyc +0 -0
  31. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cusolver/include/__init__.py +0 -0
  32. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cusolver/include/__pycache__/__init__.cpython-311.pyc +0 -0
  33. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cusolver/include/cusolverMg.h +318 -0
  34. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cusolver/include/cusolverRf.h +339 -0
  35. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cusolver/include/cusolverSp_LOWLEVEL_PREVIEW.h +1107 -0
  36. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cusolver/lib/__init__.py +0 -0
  37. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nccl/__pycache__/__init__.cpython-311.pyc +0 -0
  38. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nccl/include/nccl_net.h +456 -0
  39. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvtx3/nvToolsExtCudaRt.h +146 -0
  40. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvtx3/nvtxDetail/nvtxImplCore.h +299 -0
  41. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvtx3/nvtxDetail/nvtxImplCudaRt_v3.h +112 -0
  42. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvtx3/nvtxDetail/nvtxImplCuda_v3.h +133 -0
  43. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvtx3/nvtxDetail/nvtxImplOpenCL_v3.h +192 -0
  44. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvtx3/nvtxDetail/nvtxImplSync_v3.h +114 -0
  45. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvtx3/nvtxDetail/nvtxInitDefs.h +565 -0
  46. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvtx3/nvtxDetail/nvtxTypes.h +333 -0
  47. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/lib/__init__.py +0 -0
  48. tuning-competition-baseline/.venv/lib/python3.11/site-packages/packaging-24.2.dist-info/INSTALLER +1 -0
  49. tuning-competition-baseline/.venv/lib/python3.11/site-packages/packaging-24.2.dist-info/LICENSE +3 -0
  50. tuning-competition-baseline/.venv/lib/python3.11/site-packages/packaging-24.2.dist-info/RECORD +41 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/filelock/__pycache__/_api.cpython-311.pyc ADDED
Binary file (14.6 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/filelock/__pycache__/_error.cpython-311.pyc ADDED
Binary file (1.97 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/filelock/__pycache__/version.cpython-311.pyc ADDED
Binary file (667 Bytes). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/filelock/_api.py ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import contextlib
4
+ import logging
5
+ import os
6
+ import time
7
+ import warnings
8
+ from abc import ABC, abstractmethod
9
+ from dataclasses import dataclass
10
+ from threading import local
11
+ from typing import TYPE_CHECKING, Any, ClassVar
12
+ from weakref import WeakValueDictionary
13
+
14
+ from ._error import Timeout
15
+
16
+ if TYPE_CHECKING:
17
+ import sys
18
+ from types import TracebackType
19
+
20
+ if sys.version_info >= (3, 11): # pragma: no cover (py311+)
21
+ from typing import Self
22
+ else: # pragma: no cover (<py311)
23
+ from typing_extensions import Self
24
+
25
+
26
+ _LOGGER = logging.getLogger("filelock")
27
+
28
+
29
# Proxy object handed out by :meth:`BaseFileLock.acquire`. If ``acquire`` returned
# the lock itself, a ``with lock.acquire():`` statement would call ``__enter__`` a
# second time and acquire the lock again without an automatic matching release —
# issue #37 (memory leak). This thin wrapper owns exactly one release instead.
class AcquireReturnProxy:
    """Context-aware wrapper that releases the wrapped lock file on exit."""

    def __init__(self, lock: BaseFileLock) -> None:
        # The lock to release once this proxy's context exits.
        self.lock = lock

    def __enter__(self) -> BaseFileLock:
        # Hand back the underlying lock (not the proxy) so callers can use its API.
        return self.lock

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_value: BaseException | None,
        traceback: TracebackType | None,
    ) -> None:
        # Release unconditionally — the acquisition happened in acquire(), not here.
        self.lock.release()
48
+
49
+
50
@dataclass
class FileLockContext:
    """A dataclass which holds the mutable state for a ``BaseFileLock`` object."""

    # The context is held in a separate class to allow optional use of thread local
    # storage via the ThreadLocalFileContext subclass. Field order matters: it is
    # the positional-constructor order of the dataclass.

    #: The path to the lock file.
    lock_file: str

    #: The default timeout value, in seconds; a negative value disables the timeout.
    timeout: float

    #: The permission mode (e.g. 0o644) used when creating the lock file.
    mode: int

    #: The file descriptor for the *lock_file* as returned by os.open(); not None exactly while the lock is held.
    lock_file_fd: int | None = None

    #: The lock counter is used for implementing the nested (reentrant) locking mechanism.
    lock_counter: int = 0  # Incremented on each acquire; the lock is only truly released when this drops back to 0.
71
+
72
+
73
class ThreadLocalFileContext(FileLockContext, local):
    """A thread-local version of the ``FileLockContext`` class.

    Inheriting from ``threading.local`` gives each thread its own independent
    copy of the context fields, so a lock built on this context is reentrant
    only within a single thread.
    """
75
+
76
+
77
class BaseFileLock(ABC, contextlib.ContextDecorator):
    """Abstract base class for a file lock object.

    Subclasses implement the platform-specific :meth:`_acquire` / :meth:`_release`
    pair; this class supplies reentrancy counting, timeout/polling, the context
    manager protocol and (via ``ContextDecorator``) use as a function decorator.
    """

    # Singleton registry keyed by str(lock_file). A WeakValueDictionary is used so
    # an entry disappears automatically once no caller holds a reference.
    _instances: ClassVar[WeakValueDictionary[str, BaseFileLock]] = WeakValueDictionary()

    def __new__(  # noqa: PLR0913
        cls,
        lock_file: str | os.PathLike[str],
        timeout: float = -1,  # noqa: ARG003
        mode: int = 0o644,  # noqa: ARG003
        thread_local: bool = True,  # noqa: ARG003, FBT001, FBT002
        *,
        is_singleton: bool = False,
        **kwargs: dict[str, Any],  # capture remaining kwargs for subclasses # noqa: ARG003
    ) -> Self:
        """Create a new lock object or if specified return the singleton instance for the lock file."""
        if not is_singleton:
            return super().__new__(cls)

        # Singleton path: reuse the registered instance for this path, or create
        # and register a new one. NOTE(review): __init__ still runs on the reused
        # instance afterwards — Python always calls __init__ on the returned object.
        instance = cls._instances.get(str(lock_file))
        if not instance:
            instance = super().__new__(cls)
            cls._instances[str(lock_file)] = instance

        return instance  # type: ignore[return-value] # https://github.com/python/mypy/issues/15322

    def __init__(  # noqa: PLR0913
        self,
        lock_file: str | os.PathLike[str],
        timeout: float = -1,
        mode: int = 0o644,
        thread_local: bool = True,  # noqa: FBT001, FBT002
        *,
        is_singleton: bool = False,
    ) -> None:
        """
        Create a new lock object.

        :param lock_file: path to the file
        :param timeout: default timeout when acquiring the lock, in seconds. It will be used as fallback value in \
            the acquire method, if no timeout value (``None``) is given. If you want to disable the timeout, set it \
            to a negative value. A timeout of 0 means, that there is exactly one attempt to acquire the file lock.
        :param mode: file permissions for the lockfile
        :param thread_local: Whether this object's internal context should be thread local or not. If this is set to \
            ``False`` then the lock will be reentrant across threads.
        :param is_singleton: If this is set to ``True`` then only one instance of this class will be created \
            per lock file. This is useful if you want to use the lock object for reentrant locking without needing \
            to pass the same object around.
        """
        self._is_thread_local = thread_local
        self._is_singleton = is_singleton

        # Create the context. Note that external code should not work with the context directly and should instead use
        # properties of this class.
        kwargs: dict[str, Any] = {
            "lock_file": os.fspath(lock_file),
            "timeout": timeout,
            "mode": mode,
        }
        self._context: FileLockContext = (ThreadLocalFileContext if thread_local else FileLockContext)(**kwargs)

    # NOTE(review): unlike ``is_singleton`` below, this is a plain method rather
    # than a property — kept as-is since callers invoke it with parentheses.
    def is_thread_local(self) -> bool:
        """:return: a flag indicating if this lock is thread local or not"""
        return self._is_thread_local

    @property
    def is_singleton(self) -> bool:
        """:return: a flag indicating if this lock is singleton or not"""
        return self._is_singleton

    @property
    def lock_file(self) -> str:
        """:return: path to the lock file"""
        return self._context.lock_file

    @property
    def timeout(self) -> float:
        """
        :return: the default timeout value, in seconds

        .. versionadded:: 2.0.0
        """
        return self._context.timeout

    @timeout.setter
    def timeout(self, value: float | str) -> None:
        """
        Change the default timeout value.

        :param value: the new value, in seconds
        """
        self._context.timeout = float(value)

    @abstractmethod
    def _acquire(self) -> None:
        """If the file lock could be acquired, self._context.lock_file_fd holds the file descriptor of the lock file."""
        raise NotImplementedError

    @abstractmethod
    def _release(self) -> None:
        """Releases the lock and sets self._context.lock_file_fd to None."""
        raise NotImplementedError

    @property
    def is_locked(self) -> bool:
        """
        :return: A boolean indicating if the lock file is holding the lock currently.

        .. versionchanged:: 2.0.0

            This was previously a method and is now a property.
        """
        # Holding the lock is equated with having an open file descriptor for it.
        return self._context.lock_file_fd is not None

    @property
    def lock_counter(self) -> int:
        """:return: The number of times this lock has been acquired (but not yet released)."""
        return self._context.lock_counter

    def acquire(
        self,
        timeout: float | None = None,
        poll_interval: float = 0.05,
        *,
        poll_intervall: float | None = None,
        blocking: bool = True,
    ) -> AcquireReturnProxy:
        """
        Try to acquire the file lock.

        :param timeout: maximum wait time for acquiring the lock, ``None`` means use the default :attr:`~timeout` is and
            if ``timeout < 0``, there is no timeout and this method will block until the lock could be acquired
        :param poll_interval: interval of trying to acquire the lock file
        :param poll_intervall: deprecated, kept for backwards compatibility, use ``poll_interval`` instead
        :param blocking: defaults to True. If False, function will return immediately if it cannot obtain a lock on the
            first attempt. Otherwise, this method will block until the timeout expires or the lock is acquired.
        :raises Timeout: if fails to acquire lock within the timeout period
        :return: a context object that will unlock the file when the context is exited

        .. code-block:: python

            # You can use this method in the context manager (recommended)
            with lock.acquire():
                pass

            # Or use an equivalent try-finally construct:
            lock.acquire()
            try:
                pass
            finally:
                lock.release()

        .. versionchanged:: 2.0.0

            This method returns now a *proxy* object instead of *self*,
            so that it can be used in a with statement without side effects.

        """
        # Use the default timeout, if no timeout is provided.
        if timeout is None:
            timeout = self._context.timeout

        # Legacy misspelled keyword wins over the positional default when given.
        if poll_intervall is not None:
            msg = "use poll_interval instead of poll_intervall"
            warnings.warn(msg, DeprecationWarning, stacklevel=2)
            poll_interval = poll_intervall

        # Increment the number right at the beginning. We can still undo it, if something fails.
        self._context.lock_counter += 1

        lock_id = id(self)
        lock_filename = self.lock_file
        start_time = time.perf_counter()
        try:
            # Poll until the OS-level lock is obtained, blocking=False fails fast,
            # or the elapsed time exceeds a non-negative timeout.
            while True:
                if not self.is_locked:
                    _LOGGER.debug("Attempting to acquire lock %s on %s", lock_id, lock_filename)
                    self._acquire()
                if self.is_locked:
                    _LOGGER.debug("Lock %s acquired on %s", lock_id, lock_filename)
                    break
                if blocking is False:
                    _LOGGER.debug("Failed to immediately acquire lock %s on %s", lock_id, lock_filename)
                    raise Timeout(lock_filename)  # noqa: TRY301
                if 0 <= timeout < time.perf_counter() - start_time:
                    _LOGGER.debug("Timeout on acquiring lock %s on %s", lock_id, lock_filename)
                    raise Timeout(lock_filename)  # noqa: TRY301
                msg = "Lock %s not acquired on %s, waiting %s seconds ..."
                _LOGGER.debug(msg, lock_id, lock_filename, poll_interval)
                time.sleep(poll_interval)
        except BaseException:  # Something did go wrong, so decrement the counter.
            self._context.lock_counter = max(0, self._context.lock_counter - 1)
            raise
        return AcquireReturnProxy(lock=self)

    def release(self, force: bool = False) -> None:  # noqa: FBT001, FBT002
        """
        Releases the file lock. Please note, that the lock is only completely released, if the lock counter is 0. Also
        note, that the lock file itself is not automatically deleted.

        :param force: If true, the lock counter is ignored and the lock is released in every case.
        """
        if self.is_locked:
            self._context.lock_counter -= 1

            if self._context.lock_counter == 0 or force:
                lock_id, lock_filename = id(self), self.lock_file

                _LOGGER.debug("Attempting to release lock %s on %s", lock_id, lock_filename)
                self._release()
                # Reset to 0 so a forced release cannot leave a stale positive count.
                self._context.lock_counter = 0
                _LOGGER.debug("Lock %s released on %s", lock_id, lock_filename)

    def __enter__(self) -> Self:
        """
        Acquire the lock.

        :return: the lock object
        """
        self.acquire()
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_value: BaseException | None,
        traceback: TracebackType | None,
    ) -> None:
        """
        Release the lock.

        :param exc_type: the exception type if raised
        :param exc_value: the exception value if raised
        :param traceback: the exception traceback if raised
        """
        self.release()

    def __del__(self) -> None:
        """Called when the lock object is deleted; force-releases as a safety net.

        NOTE(review): relies on finalizer timing, which is interpreter-dependent —
        callers should still release explicitly or use the context manager.
        """
        self.release(force=True)
318
+
319
+
320
# Explicit public API of this module: the abstract lock base class and the
# proxy object returned by its acquire() method.
__all__ = [
    "BaseFileLock",
    "AcquireReturnProxy",
]
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/config.cpython-311.pyc ADDED
Binary file (6.7 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/conftest.cpython-311.pyc ADDED
Binary file (3.42 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/dircache.cpython-311.pyc ADDED
Binary file (4.75 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/exceptions.cpython-311.pyc ADDED
Binary file (983 Bytes). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/generic.cpython-311.pyc ADDED
Binary file (21.7 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/parquet.cpython-311.pyc ADDED
Binary file (17.7 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/spec.cpython-311.pyc ADDED
Binary file (88.9 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/tests/abstract/__init__.py ADDED
@@ -0,0 +1,287 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from hashlib import md5
3
+
4
+ import pytest
5
+
6
+ from fsspec.implementations.local import LocalFileSystem
7
+ from fsspec.tests.abstract.copy import AbstractCopyTests # noqa
8
+ from fsspec.tests.abstract.get import AbstractGetTests # noqa
9
+ from fsspec.tests.abstract.put import AbstractPutTests # noqa
10
+
11
+
12
class BaseAbstractFixtures:
    """
    Abstract base class containing fixtures that are used by but never need to
    be overridden in derived filesystem-specific classes to run the abstract
    tests on such filesystems.

    Each ``fs_*`` fixture builds a scenario on the filesystem under test and
    each ``local_*`` fixture builds the same scenario on the local filesystem;
    the shared ``_*`` helpers below do the actual directory construction.
    """

    @pytest.fixture
    def fs_bulk_operations_scenario_0(self, fs, fs_join, fs_path):
        """
        Scenario on remote filesystem that is used for many cp/get/put tests.

        Cleans up at the end of each test in which it is used.
        """
        source = self._bulk_operations_scenario_0(fs, fs_join, fs_path)
        yield source
        # Teardown: code after the yield runs when the consuming test finishes.
        fs.rm(source, recursive=True)

    @pytest.fixture
    def fs_glob_edge_cases_files(self, fs, fs_join, fs_path):
        """
        Scenario on remote filesystem that is used for glob edge cases cp/get/put tests.

        Cleans up at the end of each test in which it is used.
        """
        source = self._glob_edge_cases_files(fs, fs_join, fs_path)
        yield source
        fs.rm(source, recursive=True)

    @pytest.fixture
    def fs_dir_and_file_with_same_name_prefix(self, fs, fs_join, fs_path):
        """
        Scenario on remote filesystem that is used to check cp/get/put on directory
        and file with the same name prefixes.

        Cleans up at the end of each test in which it is used.
        """
        source = self._dir_and_file_with_same_name_prefix(fs, fs_join, fs_path)
        yield source
        fs.rm(source, recursive=True)

    @pytest.fixture
    def fs_10_files_with_hashed_names(self, fs, fs_join, fs_path):
        """
        Scenario on remote filesystem that is used to check cp/get/put files order
        when source and destination are lists.

        Cleans up at the end of each test in which it is used.
        """
        source = self._10_files_with_hashed_names(fs, fs_join, fs_path)
        yield source
        fs.rm(source, recursive=True)

    @pytest.fixture
    def fs_target(self, fs, fs_join, fs_path):
        """
        Return name of remote directory that does not yet exist to copy into.

        Cleans up at the end of each test in which it is used.
        """
        target = fs_join(fs_path, "target")
        yield target
        # The target may or may not have been created by the test, so check first.
        if fs.exists(target):
            fs.rm(target, recursive=True)

    @pytest.fixture
    def local_bulk_operations_scenario_0(self, local_fs, local_join, local_path):
        """
        Scenario on local filesystem that is used for many cp/get/put tests.

        Cleans up at the end of each test in which it is used.
        """
        source = self._bulk_operations_scenario_0(local_fs, local_join, local_path)
        yield source
        local_fs.rm(source, recursive=True)

    @pytest.fixture
    def local_glob_edge_cases_files(self, local_fs, local_join, local_path):
        """
        Scenario on local filesystem that is used for glob edge cases cp/get/put tests.

        Cleans up at the end of each test in which it is used.
        """
        source = self._glob_edge_cases_files(local_fs, local_join, local_path)
        yield source
        local_fs.rm(source, recursive=True)

    @pytest.fixture
    def local_dir_and_file_with_same_name_prefix(
        self, local_fs, local_join, local_path
    ):
        """
        Scenario on local filesystem that is used to check cp/get/put on directory
        and file with the same name prefixes.

        Cleans up at the end of each test in which it is used.
        """
        source = self._dir_and_file_with_same_name_prefix(
            local_fs, local_join, local_path
        )
        yield source
        local_fs.rm(source, recursive=True)

    @pytest.fixture
    def local_10_files_with_hashed_names(self, local_fs, local_join, local_path):
        """
        Scenario on local filesystem that is used to check cp/get/put files order
        when source and destination are lists.

        Cleans up at the end of each test in which it is used.
        """
        source = self._10_files_with_hashed_names(local_fs, local_join, local_path)
        yield source
        local_fs.rm(source, recursive=True)

    @pytest.fixture
    def local_target(self, local_fs, local_join, local_path):
        """
        Return name of local directory that does not yet exist to copy into.

        Cleans up at the end of each test in which it is used.
        """
        target = local_join(local_path, "target")
        yield target
        if local_fs.exists(target):
            local_fs.rm(target, recursive=True)

    def _glob_edge_cases_files(self, some_fs, some_join, some_path):
        """
        Scenario that is used for glob edge cases cp/get/put tests.
        Creates the following directory and file structure:

        📁 source
        ├── 📄 file1
        ├── 📄 file2
        ├── 📁 subdir0
        │   ├── 📄 subfile1
        │   ├── 📄 subfile2
        │   └── 📁 nesteddir
        │       └── 📄 nestedfile
        └── 📁 subdir1
            ├── 📄 subfile1
            ├── 📄 subfile2
            └── 📁 nesteddir
                └── 📄 nestedfile
        """
        source = some_join(some_path, "source")
        some_fs.touch(some_join(source, "file1"))
        some_fs.touch(some_join(source, "file2"))

        # Two structurally identical subdirectories so patterns like subdir[0-1]
        # and **/subdir0 can distinguish between them.
        for subdir_idx in range(2):
            subdir = some_join(source, f"subdir{subdir_idx}")
            nesteddir = some_join(subdir, "nesteddir")
            some_fs.makedirs(nesteddir)
            some_fs.touch(some_join(subdir, "subfile1"))
            some_fs.touch(some_join(subdir, "subfile2"))
            some_fs.touch(some_join(nesteddir, "nestedfile"))

        return source

    def _bulk_operations_scenario_0(self, some_fs, some_join, some_path):
        """
        Scenario that is used for many cp/get/put tests. Creates the following
        directory and file structure:

        📁 source
        ├── 📄 file1
        ├── 📄 file2
        └── 📁 subdir
            ├── 📄 subfile1
            ├── 📄 subfile2
            └── 📁 nesteddir
                └── 📄 nestedfile
        """
        source = some_join(some_path, "source")
        subdir = some_join(source, "subdir")
        nesteddir = some_join(subdir, "nesteddir")
        some_fs.makedirs(nesteddir)
        some_fs.touch(some_join(source, "file1"))
        some_fs.touch(some_join(source, "file2"))
        some_fs.touch(some_join(subdir, "subfile1"))
        some_fs.touch(some_join(subdir, "subfile2"))
        some_fs.touch(some_join(nesteddir, "nestedfile"))
        return source

    def _dir_and_file_with_same_name_prefix(self, some_fs, some_join, some_path):
        """
        Scenario that is used to check cp/get/put on directory and file with
        the same name prefixes. Creates the following directory and file structure:

        📁 source
        ├── 📄 subdir.txt
        └── 📁 subdir
            └── 📄 subfile.txt
        """
        source = some_join(some_path, "source")
        subdir = some_join(source, "subdir")
        file = some_join(source, "subdir.txt")
        subfile = some_join(subdir, "subfile.txt")
        some_fs.makedirs(subdir)
        some_fs.touch(file)
        some_fs.touch(subfile)
        return source

    def _10_files_with_hashed_names(self, some_fs, some_join, some_path):
        """
        Scenario that is used to check cp/get/put files order when source and
        destination are lists. Creates the following directory and file structure:

        📁 source
        └── 📄 {hashed([0-9])}.txt
        """
        source = some_join(some_path, "source")
        # Hashed names make the listing order differ from numeric order, which is
        # what the list-ordering tests need.
        for i in range(10):
            hashed_i = md5(str(i).encode("utf-8")).hexdigest()
            path = some_join(source, f"{hashed_i}.txt")
            some_fs.pipe(path=path, value=f"{i}".encode("utf-8"))
        return source
+
231
+
232
class AbstractFixtures(BaseAbstractFixtures):
    """
    Abstract base class containing fixtures that may be overridden in derived
    filesystem-specific classes to run the abstract tests on such filesystems.

    For any particular filesystem some of these fixtures must be overridden,
    such as ``fs`` and ``fs_path``, and others may be overridden if the
    default functions here are not appropriate, such as ``fs_join``.
    """

    @pytest.fixture
    def fs(self):
        """The filesystem instance under test; must be supplied by subclasses."""
        raise NotImplementedError("This function must be overridden in derived classes")

    @pytest.fixture
    def fs_join(self):
        """
        Return a function that joins its arguments together into a path.

        Most fsspec implementations join paths in a platform-dependent way,
        but some will override this to always use a forward slash.
        """
        return os.path.join

    @pytest.fixture
    def fs_path(self):
        """Root path on the filesystem under test; must be supplied by subclasses."""
        raise NotImplementedError("This function must be overridden in derived classes")

    @pytest.fixture(scope="class")
    def local_fs(self):
        # Maybe need an option for auto_mkdir=False? This is only relevant
        # for certain implementations.
        return LocalFileSystem(auto_mkdir=True)

    @pytest.fixture
    def local_join(self):
        """
        Return a function that joins its arguments together into a path, on
        the local filesystem.
        """
        return os.path.join

    @pytest.fixture
    def local_path(self, tmpdir):
        # Per-test temporary directory provided by pytest's built-in tmpdir fixture.
        return tmpdir

    @pytest.fixture
    def supports_empty_directories(self):
        """
        Return whether this implementation supports empty directories.
        """
        return True

    @pytest.fixture
    def fs_sanitize_path(self):
        # Identity by default; implementations may override to normalize paths
        # before comparison.
        return lambda x: x
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/tests/abstract/__pycache__/get.cpython-311.pyc ADDED
Binary file (26.3 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/tests/abstract/common.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Keyword arguments for ``pytest.mark.parametrize`` shared by the glob edge-case
# cp/get/put tests. Each argvalue is (glob pattern, recursive flag, maxdepth,
# expected relative result paths); the expected paths presumably correspond to the
# file tree built by the ``_glob_edge_cases_files`` scenario fixture — verify
# against the consuming test classes.
GLOB_EDGE_CASES_TESTS = {
    "argnames": ("path", "recursive", "maxdepth", "expected"),
    "argvalues": [
        ("fil?1", False, None, ["file1"]),
        ("fil?1", True, None, ["file1"]),
        ("file[1-2]", False, None, ["file1", "file2"]),
        ("file[1-2]", True, None, ["file1", "file2"]),
        ("*", False, None, ["file1", "file2"]),
        (
            "*",
            True,
            None,
            [
                "file1",
                "file2",
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir0/nesteddir/nestedfile",
                "subdir1/subfile1",
                "subdir1/subfile2",
                "subdir1/nesteddir/nestedfile",
            ],
        ),
        ("*", True, 1, ["file1", "file2"]),
        (
            "*",
            True,
            2,
            [
                "file1",
                "file2",
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir1/subfile1",
                "subdir1/subfile2",
            ],
        ),
        ("*1", False, None, ["file1"]),
        (
            "*1",
            True,
            None,
            [
                "file1",
                "subdir1/subfile1",
                "subdir1/subfile2",
                "subdir1/nesteddir/nestedfile",
            ],
        ),
        ("*1", True, 2, ["file1", "subdir1/subfile1", "subdir1/subfile2"]),
        (
            "**",
            False,
            None,
            [
                "file1",
                "file2",
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir0/nesteddir/nestedfile",
                "subdir1/subfile1",
                "subdir1/subfile2",
                "subdir1/nesteddir/nestedfile",
            ],
        ),
        (
            "**",
            True,
            None,
            [
                "file1",
                "file2",
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir0/nesteddir/nestedfile",
                "subdir1/subfile1",
                "subdir1/subfile2",
                "subdir1/nesteddir/nestedfile",
            ],
        ),
        ("**", True, 1, ["file1", "file2"]),
        (
            "**",
            True,
            2,
            [
                "file1",
                "file2",
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir0/nesteddir/nestedfile",
                "subdir1/subfile1",
                "subdir1/subfile2",
                "subdir1/nesteddir/nestedfile",
            ],
        ),
        (
            "**",
            False,
            2,
            [
                "file1",
                "file2",
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir1/subfile1",
                "subdir1/subfile2",
            ],
        ),
        ("**/*1", False, None, ["file1", "subdir0/subfile1", "subdir1/subfile1"]),
        (
            "**/*1",
            True,
            None,
            [
                "file1",
                "subdir0/subfile1",
                "subdir1/subfile1",
                "subdir1/subfile2",
                "subdir1/nesteddir/nestedfile",
            ],
        ),
        ("**/*1", True, 1, ["file1"]),
        (
            "**/*1",
            True,
            2,
            ["file1", "subdir0/subfile1", "subdir1/subfile1", "subdir1/subfile2"],
        ),
        ("**/*1", False, 2, ["file1", "subdir0/subfile1", "subdir1/subfile1"]),
        ("**/subdir0", False, None, []),
        ("**/subdir0", True, None, ["subfile1", "subfile2", "nesteddir/nestedfile"]),
        ("**/subdir0/nested*", False, 2, []),
        ("**/subdir0/nested*", True, 2, ["nestedfile"]),
        ("subdir[1-2]", False, None, []),
        ("subdir[1-2]", True, None, ["subfile1", "subfile2", "nesteddir/nestedfile"]),
        ("subdir[1-2]", True, 2, ["subfile1", "subfile2"]),
        ("subdir[0-1]", False, None, []),
        (
            "subdir[0-1]",
            True,
            None,
            [
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir0/nesteddir/nestedfile",
                "subdir1/subfile1",
                "subdir1/subfile2",
                "subdir1/nesteddir/nestedfile",
            ],
        ),
        (
            "subdir[0-1]/*fil[e]*",
            False,
            None,
            [
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir1/subfile1",
                "subdir1/subfile2",
            ],
        ),
        (
            "subdir[0-1]/*fil[e]*",
            True,
            None,
            [
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir1/subfile1",
                "subdir1/subfile2",
            ],
        ),
    ],
}
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/tests/abstract/copy.py ADDED
@@ -0,0 +1,557 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from hashlib import md5
2
+ from itertools import product
3
+
4
+ import pytest
5
+
6
+ from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS
7
+
8
+
9
+ class AbstractCopyTests:
10
+ def test_copy_file_to_existing_directory(
11
+ self,
12
+ fs,
13
+ fs_join,
14
+ fs_bulk_operations_scenario_0,
15
+ fs_target,
16
+ supports_empty_directories,
17
+ ):
18
+ # Copy scenario 1a
19
+ source = fs_bulk_operations_scenario_0
20
+
21
+ target = fs_target
22
+ fs.mkdir(target)
23
+ if not supports_empty_directories:
24
+ # Force target directory to exist by adding a dummy file
25
+ fs.touch(fs_join(target, "dummy"))
26
+ assert fs.isdir(target)
27
+
28
+ target_file2 = fs_join(target, "file2")
29
+ target_subfile1 = fs_join(target, "subfile1")
30
+
31
+ # Copy from source directory
32
+ fs.cp(fs_join(source, "file2"), target)
33
+ assert fs.isfile(target_file2)
34
+
35
+ # Copy from sub directory
36
+ fs.cp(fs_join(source, "subdir", "subfile1"), target)
37
+ assert fs.isfile(target_subfile1)
38
+
39
+ # Remove copied files
40
+ fs.rm([target_file2, target_subfile1])
41
+ assert not fs.exists(target_file2)
42
+ assert not fs.exists(target_subfile1)
43
+
44
+ # Repeat with trailing slash on target
45
+ fs.cp(fs_join(source, "file2"), target + "/")
46
+ assert fs.isdir(target)
47
+ assert fs.isfile(target_file2)
48
+
49
+ fs.cp(fs_join(source, "subdir", "subfile1"), target + "/")
50
+ assert fs.isfile(target_subfile1)
51
+
52
+ def test_copy_file_to_new_directory(
53
+ self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
54
+ ):
55
+ # Copy scenario 1b
56
+ source = fs_bulk_operations_scenario_0
57
+
58
+ target = fs_target
59
+ fs.mkdir(target)
60
+
61
+ fs.cp(
62
+ fs_join(source, "subdir", "subfile1"), fs_join(target, "newdir/")
63
+ ) # Note trailing slash
64
+ assert fs.isdir(target)
65
+ assert fs.isdir(fs_join(target, "newdir"))
66
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
67
+
68
+ def test_copy_file_to_file_in_existing_directory(
69
+ self,
70
+ fs,
71
+ fs_join,
72
+ fs_bulk_operations_scenario_0,
73
+ fs_target,
74
+ supports_empty_directories,
75
+ ):
76
+ # Copy scenario 1c
77
+ source = fs_bulk_operations_scenario_0
78
+
79
+ target = fs_target
80
+ fs.mkdir(target)
81
+ if not supports_empty_directories:
82
+ # Force target directory to exist by adding a dummy file
83
+ fs.touch(fs_join(target, "dummy"))
84
+ assert fs.isdir(target)
85
+
86
+ fs.cp(fs_join(source, "subdir", "subfile1"), fs_join(target, "newfile"))
87
+ assert fs.isfile(fs_join(target, "newfile"))
88
+
89
+ def test_copy_file_to_file_in_new_directory(
90
+ self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
91
+ ):
92
+ # Copy scenario 1d
93
+ source = fs_bulk_operations_scenario_0
94
+
95
+ target = fs_target
96
+ fs.mkdir(target)
97
+
98
+ fs.cp(
99
+ fs_join(source, "subdir", "subfile1"), fs_join(target, "newdir", "newfile")
100
+ )
101
+ assert fs.isdir(fs_join(target, "newdir"))
102
+ assert fs.isfile(fs_join(target, "newdir", "newfile"))
103
+
104
+ def test_copy_directory_to_existing_directory(
105
+ self,
106
+ fs,
107
+ fs_join,
108
+ fs_bulk_operations_scenario_0,
109
+ fs_target,
110
+ supports_empty_directories,
111
+ ):
112
+ # Copy scenario 1e
113
+ source = fs_bulk_operations_scenario_0
114
+
115
+ target = fs_target
116
+ fs.mkdir(target)
117
+ if not supports_empty_directories:
118
+ # Force target directory to exist by adding a dummy file
119
+ dummy = fs_join(target, "dummy")
120
+ fs.touch(dummy)
121
+ assert fs.isdir(target)
122
+
123
+ for source_slash, target_slash in zip([False, True], [False, True]):
124
+ s = fs_join(source, "subdir")
125
+ if source_slash:
126
+ s += "/"
127
+ t = target + "/" if target_slash else target
128
+
129
+ # Without recursive does nothing
130
+ fs.cp(s, t)
131
+ assert fs.ls(target, detail=False) == (
132
+ [] if supports_empty_directories else [dummy]
133
+ )
134
+
135
+ # With recursive
136
+ fs.cp(s, t, recursive=True)
137
+ if source_slash:
138
+ assert fs.isfile(fs_join(target, "subfile1"))
139
+ assert fs.isfile(fs_join(target, "subfile2"))
140
+ assert fs.isdir(fs_join(target, "nesteddir"))
141
+ assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
142
+ assert not fs.exists(fs_join(target, "subdir"))
143
+
144
+ fs.rm(
145
+ [
146
+ fs_join(target, "subfile1"),
147
+ fs_join(target, "subfile2"),
148
+ fs_join(target, "nesteddir"),
149
+ ],
150
+ recursive=True,
151
+ )
152
+ else:
153
+ assert fs.isdir(fs_join(target, "subdir"))
154
+ assert fs.isfile(fs_join(target, "subdir", "subfile1"))
155
+ assert fs.isfile(fs_join(target, "subdir", "subfile2"))
156
+ assert fs.isdir(fs_join(target, "subdir", "nesteddir"))
157
+ assert fs.isfile(fs_join(target, "subdir", "nesteddir", "nestedfile"))
158
+
159
+ fs.rm(fs_join(target, "subdir"), recursive=True)
160
+ assert fs.ls(target, detail=False) == (
161
+ [] if supports_empty_directories else [dummy]
162
+ )
163
+
164
+ # Limit recursive by maxdepth
165
+ fs.cp(s, t, recursive=True, maxdepth=1)
166
+ if source_slash:
167
+ assert fs.isfile(fs_join(target, "subfile1"))
168
+ assert fs.isfile(fs_join(target, "subfile2"))
169
+ assert not fs.exists(fs_join(target, "nesteddir"))
170
+ assert not fs.exists(fs_join(target, "subdir"))
171
+
172
+ fs.rm(
173
+ [
174
+ fs_join(target, "subfile1"),
175
+ fs_join(target, "subfile2"),
176
+ ],
177
+ recursive=True,
178
+ )
179
+ else:
180
+ assert fs.isdir(fs_join(target, "subdir"))
181
+ assert fs.isfile(fs_join(target, "subdir", "subfile1"))
182
+ assert fs.isfile(fs_join(target, "subdir", "subfile2"))
183
+ assert not fs.exists(fs_join(target, "subdir", "nesteddir"))
184
+
185
+ fs.rm(fs_join(target, "subdir"), recursive=True)
186
+ assert fs.ls(target, detail=False) == (
187
+ [] if supports_empty_directories else [dummy]
188
+ )
189
+
190
+ def test_copy_directory_to_new_directory(
191
+ self,
192
+ fs,
193
+ fs_join,
194
+ fs_bulk_operations_scenario_0,
195
+ fs_target,
196
+ supports_empty_directories,
197
+ ):
198
+ # Copy scenario 1f
199
+ source = fs_bulk_operations_scenario_0
200
+
201
+ target = fs_target
202
+ fs.mkdir(target)
203
+
204
+ for source_slash, target_slash in zip([False, True], [False, True]):
205
+ s = fs_join(source, "subdir")
206
+ if source_slash:
207
+ s += "/"
208
+ t = fs_join(target, "newdir")
209
+ if target_slash:
210
+ t += "/"
211
+
212
+ # Without recursive does nothing
213
+ fs.cp(s, t)
214
+ if supports_empty_directories:
215
+ assert fs.ls(target) == []
216
+ else:
217
+ with pytest.raises(FileNotFoundError):
218
+ fs.ls(target)
219
+
220
+ # With recursive
221
+ fs.cp(s, t, recursive=True)
222
+ assert fs.isdir(fs_join(target, "newdir"))
223
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
224
+ assert fs.isfile(fs_join(target, "newdir", "subfile2"))
225
+ assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
226
+ assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
227
+ assert not fs.exists(fs_join(target, "subdir"))
228
+
229
+ fs.rm(fs_join(target, "newdir"), recursive=True)
230
+ assert not fs.exists(fs_join(target, "newdir"))
231
+
232
+ # Limit recursive by maxdepth
233
+ fs.cp(s, t, recursive=True, maxdepth=1)
234
+ assert fs.isdir(fs_join(target, "newdir"))
235
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
236
+ assert fs.isfile(fs_join(target, "newdir", "subfile2"))
237
+ assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
238
+ assert not fs.exists(fs_join(target, "subdir"))
239
+
240
+ fs.rm(fs_join(target, "newdir"), recursive=True)
241
+ assert not fs.exists(fs_join(target, "newdir"))
242
+
243
+ def test_copy_glob_to_existing_directory(
244
+ self,
245
+ fs,
246
+ fs_join,
247
+ fs_bulk_operations_scenario_0,
248
+ fs_target,
249
+ supports_empty_directories,
250
+ ):
251
+ # Copy scenario 1g
252
+ source = fs_bulk_operations_scenario_0
253
+
254
+ target = fs_target
255
+ fs.mkdir(target)
256
+ if not supports_empty_directories:
257
+ # Force target directory to exist by adding a dummy file
258
+ dummy = fs_join(target, "dummy")
259
+ fs.touch(dummy)
260
+ assert fs.isdir(target)
261
+
262
+ for target_slash in [False, True]:
263
+ t = target + "/" if target_slash else target
264
+
265
+ # Without recursive
266
+ fs.cp(fs_join(source, "subdir", "*"), t)
267
+ assert fs.isfile(fs_join(target, "subfile1"))
268
+ assert fs.isfile(fs_join(target, "subfile2"))
269
+ assert not fs.isdir(fs_join(target, "nesteddir"))
270
+ assert not fs.exists(fs_join(target, "nesteddir", "nestedfile"))
271
+ assert not fs.exists(fs_join(target, "subdir"))
272
+
273
+ fs.rm(
274
+ [
275
+ fs_join(target, "subfile1"),
276
+ fs_join(target, "subfile2"),
277
+ ],
278
+ recursive=True,
279
+ )
280
+ assert fs.ls(target, detail=False) == (
281
+ [] if supports_empty_directories else [dummy]
282
+ )
283
+
284
+ # With recursive
285
+ for glob, recursive in zip(["*", "**"], [True, False]):
286
+ fs.cp(fs_join(source, "subdir", glob), t, recursive=recursive)
287
+ assert fs.isfile(fs_join(target, "subfile1"))
288
+ assert fs.isfile(fs_join(target, "subfile2"))
289
+ assert fs.isdir(fs_join(target, "nesteddir"))
290
+ assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
291
+ assert not fs.exists(fs_join(target, "subdir"))
292
+
293
+ fs.rm(
294
+ [
295
+ fs_join(target, "subfile1"),
296
+ fs_join(target, "subfile2"),
297
+ fs_join(target, "nesteddir"),
298
+ ],
299
+ recursive=True,
300
+ )
301
+ assert fs.ls(target, detail=False) == (
302
+ [] if supports_empty_directories else [dummy]
303
+ )
304
+
305
+ # Limit recursive by maxdepth
306
+ fs.cp(
307
+ fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1
308
+ )
309
+ assert fs.isfile(fs_join(target, "subfile1"))
310
+ assert fs.isfile(fs_join(target, "subfile2"))
311
+ assert not fs.exists(fs_join(target, "nesteddir"))
312
+ assert not fs.exists(fs_join(target, "subdir"))
313
+
314
+ fs.rm(
315
+ [
316
+ fs_join(target, "subfile1"),
317
+ fs_join(target, "subfile2"),
318
+ ],
319
+ recursive=True,
320
+ )
321
+ assert fs.ls(target, detail=False) == (
322
+ [] if supports_empty_directories else [dummy]
323
+ )
324
+
325
+ def test_copy_glob_to_new_directory(
326
+ self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
327
+ ):
328
+ # Copy scenario 1h
329
+ source = fs_bulk_operations_scenario_0
330
+
331
+ target = fs_target
332
+ fs.mkdir(target)
333
+
334
+ for target_slash in [False, True]:
335
+ t = fs_join(target, "newdir")
336
+ if target_slash:
337
+ t += "/"
338
+
339
+ # Without recursive
340
+ fs.cp(fs_join(source, "subdir", "*"), t)
341
+ assert fs.isdir(fs_join(target, "newdir"))
342
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
343
+ assert fs.isfile(fs_join(target, "newdir", "subfile2"))
344
+ assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
345
+ assert not fs.exists(fs_join(target, "newdir", "nesteddir", "nestedfile"))
346
+ assert not fs.exists(fs_join(target, "subdir"))
347
+ assert not fs.exists(fs_join(target, "newdir", "subdir"))
348
+
349
+ fs.rm(fs_join(target, "newdir"), recursive=True)
350
+ assert not fs.exists(fs_join(target, "newdir"))
351
+
352
+ # With recursive
353
+ for glob, recursive in zip(["*", "**"], [True, False]):
354
+ fs.cp(fs_join(source, "subdir", glob), t, recursive=recursive)
355
+ assert fs.isdir(fs_join(target, "newdir"))
356
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
357
+ assert fs.isfile(fs_join(target, "newdir", "subfile2"))
358
+ assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
359
+ assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
360
+ assert not fs.exists(fs_join(target, "subdir"))
361
+ assert not fs.exists(fs_join(target, "newdir", "subdir"))
362
+
363
+ fs.rm(fs_join(target, "newdir"), recursive=True)
364
+ assert not fs.exists(fs_join(target, "newdir"))
365
+
366
+ # Limit recursive by maxdepth
367
+ fs.cp(
368
+ fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1
369
+ )
370
+ assert fs.isdir(fs_join(target, "newdir"))
371
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
372
+ assert fs.isfile(fs_join(target, "newdir", "subfile2"))
373
+ assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
374
+ assert not fs.exists(fs_join(target, "subdir"))
375
+ assert not fs.exists(fs_join(target, "newdir", "subdir"))
376
+
377
+ fs.rm(fs_join(target, "newdir"), recursive=True)
378
+ assert not fs.exists(fs_join(target, "newdir"))
379
+
380
+ @pytest.mark.parametrize(
381
+ GLOB_EDGE_CASES_TESTS["argnames"],
382
+ GLOB_EDGE_CASES_TESTS["argvalues"],
383
+ )
384
+ def test_copy_glob_edge_cases(
385
+ self,
386
+ path,
387
+ recursive,
388
+ maxdepth,
389
+ expected,
390
+ fs,
391
+ fs_join,
392
+ fs_glob_edge_cases_files,
393
+ fs_target,
394
+ fs_sanitize_path,
395
+ ):
396
+ # Copy scenario 1g
397
+ source = fs_glob_edge_cases_files
398
+
399
+ target = fs_target
400
+
401
+ for new_dir, target_slash in product([True, False], [True, False]):
402
+ fs.mkdir(target)
403
+
404
+ t = fs_join(target, "newdir") if new_dir else target
405
+ t = t + "/" if target_slash else t
406
+
407
+ fs.copy(fs_join(source, path), t, recursive=recursive, maxdepth=maxdepth)
408
+
409
+ output = fs.find(target)
410
+ if new_dir:
411
+ prefixed_expected = [
412
+ fs_sanitize_path(fs_join(target, "newdir", p)) for p in expected
413
+ ]
414
+ else:
415
+ prefixed_expected = [
416
+ fs_sanitize_path(fs_join(target, p)) for p in expected
417
+ ]
418
+ assert sorted(output) == sorted(prefixed_expected)
419
+
420
+ try:
421
+ fs.rm(target, recursive=True)
422
+ except FileNotFoundError:
423
+ pass
424
+
425
+ def test_copy_list_of_files_to_existing_directory(
426
+ self,
427
+ fs,
428
+ fs_join,
429
+ fs_bulk_operations_scenario_0,
430
+ fs_target,
431
+ supports_empty_directories,
432
+ ):
433
+ # Copy scenario 2a
434
+ source = fs_bulk_operations_scenario_0
435
+
436
+ target = fs_target
437
+ fs.mkdir(target)
438
+ if not supports_empty_directories:
439
+ # Force target directory to exist by adding a dummy file
440
+ dummy = fs_join(target, "dummy")
441
+ fs.touch(dummy)
442
+ assert fs.isdir(target)
443
+
444
+ source_files = [
445
+ fs_join(source, "file1"),
446
+ fs_join(source, "file2"),
447
+ fs_join(source, "subdir", "subfile1"),
448
+ ]
449
+
450
+ for target_slash in [False, True]:
451
+ t = target + "/" if target_slash else target
452
+
453
+ fs.cp(source_files, t)
454
+ assert fs.isfile(fs_join(target, "file1"))
455
+ assert fs.isfile(fs_join(target, "file2"))
456
+ assert fs.isfile(fs_join(target, "subfile1"))
457
+
458
+ fs.rm(
459
+ [
460
+ fs_join(target, "file1"),
461
+ fs_join(target, "file2"),
462
+ fs_join(target, "subfile1"),
463
+ ],
464
+ recursive=True,
465
+ )
466
+ assert fs.ls(target, detail=False) == (
467
+ [] if supports_empty_directories else [dummy]
468
+ )
469
+
470
+ def test_copy_list_of_files_to_new_directory(
471
+ self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
472
+ ):
473
+ # Copy scenario 2b
474
+ source = fs_bulk_operations_scenario_0
475
+
476
+ target = fs_target
477
+ fs.mkdir(target)
478
+
479
+ source_files = [
480
+ fs_join(source, "file1"),
481
+ fs_join(source, "file2"),
482
+ fs_join(source, "subdir", "subfile1"),
483
+ ]
484
+
485
+ fs.cp(source_files, fs_join(target, "newdir") + "/") # Note trailing slash
486
+ assert fs.isdir(fs_join(target, "newdir"))
487
+ assert fs.isfile(fs_join(target, "newdir", "file1"))
488
+ assert fs.isfile(fs_join(target, "newdir", "file2"))
489
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
490
+
491
+ def test_copy_two_files_new_directory(
492
+ self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
493
+ ):
494
+ # This is a duplicate of test_copy_list_of_files_to_new_directory and
495
+ # can eventually be removed.
496
+ source = fs_bulk_operations_scenario_0
497
+
498
+ target = fs_target
499
+ assert not fs.exists(target)
500
+ fs.cp([fs_join(source, "file1"), fs_join(source, "file2")], target)
501
+
502
+ assert fs.isdir(target)
503
+ assert fs.isfile(fs_join(target, "file1"))
504
+ assert fs.isfile(fs_join(target, "file2"))
505
+
506
+ def test_copy_directory_without_files_with_same_name_prefix(
507
+ self,
508
+ fs,
509
+ fs_join,
510
+ fs_target,
511
+ fs_dir_and_file_with_same_name_prefix,
512
+ supports_empty_directories,
513
+ ):
514
+ # Create the test dirs
515
+ source = fs_dir_and_file_with_same_name_prefix
516
+ target = fs_target
517
+
518
+ # Test without glob
519
+ fs.cp(fs_join(source, "subdir"), target, recursive=True)
520
+
521
+ assert fs.isfile(fs_join(target, "subfile.txt"))
522
+ assert not fs.isfile(fs_join(target, "subdir.txt"))
523
+
524
+ fs.rm([fs_join(target, "subfile.txt")])
525
+ if supports_empty_directories:
526
+ assert fs.ls(target) == []
527
+ else:
528
+ assert not fs.exists(target)
529
+
530
+ # Test with glob
531
+ fs.cp(fs_join(source, "subdir*"), target, recursive=True)
532
+
533
+ assert fs.isdir(fs_join(target, "subdir"))
534
+ assert fs.isfile(fs_join(target, "subdir", "subfile.txt"))
535
+ assert fs.isfile(fs_join(target, "subdir.txt"))
536
+
537
+ def test_copy_with_source_and_destination_as_list(
538
+ self, fs, fs_target, fs_join, fs_10_files_with_hashed_names
539
+ ):
540
+ # Create the test dir
541
+ source = fs_10_files_with_hashed_names
542
+ target = fs_target
543
+
544
+ # Create list of files for source and destination
545
+ source_files = []
546
+ destination_files = []
547
+ for i in range(10):
548
+ hashed_i = md5(str(i).encode("utf-8")).hexdigest()
549
+ source_files.append(fs_join(source, f"{hashed_i}.txt"))
550
+ destination_files.append(fs_join(target, f"{hashed_i}.txt"))
551
+
552
+ # Copy and assert order was kept
553
+ fs.copy(path1=source_files, path2=destination_files)
554
+
555
+ for i in range(10):
556
+ file_content = fs.cat(destination_files[i]).decode("utf-8")
557
+ assert file_content == str(i)
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/calculus/__pycache__/approximation.cpython-311.pyc ADDED
Binary file (12.7 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/calculus/__pycache__/differentiation.cpython-311.pyc ADDED
Binary file (28.4 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (207 Bytes). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/__init__.py ADDED
File without changes
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti.h ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2010-2017 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #if !defined(_CUPTI_H_)
51
+ #define _CUPTI_H_
52
+
53
+ #ifdef _WIN32
54
+ #ifndef WIN32_LEAN_AND_MEAN
55
+ #define WIN32_LEAN_AND_MEAN
56
+ #endif
57
+ #ifdef NOMINMAX
58
+ #include <windows.h>
59
+ #else
60
+ #define NOMINMAX
61
+ #include <windows.h>
62
+ #undef NOMINMAX
63
+ #endif
64
+ #endif
65
+
66
+ #include <cuda.h>
67
+ #include <cupti_result.h>
68
+ #include <cupti_version.h>
69
+
70
+ /* Activity, callback, event and metric APIs */
71
+ #include <cupti_activity.h>
72
+ #include <cupti_callbacks.h>
73
+ #include <cupti_events.h>
74
+ #include <cupti_metrics.h>
75
+
76
+ /* Runtime, driver, and nvtx function identifiers */
77
+ #include <cupti_driver_cbid.h>
78
+ #include <cupti_runtime_cbid.h>
79
+ #include <cupti_nvtx_cbid.h>
80
+
81
+ /* To support function parameter structures for obsoleted API. See
82
+ cuda.h for the actual definition of these structures. */
83
+ typedef unsigned int CUdeviceptr_v1;
84
+ typedef struct CUDA_MEMCPY2D_v1_st { int dummy; } CUDA_MEMCPY2D_v1;
85
+ typedef struct CUDA_MEMCPY3D_v1_st { int dummy; } CUDA_MEMCPY3D_v1;
86
+ typedef struct CUDA_ARRAY_DESCRIPTOR_v1_st { int dummy; } CUDA_ARRAY_DESCRIPTOR_v1;
87
+ typedef struct CUDA_ARRAY3D_DESCRIPTOR_v1_st { int dummy; } CUDA_ARRAY3D_DESCRIPTOR_v1;
88
+
89
+ /* Function parameter structures */
90
+ #include <generated_cuda_runtime_api_meta.h>
91
+ #include <generated_cuda_meta.h>
92
+
93
+ /* The following parameter structures cannot be included unless a
94
+ header that defines GL_VERSION is included before including them.
95
+ If these are needed then make sure such a header is included
96
+ already. */
97
+ #ifdef GL_VERSION
98
+ #include <generated_cuda_gl_interop_meta.h>
99
+ #include <generated_cudaGL_meta.h>
100
+ #endif
101
+
102
+ //#include <generated_nvtx_meta.h>
103
+
104
+ /* The following parameter structures cannot be included by default as
105
+ they are not guaranteed to be available on all systems. Uncomment
106
+ the includes that are available, or use the include explicitly. */
107
+ #if defined(__linux__)
108
+ //#include <generated_cuda_vdpau_interop_meta.h>
109
+ //#include <generated_cudaVDPAU_meta.h>
110
+ #endif
111
+
112
+ #ifdef _WIN32
113
+ //#include <generated_cuda_d3d9_interop_meta.h>
114
+ //#include <generated_cuda_d3d10_interop_meta.h>
115
+ //#include <generated_cuda_d3d11_interop_meta.h>
116
+ //#include <generated_cudaD3D9_meta.h>
117
+ //#include <generated_cudaD3D10_meta.h>
118
+ //#include <generated_cudaD3D11_meta.h>
119
+ #endif
120
+
121
+ #endif /*_CUPTI_H_*/
122
+
123
+
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_callbacks.h ADDED
@@ -0,0 +1,760 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2010-2020 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #if !defined(__CUPTI_CALLBACKS_H__)
51
+ #define __CUPTI_CALLBACKS_H__
52
+
53
+ #include <cuda.h>
54
+ #include <builtin_types.h>
55
+ #include <string.h>
56
+ #include <cuda_stdint.h>
57
+ #include <cupti_result.h>
58
+
59
+ #ifndef CUPTIAPI
60
+ #ifdef _WIN32
61
+ #define CUPTIAPI __stdcall
62
+ #else
63
+ #define CUPTIAPI
64
+ #endif
65
+ #endif
66
+
67
+ #if defined(__cplusplus)
68
+ extern "C" {
69
+ #endif
70
+
71
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
72
+ #pragma GCC visibility push(default)
73
+ #endif
74
+
75
+ /**
76
+ * \defgroup CUPTI_CALLBACK_API CUPTI Callback API
77
+ * Functions, types, and enums that implement the CUPTI Callback API.
78
+ * @{
79
+ */
80
+
81
+ /**
82
+ * \brief Specifies the point in an API call that a callback is issued.
83
+ *
84
+ * Specifies the point in an API call that a callback is issued. This
85
+ * value is communicated to the callback function via \ref
86
+ * CUpti_CallbackData::callbackSite.
87
+ */
88
+ typedef enum {
89
+ /**
90
+ * The callback is at the entry of the API call.
91
+ */
92
+ CUPTI_API_ENTER = 0,
93
+ /**
94
+ * The callback is at the exit of the API call.
95
+ */
96
+ CUPTI_API_EXIT = 1,
97
+ CUPTI_API_CBSITE_FORCE_INT = 0x7fffffff
98
+ } CUpti_ApiCallbackSite;
99
+
100
+ /**
101
+ * \brief Callback domains.
102
+ *
103
+ * Callback domains. Each domain represents callback points for a
104
+ * group of related API functions or CUDA driver activity.
105
+ */
106
+ typedef enum {
107
+ /**
108
+ * Invalid domain.
109
+ */
110
+ CUPTI_CB_DOMAIN_INVALID = 0,
111
+ /**
112
+ * Domain containing callback points for all driver API functions.
113
+ */
114
+ CUPTI_CB_DOMAIN_DRIVER_API = 1,
115
+ /**
116
+ * Domain containing callback points for all runtime API
117
+ * functions.
118
+ */
119
+ CUPTI_CB_DOMAIN_RUNTIME_API = 2,
120
+ /**
121
+ * Domain containing callback points for CUDA resource tracking.
122
+ */
123
+ CUPTI_CB_DOMAIN_RESOURCE = 3,
124
+ /**
125
+ * Domain containing callback points for CUDA synchronization.
126
+ */
127
+ CUPTI_CB_DOMAIN_SYNCHRONIZE = 4,
128
+ /**
129
+ * Domain containing callback points for NVTX API functions.
130
+ */
131
+ CUPTI_CB_DOMAIN_NVTX = 5,
132
+ CUPTI_CB_DOMAIN_SIZE = 6,
133
+ CUPTI_CB_DOMAIN_FORCE_INT = 0x7fffffff
134
+ } CUpti_CallbackDomain;
135
+
136
+ /**
137
+ * \brief Callback IDs for resource domain.
138
+ *
139
+ * Callback IDs for resource domain, CUPTI_CB_DOMAIN_RESOURCE. This
140
+ * value is communicated to the callback function via the \p cbid
141
+ * parameter.
142
+ */
143
+ typedef enum {
144
+ /**
145
+ * Invalid resource callback ID.
146
+ */
147
+ CUPTI_CBID_RESOURCE_INVALID = 0,
148
+ /**
149
+ * A new context has been created.
150
+ */
151
+ CUPTI_CBID_RESOURCE_CONTEXT_CREATED = 1,
152
+ /**
153
+ * A context is about to be destroyed.
154
+ */
155
+ CUPTI_CBID_RESOURCE_CONTEXT_DESTROY_STARTING = 2,
156
+ /**
157
+ * A new stream has been created.
158
+ */
159
+ CUPTI_CBID_RESOURCE_STREAM_CREATED = 3,
160
+ /**
161
+ * A stream is about to be destroyed.
162
+ */
163
+ CUPTI_CBID_RESOURCE_STREAM_DESTROY_STARTING = 4,
164
+ /**
165
+ * The driver has finished initializing.
166
+ */
167
+ CUPTI_CBID_RESOURCE_CU_INIT_FINISHED = 5,
168
+ /**
169
+ * A module has been loaded.
170
+ */
171
+ CUPTI_CBID_RESOURCE_MODULE_LOADED = 6,
172
+ /**
173
+ * A module is about to be unloaded.
174
+ */
175
+ CUPTI_CBID_RESOURCE_MODULE_UNLOAD_STARTING = 7,
176
+ /**
177
+ * The current module which is being profiled.
178
+ */
179
+ CUPTI_CBID_RESOURCE_MODULE_PROFILED = 8,
180
+ /**
181
+ * CUDA graph has been created.
182
+ */
183
+ CUPTI_CBID_RESOURCE_GRAPH_CREATED = 9,
184
+ /**
185
+ * CUDA graph is about to be destroyed.
186
+ */
187
+ CUPTI_CBID_RESOURCE_GRAPH_DESTROY_STARTING = 10,
188
+ /**
189
+ * CUDA graph is cloned.
190
+ */
191
+ CUPTI_CBID_RESOURCE_GRAPH_CLONED = 11,
192
+ /**
193
+ * CUDA graph node is about to be created
194
+ */
195
+ CUPTI_CBID_RESOURCE_GRAPHNODE_CREATE_STARTING = 12,
196
+ /**
197
+ * CUDA graph node is created.
198
+ */
199
+ CUPTI_CBID_RESOURCE_GRAPHNODE_CREATED = 13,
200
+ /**
201
+ * CUDA graph node is about to be destroyed.
202
+ */
203
+ CUPTI_CBID_RESOURCE_GRAPHNODE_DESTROY_STARTING = 14,
204
+ /**
205
+ * Dependency on a CUDA graph node is created.
206
+ */
207
+ CUPTI_CBID_RESOURCE_GRAPHNODE_DEPENDENCY_CREATED = 15,
208
+ /**
209
+ * Dependency on a CUDA graph node is destroyed.
210
+ */
211
+ CUPTI_CBID_RESOURCE_GRAPHNODE_DEPENDENCY_DESTROY_STARTING = 16,
212
+ /**
213
+ * An executable CUDA graph is about to be created.
214
+ */
215
+ CUPTI_CBID_RESOURCE_GRAPHEXEC_CREATE_STARTING = 17,
216
+ /**
217
+ * An executable CUDA graph is created.
218
+ */
219
+ CUPTI_CBID_RESOURCE_GRAPHEXEC_CREATED = 18,
220
+ /**
221
+ * An executable CUDA graph is about to be destroyed.
222
+ */
223
+ CUPTI_CBID_RESOURCE_GRAPHEXEC_DESTROY_STARTING = 19,
224
+ /**
225
+ * CUDA graph node is cloned.
226
+ */
227
+ CUPTI_CBID_RESOURCE_GRAPHNODE_CLONED = 20,
228
+
229
+ CUPTI_CBID_RESOURCE_SIZE,
230
+ CUPTI_CBID_RESOURCE_FORCE_INT = 0x7fffffff
231
+ } CUpti_CallbackIdResource;
232
+
233
+ /**
234
+ * \brief Callback IDs for synchronization domain.
235
+ *
236
+ * Callback IDs for synchronization domain,
237
+ * CUPTI_CB_DOMAIN_SYNCHRONIZE. This value is communicated to the
238
+ * callback function via the \p cbid parameter.
239
+ */
240
+ typedef enum {
241
+ /**
242
+ * Invalid synchronize callback ID.
243
+ */
244
+ CUPTI_CBID_SYNCHRONIZE_INVALID = 0,
245
+ /**
246
+ * Stream synchronization has completed for the stream.
247
+ */
248
+ CUPTI_CBID_SYNCHRONIZE_STREAM_SYNCHRONIZED = 1,
249
+ /**
250
+ * Context synchronization has completed for the context.
251
+ */
252
+ CUPTI_CBID_SYNCHRONIZE_CONTEXT_SYNCHRONIZED = 2,
253
+ CUPTI_CBID_SYNCHRONIZE_SIZE,
254
+ CUPTI_CBID_SYNCHRONIZE_FORCE_INT = 0x7fffffff
255
+ } CUpti_CallbackIdSync;
256
+
257
+ /**
258
+ * \brief Data passed into a runtime or driver API callback function.
259
+ *
260
+ * Data passed into a runtime or driver API callback function as the
261
+ * \p cbdata argument to \ref CUpti_CallbackFunc. The \p cbdata will
262
+ * be this type for \p domain equal to CUPTI_CB_DOMAIN_DRIVER_API or
263
+ * CUPTI_CB_DOMAIN_RUNTIME_API. The callback data is valid only within
264
+ * the invocation of the callback function that is passed the data. If
265
+ * you need to retain some data for use outside of the callback, you
266
+ * must make a copy of that data. For example, if you make a shallow
267
+ * copy of CUpti_CallbackData within a callback, you cannot
268
+ * dereference \p functionParams outside of that callback to access
269
+ * the function parameters. \p functionName is an exception: the
270
+ * string pointed to by \p functionName is a global constant and so
271
+ * may be accessed outside of the callback.
272
+ */
273
+ typedef struct {
274
+ /**
275
+ * Point in the runtime or driver function from where the callback
276
+ * was issued.
277
+ */
278
+ CUpti_ApiCallbackSite callbackSite;
279
+
280
+ /**
281
+ * Name of the runtime or driver API function which issued the
282
+ * callback. This string is a global constant and so may be
283
+ * accessed outside of the callback.
284
+ */
285
+ const char *functionName;
286
+
287
+ /**
288
+ * Pointer to the arguments passed to the runtime or driver API
289
+ * call. See generated_cuda_runtime_api_meta.h and
290
+ * generated_cuda_meta.h for structure definitions for the
291
+ * parameters for each runtime and driver API function.
292
+ */
293
+ const void *functionParams;
294
+
295
+ /**
296
+ * Pointer to the return value of the runtime or driver API
297
+ * call. This field is only valid within the exit::CUPTI_API_EXIT
298
+ * callback. For a runtime API \p functionReturnValue points to a
299
+ * \p cudaError_t. For a driver API \p functionReturnValue points
300
+ * to a \p CUresult.
301
+ */
302
+ void *functionReturnValue;
303
+
304
+ /**
305
+ * Name of the symbol operated on by the runtime or driver API
306
+ * function which issued the callback. This entry is valid only for
307
+ * driver and runtime launch callbacks, where it returns the name of
308
+ * the kernel.
309
+ */
310
+ const char *symbolName;
311
+
312
+ /**
313
+ * Driver context current to the thread, or null if no context is
314
+ * current. This value can change from the entry to exit callback
315
+ * of a runtime API function if the runtime initializes a context.
316
+ */
317
+ CUcontext context;
318
+
319
+ /**
320
+ * Unique ID for the CUDA context associated with the thread. The
321
+ * UIDs are assigned sequentially as contexts are created and are
322
+ * unique within a process.
323
+ */
324
+ uint32_t contextUid;
325
+
326
+ /**
327
+ * Pointer to data shared between the entry and exit callbacks of
328
+ * a given runtime or drive API function invocation. This field
329
+ * can be used to pass 64-bit values from the entry callback to
330
+ * the corresponding exit callback.
331
+ */
332
+ uint64_t *correlationData;
333
+
334
+ /**
335
+ * The activity record correlation ID for this callback. For a
336
+ * driver domain callback (i.e. \p domain
337
+ * CUPTI_CB_DOMAIN_DRIVER_API) this ID will equal the correlation ID
338
+ * in the CUpti_ActivityAPI record corresponding to the CUDA driver
339
+ * function call. For a runtime domain callback (i.e. \p domain
340
+ * CUPTI_CB_DOMAIN_RUNTIME_API) this ID will equal the correlation
341
+ * ID in the CUpti_ActivityAPI record corresponding to the CUDA
342
+ * runtime function call. Within the callback, this ID can be
343
+ * recorded to correlate user data with the activity record. This
344
+ * field is new in 4.1.
345
+ */
346
+ uint32_t correlationId;
347
+
348
+ } CUpti_CallbackData;
349
+
350
+ /**
351
+ * \brief Data passed into a resource callback function.
352
+ *
353
+ * Data passed into a resource callback function as the \p cbdata
354
+ * argument to \ref CUpti_CallbackFunc. The \p cbdata will be this
355
+ * type for \p domain equal to CUPTI_CB_DOMAIN_RESOURCE. The callback
356
+ * data is valid only within the invocation of the callback function
357
+ * that is passed the data. If you need to retain some data for use
358
+ * outside of the callback, you must make a copy of that data.
359
+ */
360
+ typedef struct {
361
+ /**
362
+ * For CUPTI_CBID_RESOURCE_CONTEXT_CREATED and
363
+ * CUPTI_CBID_RESOURCE_CONTEXT_DESTROY_STARTING, the context being
364
+ * created or destroyed. For CUPTI_CBID_RESOURCE_STREAM_CREATED and
365
+ * CUPTI_CBID_RESOURCE_STREAM_DESTROY_STARTING, the context
366
+ * containing the stream being created or destroyed.
367
+ */
368
+ CUcontext context;
369
+
370
+ union {
371
+ /**
372
+ * For CUPTI_CBID_RESOURCE_STREAM_CREATED and
373
+ * CUPTI_CBID_RESOURCE_STREAM_DESTROY_STARTING, the stream being
374
+ * created or destroyed.
375
+ */
376
+ CUstream stream;
377
+ } resourceHandle;
378
+
379
+ /**
380
+ * Reserved for future use.
381
+ */
382
+ void *resourceDescriptor;
383
+ } CUpti_ResourceData;
384
+
385
+
386
+ /**
387
+ * \brief Module data passed into a resource callback function.
388
+ *
389
+ * CUDA module data passed into a resource callback function as the \p cbdata
390
+ * argument to \ref CUpti_CallbackFunc. The \p cbdata will be this
391
+ * type for \p domain equal to CUPTI_CB_DOMAIN_RESOURCE. The module
392
+ * data is valid only within the invocation of the callback function
393
+ * that is passed the data. If you need to retain some data for use
394
+ * outside of the callback, you must make a copy of that data.
395
+ */
396
+
397
+ typedef struct {
398
+ /**
399
+ * Identifier to associate with the CUDA module.
400
+ */
401
+ uint32_t moduleId;
402
+
403
+ /**
404
+ * The size of the cubin.
405
+ */
406
+ size_t cubinSize;
407
+
408
+ /**
409
+ * Pointer to the associated cubin.
410
+ */
411
+ const char *pCubin;
412
+ } CUpti_ModuleResourceData;
413
+
414
+ /**
415
+ * \brief CUDA graphs data passed into a resource callback function.
416
+ *
417
+ * CUDA graphs data passed into a resource callback function as the \p cbdata
418
+ * argument to \ref CUpti_CallbackFunc. The \p cbdata will be this
419
+ * type for \p domain equal to CUPTI_CB_DOMAIN_RESOURCE. The graph
420
+ * data is valid only within the invocation of the callback function
421
+ * that is passed the data. If you need to retain some data for use
422
+ * outside of the callback, you must make a copy of that data.
423
+ */
424
+
425
+ typedef struct {
426
+ /**
427
+ * CUDA graph
428
+ */
429
+ CUgraph graph;
430
+ /**
431
+ * The original CUDA graph from which \param graph is cloned
432
+ */
433
+ CUgraph originalGraph;
434
+ /**
435
+ * CUDA graph node
436
+ */
437
+ CUgraphNode node;
438
+ /**
439
+ * The original CUDA graph node from which \param node is cloned
440
+ */
441
+ CUgraphNode originalNode;
442
+ /**
443
+ * Type of the \param node
444
+ */
445
+ CUgraphNodeType nodeType;
446
+ /**
447
+ * The dependent graph node
448
+ * The size of the array is \param numDependencies.
449
+ */
450
+ CUgraphNode dependency;
451
+ /**
452
+ * CUDA executable graph
453
+ */
454
+ CUgraphExec graphExec;
455
+ } CUpti_GraphData;
456
+
457
+ /**
458
+ * \brief Data passed into a synchronize callback function.
459
+ *
460
+ * Data passed into a synchronize callback function as the \p cbdata
461
+ * argument to \ref CUpti_CallbackFunc. The \p cbdata will be this
462
+ * type for \p domain equal to CUPTI_CB_DOMAIN_SYNCHRONIZE. The
463
+ * callback data is valid only within the invocation of the callback
464
+ * function that is passed the data. If you need to retain some data
465
+ * for use outside of the callback, you must make a copy of that data.
466
+ */
467
+ typedef struct {
468
+ /**
469
+ * The context of the stream being synchronized.
470
+ */
471
+ CUcontext context;
472
+ /**
473
+ * The stream being synchronized.
474
+ */
475
+ CUstream stream;
476
+ } CUpti_SynchronizeData;
477
+
478
+ /**
479
+ * \brief Data passed into a NVTX callback function.
480
+ *
481
+ * Data passed into a NVTX callback function as the \p cbdata argument
482
+ * to \ref CUpti_CallbackFunc. The \p cbdata will be this type for \p
483
+ * domain equal to CUPTI_CB_DOMAIN_NVTX. Unless otherwise notes, the
484
+ * callback data is valid only within the invocation of the callback
485
+ * function that is passed the data. If you need to retain some data
486
+ * for use outside of the callback, you must make a copy of that data.
487
+ */
488
+ typedef struct {
489
+ /**
490
+ * Name of the NVTX API function which issued the callback. This
491
+ * string is a global constant and so may be accessed outside of the
492
+ * callback.
493
+ */
494
+ const char *functionName;
495
+
496
+ /**
497
+ * Pointer to the arguments passed to the NVTX API call. See
498
+ * generated_nvtx_meta.h for structure definitions for the
499
+ * parameters for each NVTX API function.
500
+ */
501
+ const void *functionParams;
502
+
503
+ /**
504
+ * Pointer to the return value of the NVTX API call. See
505
+ * nvToolsExt.h for each NVTX API function's return value.
506
+ */
507
+ const void *functionReturnValue;
508
+ } CUpti_NvtxData;
509
+
510
+ /**
511
+ * \brief An ID for a driver API, runtime API, resource or
512
+ * synchronization callback.
513
+ *
514
+ * An ID for a driver API, runtime API, resource or synchronization
515
+ * callback. Within a driver API callback this should be interpreted
516
+ * as a CUpti_driver_api_trace_cbid value (these values are defined in
517
+ * cupti_driver_cbid.h). Within a runtime API callback this should be
518
+ * interpreted as a CUpti_runtime_api_trace_cbid value (these values
519
+ * are defined in cupti_runtime_cbid.h). Within a resource API
520
+ * callback this should be interpreted as a \ref
521
+ * CUpti_CallbackIdResource value. Within a synchronize API callback
522
+ * this should be interpreted as a \ref CUpti_CallbackIdSync value.
523
+ */
524
+ typedef uint32_t CUpti_CallbackId;
525
+
526
+ /**
527
+ * \brief Function type for a callback.
528
+ *
529
+ * Function type for a callback. The type of the data passed to the
530
+ * callback in \p cbdata depends on the \p domain. If \p domain is
531
+ * CUPTI_CB_DOMAIN_DRIVER_API or CUPTI_CB_DOMAIN_RUNTIME_API the type
532
+ * of \p cbdata will be CUpti_CallbackData. If \p domain is
533
+ * CUPTI_CB_DOMAIN_RESOURCE the type of \p cbdata will be
534
+ * CUpti_ResourceData. If \p domain is CUPTI_CB_DOMAIN_SYNCHRONIZE the
535
+ * type of \p cbdata will be CUpti_SynchronizeData. If \p domain is
536
+ * CUPTI_CB_DOMAIN_NVTX the type of \p cbdata will be CUpti_NvtxData.
537
+ *
538
+ * \param userdata User data supplied at subscription of the callback
539
+ * \param domain The domain of the callback
540
+ * \param cbid The ID of the callback
541
+ * \param cbdata Data passed to the callback.
542
+ */
543
+ typedef void (CUPTIAPI *CUpti_CallbackFunc)(
544
+ void *userdata,
545
+ CUpti_CallbackDomain domain,
546
+ CUpti_CallbackId cbid,
547
+ const void *cbdata);
548
+
549
+ /**
550
+ * \brief A callback subscriber.
551
+ */
552
+ typedef struct CUpti_Subscriber_st *CUpti_SubscriberHandle;
553
+
554
+ /**
555
+ * \brief Pointer to an array of callback domains.
556
+ */
557
+ typedef CUpti_CallbackDomain *CUpti_DomainTable;
558
+
559
+ /**
560
+ * \brief Get the available callback domains.
561
+ *
562
+ * Returns in \p *domainTable an array of size \p *domainCount of all
563
+ * the available callback domains.
564
+ * \note \b Thread-safety: this function is thread safe.
565
+ *
566
+ * \param domainCount Returns number of callback domains
567
+ * \param domainTable Returns pointer to array of available callback domains
568
+ *
569
+ * \retval CUPTI_SUCCESS on success
570
+ * \retval CUPTI_ERROR_NOT_INITIALIZED if unable to initialize CUPTI
571
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p domainCount or \p domainTable are NULL
572
+ */
573
+ CUptiResult CUPTIAPI cuptiSupportedDomains(size_t *domainCount,
574
+ CUpti_DomainTable *domainTable);
575
+
576
+ /**
577
+ * \brief Initialize a callback subscriber with a callback function
578
+ * and user data.
579
+ *
580
+ * Initializes a callback subscriber with a callback function and
581
+ * (optionally) a pointer to user data. The returned subscriber handle
582
+ * can be used to enable and disable the callback for specific domains
583
+ * and callback IDs.
584
+ * \note Only a single subscriber can be registered at a time. To ensure
585
+ * that no other CUPTI client interrupts the profiling session, it's the
586
+ * responsibility of all the CUPTI clients to call this function before
587
+ * starting the profling session. In case profiling session is already
588
+ * started by another CUPTI client, this function returns the error code
589
+ * CUPTI_ERROR_MULTIPLE_SUBSCRIBERS_NOT_SUPPORTED.
590
+ * Note that this function returns the same error when application is
591
+ * launched using NVIDIA tools like nvprof, Visual Profiler, Nsight Systems,
592
+ * Nsight Compute, cuda-gdb and cuda-memcheck.
593
+ * \note This function does not enable any callbacks.
594
+ * \note \b Thread-safety: this function is thread safe.
595
+ *
596
+ * \param subscriber Returns handle to initialize subscriber
597
+ * \param callback The callback function
598
+ * \param userdata A pointer to user data. This data will be passed to
599
+ * the callback function via the \p userdata paramater.
600
+ *
601
+ * \retval CUPTI_SUCCESS on success
602
+ * \retval CUPTI_ERROR_NOT_INITIALIZED if unable to initialize CUPTI
603
+ * \retval CUPTI_ERROR_MULTIPLE_SUBSCRIBERS_NOT_SUPPORTED if there is already a CUPTI subscriber
604
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p subscriber is NULL
605
+ */
606
+ CUptiResult CUPTIAPI cuptiSubscribe(CUpti_SubscriberHandle *subscriber,
607
+ CUpti_CallbackFunc callback,
608
+ void *userdata);
609
+
610
+ /**
611
+ * \brief Unregister a callback subscriber.
612
+ *
613
+ * Removes a callback subscriber so that no future callbacks will be
614
+ * issued to that subscriber.
615
+ * \note \b Thread-safety: this function is thread safe.
616
+ *
617
+ * \param subscriber Handle to the initialize subscriber
618
+ *
619
+ * \retval CUPTI_SUCCESS on success
620
+ * \retval CUPTI_ERROR_NOT_INITIALIZED if unable to initialized CUPTI
621
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p subscriber is NULL or not initialized
622
+ */
623
+ CUptiResult CUPTIAPI cuptiUnsubscribe(CUpti_SubscriberHandle subscriber);
624
+
625
+ /**
626
+ * \brief Get the current enabled/disabled state of a callback for a specific
627
+ * domain and function ID.
628
+ *
629
+ * Returns non-zero in \p *enable if the callback for a domain and
630
+ * callback ID is enabled, and zero if not enabled.
631
+ *
632
+ * \note \b Thread-safety: a subscriber must serialize access to
633
+ * cuptiGetCallbackState, cuptiEnableCallback, cuptiEnableDomain, and
634
+ * cuptiEnableAllDomains. For example, if cuptiGetCallbackState(sub,
635
+ * d, c) and cuptiEnableCallback(sub, d, c) are called concurrently,
636
+ * the results are undefined.
637
+ *
638
+ * \param enable Returns non-zero if callback enabled, zero if not enabled
639
+ * \param subscriber Handle to the initialize subscriber
640
+ * \param domain The domain of the callback
641
+ * \param cbid The ID of the callback
642
+ *
643
+ * \retval CUPTI_SUCCESS on success
644
+ * \retval CUPTI_ERROR_NOT_INITIALIZED if unable to initialized CUPTI
645
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p enabled is NULL, or if \p
646
+ * subscriber, \p domain or \p cbid is invalid.
647
+ */
648
+ CUptiResult CUPTIAPI cuptiGetCallbackState(uint32_t *enable,
649
+ CUpti_SubscriberHandle subscriber,
650
+ CUpti_CallbackDomain domain,
651
+ CUpti_CallbackId cbid);
652
+
653
+ /**
654
+ * \brief Enable or disabled callbacks for a specific domain and
655
+ * callback ID.
656
+ *
657
+ * Enable or disabled callbacks for a subscriber for a specific domain
658
+ * and callback ID.
659
+ *
660
+ * \note \b Thread-safety: a subscriber must serialize access to
661
+ * cuptiGetCallbackState, cuptiEnableCallback, cuptiEnableDomain, and
662
+ * cuptiEnableAllDomains. For example, if cuptiGetCallbackState(sub,
663
+ * d, c) and cuptiEnableCallback(sub, d, c) are called concurrently,
664
+ * the results are undefined.
665
+ *
666
+ * \param enable New enable state for the callback. Zero disables the
667
+ * callback, non-zero enables the callback.
668
+ * \param subscriber - Handle to callback subscription
669
+ * \param domain The domain of the callback
670
+ * \param cbid The ID of the callback
671
+ *
672
+ * \retval CUPTI_SUCCESS on success
673
+ * \retval CUPTI_ERROR_NOT_INITIALIZED if unable to initialized CUPTI
674
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p subscriber, \p domain or \p
675
+ * cbid is invalid.
676
+ */
677
+ CUptiResult CUPTIAPI cuptiEnableCallback(uint32_t enable,
678
+ CUpti_SubscriberHandle subscriber,
679
+ CUpti_CallbackDomain domain,
680
+ CUpti_CallbackId cbid);
681
+
682
+ /**
683
+ * \brief Enable or disabled all callbacks for a specific domain.
684
+ *
685
+ * Enable or disabled all callbacks for a specific domain.
686
+ *
687
+ * \note \b Thread-safety: a subscriber must serialize access to
688
+ * cuptiGetCallbackState, cuptiEnableCallback, cuptiEnableDomain, and
689
+ * cuptiEnableAllDomains. For example, if cuptiGetCallbackEnabled(sub,
690
+ * d, *) and cuptiEnableDomain(sub, d) are called concurrently, the
691
+ * results are undefined.
692
+ *
693
+ * \param enable New enable state for all callbacks in the
694
+ * domain. Zero disables all callbacks, non-zero enables all
695
+ * callbacks.
696
+ * \param subscriber - Handle to callback subscription
697
+ * \param domain The domain of the callback
698
+ *
699
+ * \retval CUPTI_SUCCESS on success
700
+ * \retval CUPTI_ERROR_NOT_INITIALIZED if unable to initialized CUPTI
701
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p subscriber or \p domain is invalid
702
+ */
703
+ CUptiResult CUPTIAPI cuptiEnableDomain(uint32_t enable,
704
+ CUpti_SubscriberHandle subscriber,
705
+ CUpti_CallbackDomain domain);
706
+
707
+ /**
708
+ * \brief Enable or disable all callbacks in all domains.
709
+ *
710
+ * Enable or disable all callbacks in all domains.
711
+ *
712
+ * \note \b Thread-safety: a subscriber must serialize access to
713
+ * cuptiGetCallbackState, cuptiEnableCallback, cuptiEnableDomain, and
714
+ * cuptiEnableAllDomains. For example, if cuptiGetCallbackState(sub,
715
+ * d, *) and cuptiEnableAllDomains(sub) are called concurrently, the
716
+ * results are undefined.
717
+ *
718
+ * \param enable New enable state for all callbacks in all
719
+ * domain. Zero disables all callbacks, non-zero enables all
720
+ * callbacks.
721
+ * \param subscriber - Handle to callback subscription
722
+ *
723
+ * \retval CUPTI_SUCCESS on success
724
+ * \retval CUPTI_ERROR_NOT_INITIALIZED if unable to initialized CUPTI
725
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p subscriber is invalid
726
+ */
727
+ CUptiResult CUPTIAPI cuptiEnableAllDomains(uint32_t enable,
728
+ CUpti_SubscriberHandle subscriber);
729
+
730
+ /**
731
+ * \brief Get the name of a callback for a specific domain and callback ID.
732
+ *
733
+ * Returns a pointer to the name c_string in \p **name.
734
+ *
735
+ * \note \b Names are available only for the DRIVER and RUNTIME domains.
736
+ *
737
+ * \param domain The domain of the callback
738
+ * \param cbid The ID of the callback
739
+ * \param name Returns pointer to the name string on success, NULL otherwise
740
+ *
741
+ * \retval CUPTI_SUCCESS on success
742
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p name is NULL, or if
743
+ * \p domain or \p cbid is invalid.
744
+ */
745
+ CUptiResult CUPTIAPI cuptiGetCallbackName(CUpti_CallbackDomain domain,
746
+ uint32_t cbid,
747
+ const char **name);
748
+
749
+ /** @} */ /* END CUPTI_CALLBACK_API */
750
+
751
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
752
+ #pragma GCC visibility pop
753
+ #endif
754
+
755
+ #if defined(__cplusplus)
756
+ }
757
+ #endif
758
+
759
+ #endif // file guard
760
+
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_checkpoint.h ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include <cuda.h>
4
+ #include <cupti_result.h>
5
+
6
+ #include <stddef.h>
7
+ #include <stdint.h>
8
+
9
+ namespace NV { namespace Cupti { namespace Checkpoint {
10
+
11
+ #ifdef __cplusplus
12
+ extern "C"
13
+ {
14
+ #endif
15
+
16
+ /**
17
+ * \defgroup CUPTI_CHECKPOINT_API CUPTI Checkpoint API
18
+ * Functions, types, and enums that implement the CUPTI Checkpoint API.
19
+ * @{
20
+ */
21
+
22
+ /**
23
+ * \brief Specifies optimization options for a checkpoint, may be OR'd together to specify multiple options.
24
+ */
25
+ typedef enum
26
+ {
27
+ CUPTI_CHECKPOINT_OPT_NONE = 0, //!< Default behavior
28
+ CUPTI_CHECKPOINT_OPT_TRANSFER = 1, //!< Determine which mem blocks have changed, and only restore those. This optimization is cached, which means cuptiCheckpointRestore must always be called at the same point in the application when this option is enabled, or the result may be incorrect.
29
+ } CUpti_CheckpointOptimizations;
30
+
31
+ /**
32
+ * \brief Configuration and handle for a CUPTI Checkpoint
33
+ *
34
+ * A CUptiCheckpoint object should be initialized with desired options prior to passing into any
35
+ * CUPTI Checkpoint API function. The first call into a Checkpoint API function will initialize internal
36
+ * state based on these options. Subsequent changes to these options will not have any effect.
37
+ *
38
+ * Checkpoint data is saved in device, host, and filesystem space. There are options to reserve memory
39
+ * at each level (device, host, filesystem) which are intended to allow a guarantee that a certain amount
40
+ * of memory will remain free for use after the checkpoint is saved.
41
+ * Note, however, that falling back to slower levels of memory (host, and then filesystem) to save the checkpoint
42
+ * will result in performance degradation.
43
+ * Currently, the filesystem limitation is not implemented. Note that falling back to filesystem storage may
44
+ * significantly impact the performance for saving and restoring a checkpoint.
45
+ */
46
+ typedef struct
47
+ {
48
+ size_t structSize; //!< [in] Must be set to CUpti_Checkpoint_STRUCT_SIZE
49
+
50
+ CUcontext ctx; //!< [in] Set to context to save from, or will use current context if NULL
51
+
52
+ size_t reserveDeviceMB; //!< [in] Restrict checkpoint from using last N MB of device memory (-1 = use no device memory)
53
+ size_t reserveHostMB; //!< [in] Restrict checkpoint from using last N MB of host memory (-1 = use no host memory)
54
+ uint8_t allowOverwrite; //!< [in] Boolean, Allow checkpoint to save over existing checkpoint
55
+ uint8_t optimizations; //!< [in] Mask of CUpti_CheckpointOptimizations flags for this checkpoint
56
+
57
+ void * pPriv; //!< [in] Assign to NULL
58
+ } CUpti_Checkpoint;
59
+
60
+ #define CUpti_Checkpoint_STRUCT_SIZE \
61
+ (offsetof(CUpti_Checkpoint, pPriv) + \
62
+ sizeof(((CUpti_Checkpoint*)(nullptr))->pPriv))
63
+
64
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
65
+ #pragma GCC visibility push(default)
66
+ #endif
67
+
68
+ /**
69
+ * \brief Initialize and save a checkpoint of the device state associated with the handle context
70
+ *
71
+ * Uses the handle options to configure and save a checkpoint of the device state associated with the specified context.
72
+ *
73
+ * \param handle A pointer to a CUpti_Checkpoint object
74
+ *
75
+ * \retval CUPTI_SUCCESS if a checkpoint was successfully initialized and saved
76
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p handle does not appear to refer to a valid CUpti_Checkpoint
77
+ * \retval CUPTI_ERROR_INVALID_CONTEXT
78
+ * \retval CUPTI_ERROR_INVALID_DEVICE if device associated with context is not compatible with checkpoint API
79
+ * \retval CUPTI_ERROR_INVALID_OPERATION if Save is requested over an existing checkpoint, but \p allowOverwrite was not originally specified
80
+ * \retval CUPTI_ERROR_OUT_OF_MEMORY if as configured, not enough backing storage space to save the checkpoint
81
+ */
82
+ CUptiResult cuptiCheckpointSave(CUpti_Checkpoint * const handle);
83
+
84
+ /**
85
+ * \brief Restore a checkpoint to the device associated with its context
86
+ *
87
+ * Restores device, pinned, and allocated memory to the state when the checkpoint was saved
88
+ *
89
+ * \param handle A pointer to a previously saved CUpti_Checkpoint object
90
+ *
91
+ * \retval CUTPI_SUCCESS if the checkpoint was successfully restored
92
+ * \retval CUPTI_ERROR_NOT_INITIALIZED if the checkpoint was not previously initialized
93
+ * \retval CUPTI_ERROR_INVALID_CONTEXT
94
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if the handle appears invalid
95
+ * \retval CUPTI_ERROR_UNKNOWN if the restore or optimization operation fails
96
+ */
97
+ CUptiResult cuptiCheckpointRestore(CUpti_Checkpoint * const handle);
98
+
99
+ /**
100
+ * \brief Free the backing data for a checkpoint
101
+ *
102
+ * Frees all associated device, host memory and filesystem storage used for this context.
103
+ * After freeing a handle, it may be re-used as if it was new - options may be re-configured and will
104
+ * take effect on the next call to \p cuptiCheckpointSave.
105
+ *
106
+ * \param handle A pointer to a previously saved CUpti_Checkpoint object
107
+ *
108
+ * \retval CUPTI_SUCCESS if the handle was successfully freed
109
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if the handle was already freed or appears invalid
110
+ * \retval CUPTI_ERROR_INVALID_CONTEXT if the context is no longer valid
111
+ */
112
+ CUptiResult cuptiCheckpointFree(CUpti_Checkpoint * const handle);
113
+
114
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
115
+ #pragma GCC visibility pop
116
+ #endif
117
+
118
+ /**
119
+ * @}
120
+ */
121
+
122
+ #ifdef __cplusplus
123
+ }
124
+ #endif
125
+
126
+ // Exit namespace NV::Cupti::Checkpoint
127
+ }}}
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_nvtx_cbid.h ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2013-2017 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
51
+ #pragma GCC visibility push(default)
52
+ #endif
53
+
54
+ typedef enum {
55
+ CUPTI_CBID_NVTX_INVALID = 0,
56
+ CUPTI_CBID_NVTX_nvtxMarkA = 1,
57
+ CUPTI_CBID_NVTX_nvtxMarkW = 2,
58
+ CUPTI_CBID_NVTX_nvtxMarkEx = 3,
59
+ CUPTI_CBID_NVTX_nvtxRangeStartA = 4,
60
+ CUPTI_CBID_NVTX_nvtxRangeStartW = 5,
61
+ CUPTI_CBID_NVTX_nvtxRangeStartEx = 6,
62
+ CUPTI_CBID_NVTX_nvtxRangeEnd = 7,
63
+ CUPTI_CBID_NVTX_nvtxRangePushA = 8,
64
+ CUPTI_CBID_NVTX_nvtxRangePushW = 9,
65
+ CUPTI_CBID_NVTX_nvtxRangePushEx = 10,
66
+ CUPTI_CBID_NVTX_nvtxRangePop = 11,
67
+ CUPTI_CBID_NVTX_nvtxNameCategoryA = 12,
68
+ CUPTI_CBID_NVTX_nvtxNameCategoryW = 13,
69
+ CUPTI_CBID_NVTX_nvtxNameOsThreadA = 14,
70
+ CUPTI_CBID_NVTX_nvtxNameOsThreadW = 15,
71
+ CUPTI_CBID_NVTX_nvtxNameCuDeviceA = 16,
72
+ CUPTI_CBID_NVTX_nvtxNameCuDeviceW = 17,
73
+ CUPTI_CBID_NVTX_nvtxNameCuContextA = 18,
74
+ CUPTI_CBID_NVTX_nvtxNameCuContextW = 19,
75
+ CUPTI_CBID_NVTX_nvtxNameCuStreamA = 20,
76
+ CUPTI_CBID_NVTX_nvtxNameCuStreamW = 21,
77
+ CUPTI_CBID_NVTX_nvtxNameCuEventA = 22,
78
+ CUPTI_CBID_NVTX_nvtxNameCuEventW = 23,
79
+ CUPTI_CBID_NVTX_nvtxNameCudaDeviceA = 24,
80
+ CUPTI_CBID_NVTX_nvtxNameCudaDeviceW = 25,
81
+ CUPTI_CBID_NVTX_nvtxNameCudaStreamA = 26,
82
+ CUPTI_CBID_NVTX_nvtxNameCudaStreamW = 27,
83
+ CUPTI_CBID_NVTX_nvtxNameCudaEventA = 28,
84
+ CUPTI_CBID_NVTX_nvtxNameCudaEventW = 29,
85
+ CUPTI_CBID_NVTX_nvtxDomainMarkEx = 30,
86
+ CUPTI_CBID_NVTX_nvtxDomainRangeStartEx = 31,
87
+ CUPTI_CBID_NVTX_nvtxDomainRangeEnd = 32,
88
+ CUPTI_CBID_NVTX_nvtxDomainRangePushEx = 33,
89
+ CUPTI_CBID_NVTX_nvtxDomainRangePop = 34,
90
+ CUPTI_CBID_NVTX_nvtxDomainResourceCreate = 35,
91
+ CUPTI_CBID_NVTX_nvtxDomainResourceDestroy = 36,
92
+ CUPTI_CBID_NVTX_nvtxDomainNameCategoryA = 37,
93
+ CUPTI_CBID_NVTX_nvtxDomainNameCategoryW = 38,
94
+ CUPTI_CBID_NVTX_nvtxDomainRegisterStringA = 39,
95
+ CUPTI_CBID_NVTX_nvtxDomainRegisterStringW = 40,
96
+ CUPTI_CBID_NVTX_nvtxDomainCreateA = 41,
97
+ CUPTI_CBID_NVTX_nvtxDomainCreateW = 42,
98
+ CUPTI_CBID_NVTX_nvtxDomainDestroy = 43,
99
+ CUPTI_CBID_NVTX_nvtxDomainSyncUserCreate = 44,
100
+ CUPTI_CBID_NVTX_nvtxDomainSyncUserDestroy = 45,
101
+ CUPTI_CBID_NVTX_nvtxDomainSyncUserAcquireStart = 46,
102
+ CUPTI_CBID_NVTX_nvtxDomainSyncUserAcquireFailed = 47,
103
+ CUPTI_CBID_NVTX_nvtxDomainSyncUserAcquireSuccess = 48,
104
+ CUPTI_CBID_NVTX_nvtxDomainSyncUserReleasing = 49,
105
+ CUPTI_CBID_NVTX_SIZE,
106
+ CUPTI_CBID_NVTX_FORCE_INT = 0x7fffffff
107
+ } CUpti_nvtx_api_trace_cbid;
108
+
109
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
110
+ #pragma GCC visibility pop
111
+ #endif
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_profiler_target.h ADDED
@@ -0,0 +1,588 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2011-2020 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #if !defined(_CUPTI_PROFILER_TARGET_H_)
51
+ #define _CUPTI_PROFILER_TARGET_H_
52
+
53
+ #include <cuda.h>
54
+ #include <cupti_result.h>
55
+ #include <stddef.h>
56
+ #include <stdint.h>
57
+
58
+ #ifdef __cplusplus
59
+ extern "C" {
60
+ #endif
61
+
62
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
63
+ #pragma GCC visibility push(default)
64
+ #endif
65
+
66
+ /**
67
+ * \defgroup CUPTI_PROFILER_API CUPTI Profiling API
68
+ * Functions, types, and enums that implement the CUPTI Profiling API.
69
+ * @{
70
+ */
71
+ #ifndef CUPTI_PROFILER_STRUCT_SIZE
72
+ #define CUPTI_PROFILER_STRUCT_SIZE(type_, lastfield_) (offsetof(type_, lastfield_) + sizeof(((type_*)0)->lastfield_))
73
+ #endif
74
+
75
+ /**
76
+ * \brief Profiler range attribute
77
+ *
78
+ * A metric enabled in the session's configuration is collected separately per unique range-stack in the pass.
79
+ * This is an attribute to collect metrics around each kernel in a profiling session or in an user defined range.
80
+ */
81
+ typedef enum
82
+ {
83
+ /**
84
+ * Invalid value
85
+ */
86
+ CUPTI_Range_INVALID,
87
+ /**
88
+ * Ranges are auto defined around each kernel in a profiling session
89
+ */
90
+ CUPTI_AutoRange,
91
+ /**
92
+ * A range in which metric data to be collected is defined by the user
93
+ */
94
+ CUPTI_UserRange,
95
+ /**
96
+ * Range count
97
+ */
98
+ CUPTI_Range_COUNT,
99
+ } CUpti_ProfilerRange;
100
+
101
+ /**
102
+ * \brief Profiler replay attribute
103
+ *
104
+ * For metrics which require multipass collection, a replay of the GPU kernel(s) is required.
105
+ * This is an attribute which specify how the replay of the kernel(s) to be measured is done.
106
+ */
107
+ typedef enum
108
+ {
109
+ /**
110
+ * Invalid Value
111
+ */
112
+ CUPTI_Replay_INVALID,
113
+ /**
114
+ * Replay is done by CUPTI user around the process
115
+ */
116
+ CUPTI_ApplicationReplay,
117
+ /**
118
+ * Replay is done around kernel implicitly by CUPTI
119
+ */
120
+ CUPTI_KernelReplay,
121
+ /**
122
+ * Replay is done by CUPTI user within a process
123
+ */
124
+ CUPTI_UserReplay,
125
+ /**
126
+ * Replay count
127
+ */
128
+ CUPTI_Replay_COUNT,
129
+ } CUpti_ProfilerReplayMode;
130
+
131
+ /**
132
+ * \brief Default parameter for cuptiProfilerInitialize
133
+ */
134
+ typedef struct CUpti_Profiler_Initialize_Params
135
+ {
136
+ size_t structSize; //!< [in] CUpti_Profiler_Initialize_Params_STRUCT_SIZE
137
+ void* pPriv; //!< [in] assign to NULL
138
+
139
+ } CUpti_Profiler_Initialize_Params;
140
+ #define CUpti_Profiler_Initialize_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Initialize_Params, pPriv)
141
+
142
+ /**
143
+ * \brief Default parameter for cuptiProfilerDeInitialize
144
+ */
145
+ typedef struct CUpti_Profiler_DeInitialize_Params
146
+ {
147
+ size_t structSize; //!< [in] CUpti_Profiler_DeInitialize_Params_STRUCT_SIZE
148
+ void* pPriv; //!< [in] assign to NULL
149
+
150
+ } CUpti_Profiler_DeInitialize_Params;
151
+ #define CUpti_Profiler_DeInitialize_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_DeInitialize_Params, pPriv)
152
+
153
+ /**
154
+ * \brief Initializes the profiler interface
155
+ *
156
+ * Loads the required libraries in the process address space.
157
+ * Sets up the hooks with the CUDA driver.
158
+ */
159
+ CUptiResult CUPTIAPI cuptiProfilerInitialize(CUpti_Profiler_Initialize_Params *pParams);
160
+
161
+ /**
162
+ * \brief DeInitializes the profiler interface
163
+ */
164
+ CUptiResult CUPTIAPI cuptiProfilerDeInitialize(CUpti_Profiler_DeInitialize_Params *pParams);
165
+
166
+ /**
167
+ * \brief Input parameter to define the counterDataImage
168
+ */
169
+ typedef struct CUpti_Profiler_CounterDataImageOptions
170
+ {
171
+ size_t structSize; //!< [in] CUpti_Profiler_CounterDataImageOptions_Params_STRUCT_SIZE
172
+ void* pPriv; //!< [in] assign to NULL
173
+
174
+ const uint8_t* pCounterDataPrefix; /**< [in] Address of CounterDataPrefix generated from NVPW_CounterDataBuilder_GetCounterDataPrefix().
175
+ Must be align(8).*/
176
+ size_t counterDataPrefixSize; //!< [in] Size of CounterDataPrefix generated from NVPW_CounterDataBuilder_GetCounterDataPrefix().
177
+ uint32_t maxNumRanges; //!< [in] Maximum number of ranges that can be profiled
178
+ uint32_t maxNumRangeTreeNodes; //!< [in] Maximum number of RangeTree nodes; must be >= maxNumRanges
179
+ uint32_t maxRangeNameLength; //!< [in] Maximum string length of each RangeName, including the trailing NULL character
180
+ } CUpti_Profiler_CounterDataImageOptions;
181
+ #define CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_CounterDataImageOptions, maxRangeNameLength)
182
+
183
+ /**
184
+ * \brief Params for cuptiProfilerCounterDataImageCalculateSize
185
+ */
186
+ typedef struct CUpti_Profiler_CounterDataImage_CalculateSize_Params
187
+ {
188
+ size_t structSize; //!< [in] CUpti_Profiler_CounterDataImage_CalculateSize_Params_STRUCT_SIZE
189
+ void* pPriv; //!< [in] assign to NULL
190
+
191
+ size_t sizeofCounterDataImageOptions; //!< [in] CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE
192
+ const CUpti_Profiler_CounterDataImageOptions* pOptions; //!< [in] Pointer to Counter Data Image Options
193
+ size_t counterDataImageSize; //!< [out]
194
+ } CUpti_Profiler_CounterDataImage_CalculateSize_Params;
195
+ #define CUpti_Profiler_CounterDataImage_CalculateSize_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_CounterDataImage_CalculateSize_Params, counterDataImageSize)
196
+
197
+ /**
198
+ * \brief Params for cuptiProfilerCounterDataImageInitialize
199
+ */
200
+ typedef struct CUpti_Profiler_CounterDataImage_Initialize_Params
201
+ {
202
+ size_t structSize; //!< [in] CUpti_Profiler_CounterDataImage_Initialize_Params_STRUCT_SIZE
203
+ void* pPriv; //!< [in] assign to NULL
204
+
205
+ size_t sizeofCounterDataImageOptions; //!< [in] CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE
206
+ const CUpti_Profiler_CounterDataImageOptions* pOptions; //!< [in] Pointer to Counter Data Image Options
207
+ size_t counterDataImageSize; //!< [in] Size calculated from cuptiProfilerCounterDataImageCalculateSize
208
+ uint8_t* pCounterDataImage; //!< [in] The buffer to be initialized.
209
+ } CUpti_Profiler_CounterDataImage_Initialize_Params;
210
+ #define CUpti_Profiler_CounterDataImage_Initialize_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_CounterDataImage_Initialize_Params, pCounterDataImage)
211
+
212
+ /**
213
+ * \brief A CounterData image allocates space for values for each counter for each range.
214
+ *
215
+ * User borne the resposibility of managing the counterDataImage allocations.
216
+ * CounterDataPrefix contains meta data about the metrics that will be stored in counterDataImage.
217
+ * Use these APIs to calculate the allocation size and initialize counterData image.
218
+ */
219
+ CUptiResult cuptiProfilerCounterDataImageCalculateSize(CUpti_Profiler_CounterDataImage_CalculateSize_Params* pParams);
220
+ CUptiResult cuptiProfilerCounterDataImageInitialize(CUpti_Profiler_CounterDataImage_Initialize_Params* pParams);
221
+
222
+ /**
223
+ * \brief Params for cuptiProfilerCounterDataImageCalculateScratchBufferSize
224
+ */
225
+ typedef struct CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params
226
+ {
227
+ size_t structSize; //!< [in] CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params_STRUCT_SIZE
228
+ void* pPriv; //!< [in] assign to NULL
229
+
230
+ size_t counterDataImageSize; //!< [in] size calculated from cuptiProfilerCounterDataImageCalculateSize
231
+ uint8_t* pCounterDataImage; //!< [in]
232
+ size_t counterDataScratchBufferSize; //!< [out]
233
+ } CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params;
234
+ #define CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params, counterDataScratchBufferSize)
235
+
236
+ /**
237
+ * \brief Params for cuptiProfilerCounterDataImageInitializeScratchBuffer
238
+ */
239
+ typedef struct CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params
240
+ {
241
+ size_t structSize; //!< [in] CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params_STRUCT_SIZE
242
+ void* pPriv; //!< [in] assign to NULL
243
+
244
+ size_t counterDataImageSize; //!< [in] size calculated from cuptiProfilerCounterDataImageCalculateSize
245
+ uint8_t* pCounterDataImage; //!< [in]
246
+ size_t counterDataScratchBufferSize; //!< [in] size calculated using cuptiProfilerCounterDataImageCalculateScratchBufferSize
247
+ uint8_t* pCounterDataScratchBuffer; //!< [in] the scratch buffer to be initialized.
248
+ } CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params;
249
+ #define CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params, pCounterDataScratchBuffer)
250
+
251
+ /**
252
+ * \brief A temporary storage for CounterData image needed for internal operations
253
+ *
254
+ * Use these APIs to calculate the allocation size and initialize counterData image scratch buffer.
255
+ */
256
+ CUptiResult cuptiProfilerCounterDataImageCalculateScratchBufferSize(CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params* pParams);
257
+ CUptiResult cuptiProfilerCounterDataImageInitializeScratchBuffer(CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params* pParams);
258
+
259
+ /**
260
+ * \brief Params for cuptiProfilerBeginSession
261
+ */
262
+ typedef struct CUpti_Profiler_BeginSession_Params
263
+ {
264
+ size_t structSize; //!< [in] CUpti_Profiler_BeginSession_Params_STRUCT_SIZE
265
+ void* pPriv; //!< [in] assign to NULL
266
+
267
+ CUcontext ctx; //!< [in] if NULL, the current CUcontext is used
268
+ size_t counterDataImageSize; //!< [in] size calculated from cuptiProfilerCounterDataImageCalculateSize
269
+ uint8_t* pCounterDataImage; //!< [in] address of CounterDataImage
270
+ size_t counterDataScratchBufferSize; //!< [in] size calculated from cuptiProfilerCounterDataImageInitializeScratchBuffer
271
+ uint8_t* pCounterDataScratchBuffer; //!< [in] address of CounterDataImage scratch buffer
272
+ uint8_t bDumpCounterDataInFile; //!< [in] [optional]
273
+ const char* pCounterDataFilePath; //!< [in] [optional]
274
+ CUpti_ProfilerRange range; //!< [in] CUpti_ProfilerRange
275
+ CUpti_ProfilerReplayMode replayMode; //!< [in] CUpti_ProfilerReplayMode
276
+ /* Replay options, required when replay is done by cupti user */
277
+ size_t maxRangesPerPass; //!< [in] Maximum number of ranges that can be recorded in a single pass.
278
+ size_t maxLaunchesPerPass; //!< [in] Maximum number of kernel launches that can be recorded in a single pass; must be >= maxRangesPerPass.
279
+
280
+ } CUpti_Profiler_BeginSession_Params;
281
+ #define CUpti_Profiler_BeginSession_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_BeginSession_Params, maxLaunchesPerPass)
282
+ /**
283
+ * \brief Params for cuptiProfilerEndSession
284
+ */
285
+ typedef struct CUpti_Profiler_EndSession_Params
286
+ {
287
+ size_t structSize; //!< [in] CUpti_Profiler_EndSession_Params_STRUCT_SIZE
288
+ void* pPriv; //!< [in] assign to NULL
289
+
290
+ CUcontext ctx; //!< [in] if NULL, the current CUcontext is used
291
+ } CUpti_Profiler_EndSession_Params;
292
+ #define CUpti_Profiler_EndSession_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_EndSession_Params, ctx)
293
+
294
+ /**
295
+ * \brief Begin profiling session sets up the profiling on the device
296
+ *
297
+ * Although, it doesn't start the profiling but GPU resources needed for profiling are allocated.
298
+ * Outside of a session, the GPU will return to its normal operating state.
299
+ */
300
+ CUptiResult CUPTIAPI cuptiProfilerBeginSession(CUpti_Profiler_BeginSession_Params* pParams);
301
+ /**
302
+ * \brief Ends profiling session
303
+ *
304
+ * Frees up the GPU resources acquired for profiling.
305
+ * Outside of a session, the GPU will return to it's normal operating state.
306
+ */
307
+ CUptiResult CUPTIAPI cuptiProfilerEndSession(CUpti_Profiler_EndSession_Params* pParams);
308
+
309
+ /**
310
+ * \brief Params for cuptiProfilerSetConfig
311
+ */
312
+ typedef struct CUpti_Profiler_SetConfig_Params
313
+ {
314
+ size_t structSize; //!< [in] CUpti_Profiler_SetConfig_Params_STRUCT_SIZE
315
+ void* pPriv; //!< [in] assign to NULL
316
+
317
+ CUcontext ctx; //!< [in] if NULL, the current CUcontext is used
318
+ const uint8_t* pConfig; //!< [in] Config created by NVPW_RawMetricsConfig_GetConfigImage(). Must be align(8).
319
+ size_t configSize; //!< [in] size of config
320
+ uint16_t minNestingLevel; //!< [in] the lowest nesting level to be profiled; must be >= 1
321
+ uint16_t numNestingLevels; //!< [in] the number of nesting levels to profile; must be >= 1
322
+ size_t passIndex; //!< [in] Set this to zero for in-app replay; set this to the output of EndPass() for application replay
323
+ uint16_t targetNestingLevel; //!< [in] Set this to minNestingLevel for in-app replay; set this to the output of EndPass() for application
324
+ } CUpti_Profiler_SetConfig_Params;
325
+
326
+ #define CUpti_Profiler_SetConfig_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_SetConfig_Params, targetNestingLevel)
327
+
328
+ /**
329
+ * \brief Params for cuptiProfilerUnsetConfig
330
+ */
331
+ typedef struct CUpti_Profiler_UnsetConfig_Params
332
+ {
333
+ size_t structSize; //!< [in] CUpti_Profiler_UnsetConfig_Params_STRUCT_SIZE
334
+ void* pPriv; //!< [in] assign to NULL
335
+
336
+ CUcontext ctx; //!< [in] if NULL, the current CUcontext is used
337
+ } CUpti_Profiler_UnsetConfig_Params;
338
+ #define CUpti_Profiler_UnsetConfig_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_UnsetConfig_Params, ctx)
339
+
340
+ /**
341
+ * \brief Set metrics configuration to be profiled
342
+ *
343
+ * Use these APIs to set the config to profile in a session. It can be used for advanced cases such as where multiple
344
+ * configurations are collected into a single CounterData Image on the need basis, without restarting the session.
345
+ */
346
+ CUptiResult CUPTIAPI cuptiProfilerSetConfig(CUpti_Profiler_SetConfig_Params* pParams);
347
+ /**
348
+ * \brief Unset metrics configuration profiled
349
+ *
350
+ */
351
+ CUptiResult CUPTIAPI cuptiProfilerUnsetConfig(CUpti_Profiler_UnsetConfig_Params* pParams);
352
+
353
+ /**
354
+ * \brief Params for cuptiProfilerBeginPass
355
+ */
356
+ typedef struct CUpti_Profiler_BeginPass_Params
357
+ {
358
+ size_t structSize; //!< [in] CUpti_Profiler_BeginPass_Params_STRUCT_SIZE
359
+ void* pPriv; //!< [in] assign to NULL
360
+
361
+ CUcontext ctx; //!< [in] if NULL, the current CUcontext is used
362
+ } CUpti_Profiler_BeginPass_Params;
363
+ #define CUpti_Profiler_BeginPass_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_BeginPass_Params, ctx)
364
+
365
+ /**
366
+ * \brief Params for cuptiProfilerEndPass
367
+ */
368
+ typedef struct CUpti_Profiler_EndPass_Params
369
+ {
370
+ size_t structSize; //!< [in] CUpti_Profiler_EndPass_Params_STRUCT_SIZE
371
+ void* pPriv; //!< [in] assign to NULL
372
+
373
+ CUcontext ctx; //!< [in] if NULL, the current CUcontext is used
374
+ uint16_t targetNestingLevel; //! [out] The targetNestingLevel that will be collected by the *next* BeginPass.
375
+ size_t passIndex; //!< [out] The passIndex that will be collected by the *next* BeginPass
376
+ uint8_t allPassesSubmitted; //!< [out] becomes true when the last pass has been queued to the GPU
377
+ } CUpti_Profiler_EndPass_Params;
378
+ #define CUpti_Profiler_EndPass_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_EndPass_Params, allPassesSubmitted)
379
+
380
+ /**
381
+ * \brief Replay API: used for multipass collection.
382
+
383
+ * These APIs are used if user chooses to replay by itself \ref CUPTI_UserReplay or \ref CUPTI_ApplicationReplay
384
+ * for multipass collection of the metrics configurations.
385
+ * It's a no-op in case of \ref CUPTI_KernelReplay.
386
+ */
387
+ CUptiResult cuptiProfilerBeginPass(CUpti_Profiler_BeginPass_Params* pParams);
388
+
389
+ /**
390
+ * \brief Replay API: used for multipass collection.
391
+
392
+ * These APIs are used if user chooses to replay by itself \ref CUPTI_UserReplay or \ref CUPTI_ApplicationReplay
393
+ * for multipass collection of the metrics configurations.
394
+ * Its a no-op in case of \ref CUPTI_KernelReplay.
395
+ * Returns information for next pass.
396
+ */
397
+ CUptiResult cuptiProfilerEndPass(CUpti_Profiler_EndPass_Params* pParams);
398
+
399
+ /**
400
+ * \brief Params for cuptiProfilerEnableProfiling
401
+ */
402
+ typedef struct CUpti_Profiler_EnableProfiling_Params
403
+ {
404
+ size_t structSize; //!< [in] CUpti_Profiler_EnableProfiling_Params_STRUCT_SIZE
405
+ void* pPriv; //!< [in] assign to NULL
406
+
407
+ CUcontext ctx; //!< [in] if NULL, the current CUcontext is used
408
+ } CUpti_Profiler_EnableProfiling_Params;
409
+ #define CUpti_Profiler_EnableProfiling_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_EnableProfiling_Params, ctx)
410
+
411
+ /**
412
+ * \brief Params for cuptiProfilerDisableProfiling
413
+ */
414
+ typedef struct CUpti_Profiler_DisableProfiling_Params
415
+ {
416
+ size_t structSize; //!< [in] CUpti_Profiler_DisableProfiling_Params_STRUCT_SIZE
417
+ void* pPriv; //!< [in] assign to NULL
418
+
419
+ CUcontext ctx; //!< [in] if NULL, the current CUcontext is used
420
+ } CUpti_Profiler_DisableProfiling_Params;
421
+ #define CUpti_Profiler_DisableProfiling_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_DisableProfiling_Params, ctx)
422
+
423
+ /**
424
+ * \brief Enables Profiling
425
+ *
426
+ * In \ref CUPTI_AutoRange, these APIs are used to enable/disable profiling for the kernels to be executed in
427
+ * a profiling session.
428
+ */
429
+ CUptiResult CUPTIAPI cuptiProfilerEnableProfiling(CUpti_Profiler_EnableProfiling_Params* pParams);
430
+
431
+ /**
432
+ * \brief Disable Profiling
433
+ *
434
+ * In \ref CUPTI_AutoRange, these APIs are used to enable/disable profiling for the kernels to be executed in
435
+ * a profiling session.
436
+ */
437
+ CUptiResult CUPTIAPI cuptiProfilerDisableProfiling(CUpti_Profiler_DisableProfiling_Params* pParams);
438
+
439
+ /**
440
+ * \brief Params for cuptiProfilerIsPassCollected
441
+ */
442
+ typedef struct CUpti_Profiler_IsPassCollected_Params
443
+ {
444
+ size_t structSize; //!< [in] CUpti_Profiler_IsPassCollected_Params_STRUCT_SIZE
445
+ void* pPriv; //!< [in] assign to NULL
446
+
447
+ CUcontext ctx; //!< [in] if NULL, the current CUcontext is used
448
+ size_t numRangesDropped; //!< [out] number of ranges whose data was dropped in the processed pass
449
+ size_t numTraceBytesDropped; //!< [out] number of bytes not written to TraceBuffer due to buffer full
450
+ uint8_t onePassCollected; //!< [out] true if a pass was successfully decoded
451
+ uint8_t allPassesCollected; //!< [out] becomes true when the last pass has been decoded
452
+ } CUpti_Profiler_IsPassCollected_Params;
453
+ #define CUpti_Profiler_IsPassCollected_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_IsPassCollected_Params, allPassesCollected)
454
+
455
+ /**
456
+ * \brief Asynchronous call to query if the submitted pass to GPU is collected
457
+ *
458
+ */
459
+ CUptiResult CUPTIAPI cuptiProfilerIsPassCollected(CUpti_Profiler_IsPassCollected_Params* pParams);
460
+
461
+ /**
462
+ * \brief Params for cuptiProfilerFlushCounterData
463
+ */
464
+ typedef struct CUpti_Profiler_FlushCounterData_Params
465
+ {
466
+ size_t structSize; //!< [in] CUpti_Profiler_FlushCounterData_Params_STRUCT_SIZE
467
+ void* pPriv; //!< [in] assign to NULL
468
+
469
+ CUcontext ctx; //!< [in] if NULL, the current CUcontext is used
470
+ size_t numRangesDropped; //!< [out] number of ranges whose data was dropped in the processed passes
471
+ size_t numTraceBytesDropped; //!< [out] number of bytes not written to TraceBuffer due to buffer full
472
+ } CUpti_Profiler_FlushCounterData_Params;
473
+ #define CUpti_Profiler_FlushCounterData_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_FlushCounterData_Params, numTraceBytesDropped)
474
+
475
+ /**
476
+ * \brief Decode all the submitted passes
477
+ *
478
+ * Flush Counter data API to ensure every pass is decoded into the counterDataImage passed at beginSession.
479
+ * This will cause the CPU/GPU sync to collect all the undecoded pass.
480
+ */
481
+ CUptiResult CUPTIAPI cuptiProfilerFlushCounterData(CUpti_Profiler_FlushCounterData_Params* pParams);
482
+
483
+ typedef struct CUpti_Profiler_PushRange_Params
484
+ {
485
+ size_t structSize; //!< [in] CUpti_Profiler_PushRange_Params_STRUCT_SIZE
486
+ void* pPriv; //!< [in] assign to NULL
487
+
488
+ CUcontext ctx; //!< [in] if NULL, the current CUcontext is used
489
+ const char* pRangeName; //!< [in] specifies the range for subsequent launches; must not be NULL
490
+ size_t rangeNameLength; //!< [in] assign to strlen(pRangeName) if known; if set to zero, the library will call strlen()
491
+ } CUpti_Profiler_PushRange_Params;
492
+ #define CUpti_Profiler_PushRange_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_PushRange_Params, rangeNameLength)
493
+
494
+ typedef struct CUpti_Profiler_PopRange_Params
495
+ {
496
+ size_t structSize; //!< [in] CUpti_Profiler_PopRange_Params_STRUCT_SIZE
497
+ void* pPriv; //!< [in] assign to NULL
498
+
499
+ CUcontext ctx; //!< [in] if NULL, the current CUcontext is used
500
+ } CUpti_Profiler_PopRange_Params;
501
+ #define CUpti_Profiler_PopRange_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_PopRange_Params, ctx)
502
+
503
+
504
+ /**
505
+ * \brief Range API's : Push user range
506
+ *
507
+ * Counter data is collected per unique range-stack. Identified by a string label passsed by the user.
508
+ * It's an invalid operation in case of \ref CUPTI_AutoRange.
509
+ */
510
+ CUptiResult CUPTIAPI cuptiProfilerPushRange(CUpti_Profiler_PushRange_Params *pParams);
511
+
512
+ /**
513
+ * \brief Range API's : Pop user range
514
+ *
515
+ * Counter data is collected per unique range-stack. Identified by a string label passsed by the user.
516
+ * It's an invalid operation in case of \ref CUPTI_AutoRange.
517
+ */
518
+ CUptiResult CUPTIAPI cuptiProfilerPopRange(CUpti_Profiler_PopRange_Params *pParams);
519
+
520
+ /**
521
+ * \brief Params for cuptiProfilerGetCounterAvailability
522
+ */
523
+ typedef struct CUpti_Profiler_GetCounterAvailability_Params
524
+ {
525
+ size_t structSize; //!< [in] CUpti_Profiler_GetCounterAvailability_Params_STRUCT_SIZE
526
+ void* pPriv; //!< [in] assign to NULL
527
+ CUcontext ctx; //!< [in] if NULL, the current CUcontext is used
528
+ size_t counterAvailabilityImageSize; //!< [in/out] If `pCounterAvailabilityImage` is NULL, then the required size is returned in
529
+ //!< `counterAvailabilityImageSize`, otherwise `counterAvailabilityImageSize` should be set to the size of
530
+ //!< `pCounterAvailabilityImage`, and on return it would be overwritten with number of actual bytes copied
531
+ uint8_t* pCounterAvailabilityImage; //!< [in] buffer receiving counter availability image, may be NULL
532
+ } CUpti_Profiler_GetCounterAvailability_Params;
533
+ #define CUpti_Profiler_GetCounterAvailability_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_GetCounterAvailability_Params, pCounterAvailabilityImage)
534
+
535
+ /**
536
+ * \brief Query counter availibility
537
+ *
538
+ * Use this API to query counter availability information in a buffer which can be used to filter unavailable raw metrics on host.
539
+ * Note: This API may fail, if any profiling or sampling session is active on the specified context or its device.
540
+ */
541
+ CUptiResult CUPTIAPI cuptiProfilerGetCounterAvailability(CUpti_Profiler_GetCounterAvailability_Params *pParams);
542
+
543
+ /// Generic support level enum for CUPTI
544
+ typedef enum
545
+ {
546
+ CUPTI_PROFILER_CONFIGURATION_UNKNOWN = 0, //!< Configuration support level unknown - either detection code errored out before setting this value, or unable to determine it
547
+ CUPTI_PROFILER_CONFIGURATION_UNSUPPORTED, //!< Profiling is unavailable. For specific feature fields, this means that the current configuration of this feature does not work with profiling. For instance, SLI-enabled devices do not support profiling, and this value would be returned for SLI on an SLI-enabled device.
548
+ CUPTI_PROFILER_CONFIGURATION_DISABLED, //!< Profiling would be available for this configuration, but was disabled by the system
549
+ CUPTI_PROFILER_CONFIGURATION_SUPPORTED //!< Profiling is supported. For specific feature fields, this means that the current configuration of this feature works with profiling. For instance, SLI-enabled devices do not support profiling, and this value would only be returned for devices which are not SLI-enabled.
550
+ } CUpti_Profiler_Support_Level;
551
+
552
+ /**
553
+ * \brief Params for cuptiProfilerDeviceSupported
554
+ */
555
+ typedef struct
556
+ {
557
+ size_t structSize; //!< [in] Must be CUpti_Profiler_DeviceSupported_Params_STRUCT_SIZE
558
+ void *pPriv; //!< [in] assign to NULL
559
+ CUdevice cuDevice; //!< [in] if NULL, the current CUcontext is used
560
+
561
+ CUpti_Profiler_Support_Level isSupported; //!< [out] overall SUPPORTED / UNSUPPORTED flag representing whether Profiling and PC Sampling APIs work on the given device and configuration. SUPPORTED if all following flags are SUPPORTED, UNSUPPORTED otherwise.
562
+
563
+ CUpti_Profiler_Support_Level architecture; //!< [out] SUPPORTED if the device architecture level supports the Profiling API (Compute Capability >= 7.0), UNSUPPORTED otherwise
564
+ CUpti_Profiler_Support_Level sli; //!< [out] SUPPORTED if SLI is not enabled, UNSUPPORTED otherwise
565
+ CUpti_Profiler_Support_Level vGpu; //!< [out] SUPPORTED if vGPU is supported and profiling is enabled, DISABLED if profiling is supported but not enabled, UNSUPPORTED otherwise
566
+ CUpti_Profiler_Support_Level confidentialCompute; //!< [out] SUPPORTED if confidential compute is not enabled, UNSUPPORTED otherwise
567
+ CUpti_Profiler_Support_Level cmp; //!< [out] SUPPORTED if not NVIDIA Crypto Mining Processors (CMP), UNSUPPORTED otherwise
568
+ } CUpti_Profiler_DeviceSupported_Params;
569
+ #define CUpti_Profiler_DeviceSupported_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_DeviceSupported_Params, confidentialCompute)
570
+
571
+ /**
572
+ * \brief Query device compatibility with Profiling API
573
+ *
574
+ * Use this call to determine whether a compute device and configuration are compatible with the Profiling API.
575
+ * If the configuration does not support profiling, one of several flags will indicate why.
576
+ */
577
+ CUptiResult CUPTIAPI cuptiProfilerDeviceSupported(CUpti_Profiler_DeviceSupported_Params *pParams);
578
+
579
+ /** @} */ /* END CUPTI_METRIC_API */
580
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
581
+ #pragma GCC visibility pop
582
+ #endif
583
+
584
+ #ifdef __cplusplus
585
+ } /* extern "C" */
586
+ #endif
587
+
588
+ #endif /*_CUPTI_PROFILER_TARGET_H_*/
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/generated_nvtx_meta.h ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2013-2018 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
51
+ #pragma GCC visibility push(default)
52
+ #endif
53
+
54
+ // *************************************************************************
55
+ // Definitions of structs to hold parameters for each function
56
+ // *************************************************************************
57
+
58
+ typedef struct nvtxMarkEx_params_st {
59
+ const nvtxEventAttributes_t* eventAttrib;
60
+ } nvtxMarkEx_params;
61
+
62
+ typedef struct nvtxMarkA_params_st {
63
+ const char* message;
64
+ } nvtxMarkA_params;
65
+
66
+ typedef struct nvtxMarkW_params_st {
67
+ const wchar_t* message;
68
+ } nvtxMarkW_params;
69
+
70
+ typedef struct nvtxRangeStartEx_params_st {
71
+ const nvtxEventAttributes_t* eventAttrib;
72
+ } nvtxRangeStartEx_params;
73
+
74
+ typedef struct nvtxRangeStartA_params_st {
75
+ const char* message;
76
+ } nvtxRangeStartA_params;
77
+
78
+ typedef struct nvtxRangeStartW_params_st {
79
+ const wchar_t* message;
80
+ } nvtxRangeStartW_params;
81
+
82
+ typedef struct nvtxRangeEnd_params_st {
83
+ nvtxRangeId_t id;
84
+ } nvtxRangeEnd_params;
85
+
86
+ typedef struct nvtxRangePushEx_params_st {
87
+ const nvtxEventAttributes_t* eventAttrib;
88
+ } nvtxRangePushEx_params;
89
+
90
+ typedef struct nvtxRangePushA_params_st {
91
+ const char* message;
92
+ } nvtxRangePushA_params;
93
+
94
+ typedef struct nvtxRangePushW_params_st {
95
+ const wchar_t* message;
96
+ } nvtxRangePushW_params;
97
+
98
+ typedef struct nvtxRangePop_params_st {
99
+ /* WAR: Windows compiler doesn't allow empty structs */
100
+ /* This field shouldn't be used */
101
+ void *dummy;
102
+ } nvtxRangePop_params;
103
+
104
+ typedef struct nvtxNameCategoryA_params_st {
105
+ uint32_t category;
106
+ const char* name;
107
+ } nvtxNameCategoryA_params;
108
+
109
+ typedef struct nvtxNameCategoryW_params_st {
110
+ uint32_t category;
111
+ const wchar_t* name;
112
+ } nvtxNameCategoryW_params;
113
+
114
+ typedef struct nvtxNameOsThreadA_params_st {
115
+ uint32_t threadId;
116
+ const char* name;
117
+ } nvtxNameOsThreadA_params;
118
+
119
+ typedef struct nvtxNameOsThreadW_params_st {
120
+ uint32_t threadId;
121
+ const wchar_t* name;
122
+ } nvtxNameOsThreadW_params;
123
+
124
+ typedef struct nvtxNameCuDeviceA_params_st {
125
+ CUdevice device;
126
+ const char* name;
127
+ } nvtxNameCuDeviceA_params;
128
+
129
+ typedef struct nvtxNameCuDeviceW_params_st {
130
+ CUdevice device;
131
+ const wchar_t* name;
132
+ } nvtxNameCuDeviceW_params;
133
+
134
+ typedef struct nvtxNameCuContextA_params_st {
135
+ CUcontext context;
136
+ const char* name;
137
+ } nvtxNameCuContextA_params;
138
+
139
+ typedef struct nvtxNameCuContextW_params_st {
140
+ CUcontext context;
141
+ const wchar_t* name;
142
+ } nvtxNameCuContextW_params;
143
+
144
+ typedef struct nvtxNameCuStreamA_params_st {
145
+ CUstream stream;
146
+ const char* name;
147
+ } nvtxNameCuStreamA_params;
148
+
149
+ typedef struct nvtxNameCuStreamW_params_st {
150
+ CUstream stream;
151
+ const wchar_t* name;
152
+ } nvtxNameCuStreamW_params;
153
+
154
+ typedef struct nvtxNameCuEventA_params_st {
155
+ CUevent event;
156
+ const char* name;
157
+ } nvtxNameCuEventA_params;
158
+
159
+ typedef struct nvtxNameCuEventW_params_st {
160
+ CUevent event;
161
+ const wchar_t* name;
162
+ } nvtxNameCuEventW_params;
163
+
164
+ typedef struct nvtxNameCudaDeviceA_params_st {
165
+ int device;
166
+ const char* name;
167
+ } nvtxNameCudaDeviceA_params;
168
+
169
+ typedef struct nvtxNameCudaDeviceW_params_st {
170
+ int device;
171
+ const wchar_t* name;
172
+ } nvtxNameCudaDeviceW_params;
173
+
174
+ typedef struct nvtxNameCudaStreamA_params_st {
175
+ cudaStream_t stream;
176
+ const char* name;
177
+ } nvtxNameCudaStreamA_params;
178
+
179
+ typedef struct nvtxNameCudaStreamW_params_st {
180
+ cudaStream_t stream;
181
+ const wchar_t* name;
182
+ } nvtxNameCudaStreamW_params;
183
+
184
+ typedef struct nvtxNameCudaEventA_params_st {
185
+ cudaEvent_t event;
186
+ const char* name;
187
+ } nvtxNameCudaEventA_params;
188
+
189
+ typedef struct nvtxNameCudaEventW_params_st {
190
+ cudaEvent_t event;
191
+ const wchar_t* name;
192
+ } nvtxNameCudaEventW_params;
193
+
194
+ typedef struct nvtxDomainCreateA_params_st {
195
+ const char* name;
196
+ } nvtxDomainCreateA_params;
197
+
198
+ typedef struct nvtxDomainDestroy_params_st {
199
+ nvtxDomainHandle_t domain;
200
+ } nvtxDomainDestroy_params;
201
+
202
+ typedef struct nvtxDomainMarkEx_params_st {
203
+ nvtxDomainHandle_t domain;
204
+ nvtxMarkEx_params core;
205
+ } nvtxDomainMarkEx_params;
206
+
207
+ typedef struct nvtxDomainRangeStartEx_params_st {
208
+ nvtxDomainHandle_t domain;
209
+ nvtxRangeStartEx_params core;
210
+ } nvtxDomainRangeStartEx_params;
211
+
212
+ typedef struct nvtxDomainRangeEnd_params_st {
213
+ nvtxDomainHandle_t domain;
214
+ nvtxRangeEnd_params core;
215
+ } nvtxDomainRangeEnd_params;
216
+
217
+ typedef struct nvtxDomainRangePushEx_params_st {
218
+ nvtxDomainHandle_t domain;
219
+ nvtxRangePushEx_params core;
220
+ } nvtxDomainRangePushEx_params;
221
+
222
+ typedef struct nvtxDomainRangePop_params_st {
223
+ nvtxDomainHandle_t domain;
224
+ } nvtxDomainRangePop_params;
225
+
226
+ typedef struct nvtxSyncUserCreate_params_st {
227
+ nvtxDomainHandle_t domain;
228
+ const nvtxSyncUserAttributes_t* attribs;
229
+ } nvtxSyncUserCreate_params;
230
+
231
+ typedef struct nvtxSyncUserCommon_params_st {
232
+ nvtxSyncUser_t handle;
233
+ } nvtxSyncUserCommon_params;
234
+
235
+ typedef struct nvtxDomainRegisterStringA_params_st {
236
+ nvtxDomainHandle_t domain;
237
+ const char* string;
238
+ } nvtxDomainRegisterStringA_params;
239
+
240
+ typedef struct nvtxDomainRegisterStringW_params_st {
241
+ nvtxDomainHandle_t domain;
242
+ const char* string;
243
+ } nvtxDomainRegisterStringW_params;
244
+
245
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
246
+ #pragma GCC visibility pop
247
+ #endif
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/nvperf_common.h ADDED
@@ -0,0 +1,273 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #ifndef NVPERF_COMMON_H
2
+ #define NVPERF_COMMON_H
3
+
4
+ /*
5
+ * Copyright 2014-2022 NVIDIA Corporation. All rights reserved.
6
+ *
7
+ * NOTICE TO USER:
8
+ *
9
+ * This source code is subject to NVIDIA ownership rights under U.S. and
10
+ * international Copyright laws.
11
+ *
12
+ * This software and the information contained herein is PROPRIETARY and
13
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and conditions
14
+ * of a form of NVIDIA software license agreement.
15
+ *
16
+ * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
17
+ * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
18
+ * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
19
+ * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
20
+ * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
21
+ * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
22
+ * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
23
+ * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
24
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
25
+ * OR PERFORMANCE OF THIS SOURCE CODE.
26
+ *
27
+ * U.S. Government End Users. This source code is a "commercial item" as
28
+ * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
29
+ * "commercial computer software" and "commercial computer software
30
+ * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
31
+ * and is provided to the U.S. Government only as a commercial end item.
32
+ * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
33
+ * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
34
+ * source code with only those rights set forth herein.
35
+ *
36
+ * Any use of this source code in individual and commercial software must
37
+ * include, in the user documentation and internal comments to the code,
38
+ * the above Disclaimer and U.S. Government End Users Notice.
39
+ */
40
+
41
+ #include <stddef.h>
42
+ #include <stdint.h>
43
+
44
+ #if defined(__GNUC__) && defined(NVPA_SHARED_LIB)
45
+ #pragma GCC visibility push(default)
46
+ #if !defined(NVPW_LOCAL)
47
+ #define NVPW_LOCAL __attribute__ ((visibility ("hidden")))
48
+ #endif
49
+ #else
50
+ #if !defined(NVPW_LOCAL)
51
+ #define NVPW_LOCAL
52
+ #endif
53
+ #endif
54
+
55
+ #ifdef __cplusplus
56
+ extern "C" {
57
+ #endif
58
+
59
+ /**
60
+ * @file nvperf_common.h
61
+ */
62
+
63
+ #ifndef NVPERF_NVPA_STATUS_DEFINED
64
+ #define NVPERF_NVPA_STATUS_DEFINED
65
+
66
+ /// Error codes.
67
+ typedef enum NVPA_Status
68
+ {
69
+ /// Success
70
+ NVPA_STATUS_SUCCESS = 0,
71
+ /// Generic error.
72
+ NVPA_STATUS_ERROR = 1,
73
+ /// Internal error. Please file a bug!
74
+ NVPA_STATUS_INTERNAL_ERROR = 2,
75
+ /// NVPW_InitializeTarget() has not been called yet.
76
+ NVPA_STATUS_NOT_INITIALIZED = 3,
77
+ /// The NvPerf DLL/DSO could not be loaded during NVPW_Initialize*.
78
+ NVPA_STATUS_NOT_LOADED = 4,
79
+ /// The function was not found in this version of the NvPerf DLL/DSO.
80
+ NVPA_STATUS_FUNCTION_NOT_FOUND = 5,
81
+ /// The request was intentionally not supported.
82
+ NVPA_STATUS_NOT_SUPPORTED = 6,
83
+ /// The request was not implemented by this version.
84
+ NVPA_STATUS_NOT_IMPLEMENTED = 7,
85
+ /// Invalid argument.
86
+ NVPA_STATUS_INVALID_ARGUMENT = 8,
87
+ /// UNUSED
88
+ NVPA_STATUS_INVALID_METRIC_ID = 9,
89
+ /// No driver has been loaded via NVPW_*_LoadDriver().
90
+ NVPA_STATUS_DRIVER_NOT_LOADED = 10,
91
+ /// Failed memory allocation.
92
+ NVPA_STATUS_OUT_OF_MEMORY = 11,
93
+ /// UNUSED
94
+ NVPA_STATUS_INVALID_THREAD_STATE = 12,
95
+ /// UNUSED
96
+ NVPA_STATUS_FAILED_CONTEXT_ALLOC = 13,
97
+ /// The specified GPU is not supported.
98
+ NVPA_STATUS_UNSUPPORTED_GPU = 14,
99
+ /// The installed NVIDIA driver is too old.
100
+ NVPA_STATUS_INSUFFICIENT_DRIVER_VERSION = 15,
101
+ /// UNUSED
102
+ NVPA_STATUS_OBJECT_NOT_REGISTERED = 16,
103
+ /// Profiling permission not granted; see https://developer.nvidia.com/nvidia-development-tools-solutions-
104
+ /// ERR_NVGPUCTRPERM-permission-issue-performance-counters
105
+ NVPA_STATUS_INSUFFICIENT_PRIVILEGE = 17,
106
+ /// UNUSED
107
+ NVPA_STATUS_INVALID_CONTEXT_STATE = 18,
108
+ /// UNUSED
109
+ NVPA_STATUS_INVALID_OBJECT_STATE = 19,
110
+ /// The request could not be fulfilled because a system resource is already in use.
111
+ NVPA_STATUS_RESOURCE_UNAVAILABLE = 20,
112
+ /// UNUSED
113
+ NVPA_STATUS_DRIVER_LOADED_TOO_LATE = 21,
114
+ /// The provided buffer is not large enough.
115
+ NVPA_STATUS_INSUFFICIENT_SPACE = 22,
116
+ /// UNUSED
117
+ NVPA_STATUS_OBJECT_MISMATCH = 23,
118
+ /// Virtualized GPU (vGPU) is not supported.
119
+ NVPA_STATUS_VIRTUALIZED_DEVICE_NOT_SUPPORTED = 24,
120
+ /// Profiling permission was not granted or the device was disabled.
121
+ NVPA_STATUS_PROFILING_NOT_ALLOWED = 25,
122
+ NVPA_STATUS__COUNT
123
+ } NVPA_Status;
124
+
125
+
126
+ #endif // NVPERF_NVPA_STATUS_DEFINED
127
+
128
+
129
+ #ifndef NVPERF_NVPA_ACTIVITY_KIND_DEFINED
130
+ #define NVPERF_NVPA_ACTIVITY_KIND_DEFINED
131
+
132
+ /// The configuration's activity-kind dictates which types of data may be collected.
133
+ typedef enum NVPA_ActivityKind
134
+ {
135
+ /// Invalid value.
136
+ NVPA_ACTIVITY_KIND_INVALID = 0,
137
+ /// A workload-centric activity for serialized and pipelined collection.
138
+ ///
139
+ /// Profiler is capable of collecting both serialized and pipelined metrics. The library introduces any
140
+ /// synchronization required to collect serialized metrics.
141
+ NVPA_ACTIVITY_KIND_PROFILER,
142
+ /// A realtime activity for sampling counters from the CPU or GPU.
143
+ NVPA_ACTIVITY_KIND_REALTIME_SAMPLED,
144
+ /// A realtime activity for profiling counters from the CPU or GPU without CPU/GPU synchronizations.
145
+ NVPA_ACTIVITY_KIND_REALTIME_PROFILER,
146
+ NVPA_ACTIVITY_KIND__COUNT
147
+ } NVPA_ActivityKind;
148
+
149
+
150
+ #endif // NVPERF_NVPA_ACTIVITY_KIND_DEFINED
151
+
152
+
153
+ #ifndef NVPERF_NVPA_BOOL_DEFINED
154
+ #define NVPERF_NVPA_BOOL_DEFINED
155
+ /// The type used for boolean values.
156
+ typedef uint8_t NVPA_Bool;
157
+ #endif // NVPERF_NVPA_BOOL_DEFINED
158
+
159
+ #ifndef NVPA_STRUCT_SIZE
160
+ #define NVPA_STRUCT_SIZE(type_, lastfield_) (offsetof(type_, lastfield_) + sizeof(((type_*)0)->lastfield_))
161
+ #endif // NVPA_STRUCT_SIZE
162
+
163
+ #ifndef NVPW_FIELD_EXISTS
164
+ #define NVPW_FIELD_EXISTS(pParams_, name_) \
165
+ ((pParams_)->structSize >= (size_t)((const uint8_t*)(&(pParams_)->name_) + sizeof(pParams_)->name_ - (const uint8_t*)(pParams_)))
166
+ #endif // NVPW_FIELD_EXISTS
167
+
168
+
169
+ #ifndef NVPERF_NVPA_GETPROCADDRESS_DEFINED
170
+ #define NVPERF_NVPA_GETPROCADDRESS_DEFINED
171
+
172
+ typedef NVPA_Status (*NVPA_GenericFn)(void);
173
+
174
+
175
+ ///
176
+ /// Gets the address of an NvPerf API function.
177
+ ///
178
+ /// \return A function pointer to the function, or NULL if the function is not available.
179
+ ///
180
+ /// \param pFunctionName [in] Name of the function to retrieve.
181
+ NVPA_GenericFn NVPA_GetProcAddress(const char* pFunctionName);
182
+
183
+ #endif
184
+
185
+ #ifndef NVPERF_NVPW_SETLIBRARYLOADPATHS_DEFINED
186
+ #define NVPERF_NVPW_SETLIBRARYLOADPATHS_DEFINED
187
+
188
+
189
+ typedef struct NVPW_SetLibraryLoadPaths_Params
190
+ {
191
+ /// [in]
192
+ size_t structSize;
193
+ /// [in] assign to NULL
194
+ void* pPriv;
195
+ /// [in] number of paths in ppPaths
196
+ size_t numPaths;
197
+ /// [in] array of null-terminated paths
198
+ const char** ppPaths;
199
+ } NVPW_SetLibraryLoadPaths_Params;
200
+ #define NVPW_SetLibraryLoadPaths_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_SetLibraryLoadPaths_Params, ppPaths)
201
+
202
+ /// Sets library search path for \ref NVPW_InitializeHost() and \ref NVPW_InitializeTarget().
203
+ /// \ref NVPW_InitializeHost() and \ref NVPW_InitializeTarget load the NvPerf DLL/DSO. This function sets
204
+ /// ordered paths that will be searched with the LoadLibrary() or dlopen() call.
205
+ /// If load paths are set by this function, the default set of load paths
206
+ /// will not be attempted.
207
+ /// Each path must point at a directory (not a file name).
208
+ /// This function is not thread-safe.
209
+ /// Example Usage:
210
+ /// \code
211
+ /// const char* paths[] = {
212
+ /// "path1", "path2", etc
213
+ /// };
214
+ /// NVPW_SetLibraryLoadPaths_Params params{NVPW_SetLibraryLoadPaths_Params_STRUCT_SIZE};
215
+ /// params.numPaths = sizeof(paths)/sizeof(paths[0]);
216
+ /// params.ppPaths = paths;
217
+ /// NVPW_SetLibraryLoadPaths(&params);
218
+ /// NVPW_InitializeHost();
219
+ /// params.numPaths = 0;
220
+ /// params.ppPaths = NULL;
221
+ /// NVPW_SetLibraryLoadPaths(&params);
222
+ /// \endcode
223
+ NVPA_Status NVPW_SetLibraryLoadPaths(NVPW_SetLibraryLoadPaths_Params* pParams);
224
+
225
+ typedef struct NVPW_SetLibraryLoadPathsW_Params
226
+ {
227
+ /// [in]
228
+ size_t structSize;
229
+ /// [in] assign to NULL
230
+ void* pPriv;
231
+ /// [in] number of paths in ppwPaths
232
+ size_t numPaths;
233
+ /// [in] array of null-terminated paths
234
+ const wchar_t** ppwPaths;
235
+ } NVPW_SetLibraryLoadPathsW_Params;
236
+ #define NVPW_SetLibraryLoadPathsW_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_SetLibraryLoadPathsW_Params, ppwPaths)
237
+
238
+ /// Sets library search path for \ref NVPW_InitializeHost() and \ref NVPW_InitializeTarget().
239
+ /// \ref NVPW_InitializeHost() and \ref NVPW_InitializeTarget load the NvPerf DLL/DSO. This function sets
240
+ /// ordered paths that will be searched with the LoadLibrary() or dlopen() call.
241
+ /// If load paths are set by this function, the default set of load paths
242
+ /// will not be attempted.
243
+ /// Each path must point at a directory (not a file name).
244
+ /// This function is not thread-safe.
245
+ /// Example Usage:
246
+ /// \code
247
+ /// const wchar_t* wpaths[] = {
248
+ /// L"path1", L"path2", etc
249
+ /// };
250
+ /// NVPW_SetLibraryLoadPathsW_Params params{NVPW_SetLibraryLoadPathsW_Params_STRUCT_SIZE};
251
+ /// params.numPaths = sizeof(wpaths)/sizeof(wpaths[0]);
252
+ /// params.ppwPaths = wpaths;
253
+ /// NVPW_SetLibraryLoadPathsW(&params);
254
+ /// NVPW_InitializeHost();
255
+ /// params.numPaths = 0;
256
+ /// params.ppwPaths = NULL;
257
+ /// NVPW_SetLibraryLoadPathsW(&params);
258
+ /// \endcode
259
+ NVPA_Status NVPW_SetLibraryLoadPathsW(NVPW_SetLibraryLoadPathsW_Params* pParams);
260
+
261
+ #endif
262
+
263
+
264
+
265
+ #ifdef __cplusplus
266
+ } // extern "C"
267
+ #endif
268
+
269
+ #if defined(__GNUC__) && defined(NVPA_SHARED_LIB)
270
+ #pragma GCC visibility pop
271
+ #endif
272
+
273
+ #endif // NVPERF_COMMON_H
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/nvperf_cuda_host.h ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #ifndef NVPERF_CUDA_HOST_H
2
+ #define NVPERF_CUDA_HOST_H
3
+
4
+ /*
5
+ * Copyright 2014-2022 NVIDIA Corporation. All rights reserved.
6
+ *
7
+ * NOTICE TO USER:
8
+ *
9
+ * This source code is subject to NVIDIA ownership rights under U.S. and
10
+ * international Copyright laws.
11
+ *
12
+ * This software and the information contained herein is PROPRIETARY and
13
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and conditions
14
+ * of a form of NVIDIA software license agreement.
15
+ *
16
+ * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
17
+ * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
18
+ * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
19
+ * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
20
+ * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
21
+ * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
22
+ * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
23
+ * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
24
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
25
+ * OR PERFORMANCE OF THIS SOURCE CODE.
26
+ *
27
+ * U.S. Government End Users. This source code is a "commercial item" as
28
+ * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
29
+ * "commercial computer software" and "commercial computer software
30
+ * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
31
+ * and is provided to the U.S. Government only as a commercial end item.
32
+ * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
33
+ * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
34
+ * source code with only those rights set forth herein.
35
+ *
36
+ * Any use of this source code in individual and commercial software must
37
+ * include, in the user documentation and internal comments to the code,
38
+ * the above Disclaimer and U.S. Government End Users Notice.
39
+ */
40
+
41
+ #include <stddef.h>
42
+ #include <stdint.h>
43
+ #include "nvperf_common.h"
44
+ #include "nvperf_host.h"
45
+
46
+ #if defined(__GNUC__) && defined(NVPA_SHARED_LIB)
47
+ #pragma GCC visibility push(default)
48
+ #if !defined(NVPW_LOCAL)
49
+ #define NVPW_LOCAL __attribute__ ((visibility ("hidden")))
50
+ #endif
51
+ #else
52
+ #if !defined(NVPW_LOCAL)
53
+ #define NVPW_LOCAL
54
+ #endif
55
+ #endif
56
+
57
+ #ifdef __cplusplus
58
+ extern "C" {
59
+ #endif
60
+
61
+ /**
62
+ * @file nvperf_cuda_host.h
63
+ */
64
+
65
+ /// 'NVPA_MetricsContext' and its APIs are deprecated, please use 'NVPW_MetricsEvaluator' and its APIs instead.
66
+ typedef struct NVPA_MetricsContext NVPA_MetricsContext;
67
+
68
+ typedef struct NVPW_CUDA_MetricsContext_Create_Params
69
+ {
70
+ /// [in]
71
+ size_t structSize;
72
+ /// [in] assign to NULL
73
+ void* pPriv;
74
+ /// [in]
75
+ const char* pChipName;
76
+ /// [out]
77
+ struct NVPA_MetricsContext* pMetricsContext;
78
+ } NVPW_CUDA_MetricsContext_Create_Params;
79
+ #define NVPW_CUDA_MetricsContext_Create_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CUDA_MetricsContext_Create_Params, pMetricsContext)
80
+
81
+ NVPA_Status NVPW_CUDA_MetricsContext_Create(NVPW_CUDA_MetricsContext_Create_Params* pParams);
82
+
83
+ typedef struct NVPW_CUDA_RawMetricsConfig_Create_Params
84
+ {
85
+ /// [in]
86
+ size_t structSize;
87
+ /// [in] assign to NULL
88
+ void* pPriv;
89
+ /// [in]
90
+ NVPA_ActivityKind activityKind;
91
+ /// [in]
92
+ const char* pChipName;
93
+ /// [out] new NVPA_RawMetricsConfig object
94
+ struct NVPA_RawMetricsConfig* pRawMetricsConfig;
95
+ } NVPW_CUDA_RawMetricsConfig_Create_Params;
96
+ #define NVPW_CUDA_RawMetricsConfig_Create_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CUDA_RawMetricsConfig_Create_Params, pRawMetricsConfig)
97
+
98
+ NVPA_Status NVPW_CUDA_RawMetricsConfig_Create(NVPW_CUDA_RawMetricsConfig_Create_Params* pParams);
99
+
100
+ typedef struct NVPW_CUDA_RawMetricsConfig_Create_V2_Params
101
+ {
102
+ /// [in]
103
+ size_t structSize;
104
+ /// [in] assign to NULL
105
+ void* pPriv;
106
+ /// [in]
107
+ NVPA_ActivityKind activityKind;
108
+ /// [in] accepted for chips supported at the time-of-release.
109
+ const char* pChipName;
110
+ /// [in] buffer with counter availability image - required for future chip support
111
+ const uint8_t* pCounterAvailabilityImage;
112
+ /// [out] new NVPA_RawMetricsConfig object
113
+ struct NVPA_RawMetricsConfig* pRawMetricsConfig;
114
+ } NVPW_CUDA_RawMetricsConfig_Create_V2_Params;
115
+ #define NVPW_CUDA_RawMetricsConfig_Create_V2_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CUDA_RawMetricsConfig_Create_V2_Params, pRawMetricsConfig)
116
+
117
+ /// Use either 'pChipName' or 'pCounterAvailabilityImage'.
118
+ NVPA_Status NVPW_CUDA_RawMetricsConfig_Create_V2(NVPW_CUDA_RawMetricsConfig_Create_V2_Params* pParams);
119
+
120
+ typedef struct NVPW_CUDA_CounterDataBuilder_Create_Params
121
+ {
122
+ /// [in]
123
+ size_t structSize;
124
+ /// [in] assign to NULL
125
+ void* pPriv;
126
+ /// [in] accepted for chips supported at the time-of-release.
127
+ const char* pChipName;
128
+ /// [in] buffer with counter availability image - required for future chip support
129
+ const uint8_t* pCounterAvailabilityImage;
130
+ /// [out] new NVPA_CounterDataBuilder object
131
+ struct NVPA_CounterDataBuilder* pCounterDataBuilder;
132
+ } NVPW_CUDA_CounterDataBuilder_Create_Params;
133
+ #define NVPW_CUDA_CounterDataBuilder_Create_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CUDA_CounterDataBuilder_Create_Params, pCounterDataBuilder)
134
+
135
+ /// Use either 'pChipName' or 'pCounterAvailabilityImage'.
136
+ NVPA_Status NVPW_CUDA_CounterDataBuilder_Create(NVPW_CUDA_CounterDataBuilder_Create_Params* pParams);
137
+
138
+ typedef struct NVPW_MetricsEvaluator NVPW_MetricsEvaluator;
139
+
140
+ typedef struct NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params
141
+ {
142
+ /// [in]
143
+ size_t structSize;
144
+ /// [in] assign to NULL
145
+ void* pPriv;
146
+ /// [in] accepted for chips supported at the time-of-release.
147
+ const char* pChipName;
148
+ /// [in] buffer with counter availability image - required for future chip support
149
+ const uint8_t* pCounterAvailabilityImage;
150
+ /// [out]
151
+ size_t scratchBufferSize;
152
+ } NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params;
153
+ #define NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params, scratchBufferSize)
154
+
155
+ /// Use either 'pChipName' or 'pCounterAvailabilityImage'.
156
+ NVPA_Status NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize(NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params* pParams);
157
+
158
+ typedef struct NVPW_CUDA_MetricsEvaluator_Initialize_Params
159
+ {
160
+ /// [in]
161
+ size_t structSize;
162
+ /// [in] assign to NULL
163
+ void* pPriv;
164
+ /// [in]
165
+ uint8_t* pScratchBuffer;
166
+ /// [in] the size of the 'pScratchBuffer' array, should be at least the size of the 'scratchBufferSize' returned
167
+ /// by 'NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize'
168
+ size_t scratchBufferSize;
169
+ /// [in] accepted for chips supported at the time-of-release.
170
+ const char* pChipName;
171
+ /// [in] buffer with counter availability image - required for future chip support
172
+ const uint8_t* pCounterAvailabilityImage;
173
+ /// [in]
174
+ const uint8_t* pCounterDataImage;
175
+ /// [in] must be provided if 'pCounterDataImage' is not NULL
176
+ size_t counterDataImageSize;
177
+ /// [out]
178
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
179
+ } NVPW_CUDA_MetricsEvaluator_Initialize_Params;
180
+ #define NVPW_CUDA_MetricsEvaluator_Initialize_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CUDA_MetricsEvaluator_Initialize_Params, pMetricsEvaluator)
181
+
182
+ /// Use one of 'pChipName', 'pCounterAvailabilityImage', or 'pCounterDataImage'. 'pChipName' or
183
+ /// 'pCounterAvailabilityImage' will create a metrics evaluator based on a virtual device while 'pCounterDataImage'
184
+ /// will create a metrics evaluator based on the actual device.
185
+ NVPA_Status NVPW_CUDA_MetricsEvaluator_Initialize(NVPW_CUDA_MetricsEvaluator_Initialize_Params* pParams);
186
+
187
+
188
+
189
+ #ifdef __cplusplus
190
+ } // extern "C"
191
+ #endif
192
+
193
+ #if defined(__GNUC__) && defined(NVPA_SHARED_LIB)
194
+ #pragma GCC visibility pop
195
+ #endif
196
+
197
+ #endif // NVPERF_CUDA_HOST_H
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/nvperf_host.h ADDED
@@ -0,0 +1,1471 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #ifndef NVPERF_HOST_H
2
+ #define NVPERF_HOST_H
3
+
4
+ /*
5
+ * Copyright 2014-2022 NVIDIA Corporation. All rights reserved.
6
+ *
7
+ * NOTICE TO USER:
8
+ *
9
+ * This source code is subject to NVIDIA ownership rights under U.S. and
10
+ * international Copyright laws.
11
+ *
12
+ * This software and the information contained herein is PROPRIETARY and
13
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and conditions
14
+ * of a form of NVIDIA software license agreement.
15
+ *
16
+ * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
17
+ * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
18
+ * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
19
+ * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
20
+ * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
21
+ * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
22
+ * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
23
+ * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
24
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
25
+ * OR PERFORMANCE OF THIS SOURCE CODE.
26
+ *
27
+ * U.S. Government End Users. This source code is a "commercial item" as
28
+ * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
29
+ * "commercial computer software" and "commercial computer software
30
+ * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
31
+ * and is provided to the U.S. Government only as a commercial end item.
32
+ * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
33
+ * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
34
+ * source code with only those rights set forth herein.
35
+ *
36
+ * Any use of this source code in individual and commercial software must
37
+ * include, in the user documentation and internal comments to the code,
38
+ * the above Disclaimer and U.S. Government End Users Notice.
39
+ */
40
+
41
+ #include <stddef.h>
42
+ #include <stdint.h>
43
+ #include "nvperf_common.h"
44
+
45
+ #if defined(__GNUC__) && defined(NVPA_SHARED_LIB)
46
+ #pragma GCC visibility push(default)
47
+ #if !defined(NVPW_LOCAL)
48
+ #define NVPW_LOCAL __attribute__ ((visibility ("hidden")))
49
+ #endif
50
+ #else
51
+ #if !defined(NVPW_LOCAL)
52
+ #define NVPW_LOCAL
53
+ #endif
54
+ #endif
55
+
56
+ #ifdef __cplusplus
57
+ extern "C" {
58
+ #endif
59
+
60
+ /**
61
+ * @file nvperf_host.h
62
+ */
63
+
64
+
65
+ // Guard against multiple definition of NvPerf host types
66
+ #ifndef NVPERF_HOST_API_DEFINED
67
+ #define NVPERF_HOST_API_DEFINED
68
+
69
+
70
+ /***************************************************************************//**
71
+ * @name Host Configuration
72
+ * @{
73
+ */
74
+
75
+ typedef struct NVPW_InitializeHost_Params
76
+ {
77
+ /// [in]
78
+ size_t structSize;
79
+ /// [in] assign to NULL
80
+ void* pPriv;
81
+ } NVPW_InitializeHost_Params;
82
+ #define NVPW_InitializeHost_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_InitializeHost_Params, pPriv)
83
+
84
+ /// Load the host library.
85
+ NVPA_Status NVPW_InitializeHost(NVPW_InitializeHost_Params* pParams);
86
+
87
+ typedef struct NVPW_CounterData_CalculateCounterDataImageCopySize_Params
88
+ {
89
+ /// [in]
90
+ size_t structSize;
91
+ /// [in] assign to NULL
92
+ void* pPriv;
93
+ /// The CounterDataPrefix generated from e.g. nvperf2 initdata or
94
+ /// NVPW_CounterDataBuilder_GetCounterDataPrefix(). Must be align(8).
95
+ const uint8_t* pCounterDataPrefix;
96
+ size_t counterDataPrefixSize;
97
+ /// max number of ranges that can be profiled
98
+ uint32_t maxNumRanges;
99
+ /// max number of RangeTree nodes; must be >= maxNumRanges
100
+ uint32_t maxNumRangeTreeNodes;
101
+ /// max string length of each RangeName, including the trailing NUL character
102
+ uint32_t maxRangeNameLength;
103
+ const uint8_t* pCounterDataSrc;
104
+ /// [out] required size of the copy buffer
105
+ size_t copyDataImageCounterSize;
106
+ } NVPW_CounterData_CalculateCounterDataImageCopySize_Params;
107
+ #define NVPW_CounterData_CalculateCounterDataImageCopySize_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CounterData_CalculateCounterDataImageCopySize_Params, copyDataImageCounterSize)
108
+
109
+ NVPA_Status NVPW_CounterData_CalculateCounterDataImageCopySize(NVPW_CounterData_CalculateCounterDataImageCopySize_Params* pParams);
110
+
111
+ typedef struct NVPW_CounterData_InitializeCounterDataImageCopy_Params
112
+ {
113
+ /// [in]
114
+ size_t structSize;
115
+ /// [in] assign to NULL
116
+ void* pPriv;
117
+ /// The CounterDataPrefix generated from e.g. nvperf2 initdata or
118
+ /// NVPW_CounterDataBuilder_GetCounterDataPrefix(). Must be align(8).
119
+ const uint8_t* pCounterDataPrefix;
120
+ size_t counterDataPrefixSize;
121
+ /// max number of ranges that can be profiled
122
+ uint32_t maxNumRanges;
123
+ /// max number of RangeTree nodes; must be >= maxNumRanges
124
+ uint32_t maxNumRangeTreeNodes;
125
+ /// max string length of each RangeName, including the trailing NUL character
126
+ uint32_t maxRangeNameLength;
127
+ const uint8_t* pCounterDataSrc;
128
+ uint8_t* pCounterDataDst;
129
+ } NVPW_CounterData_InitializeCounterDataImageCopy_Params;
130
+ #define NVPW_CounterData_InitializeCounterDataImageCopy_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CounterData_InitializeCounterDataImageCopy_Params, pCounterDataDst)
131
+
132
+ NVPA_Status NVPW_CounterData_InitializeCounterDataImageCopy(NVPW_CounterData_InitializeCounterDataImageCopy_Params* pParams);
133
+
134
+ typedef struct NVPA_CounterDataCombiner NVPA_CounterDataCombiner;
135
+
136
+ typedef struct NVPW_CounterDataCombiner_Create_Params
137
+ {
138
+ /// [in]
139
+ size_t structSize;
140
+ /// [in] assign to NULL
141
+ void* pPriv;
142
+ /// The destination counter data into which the source datas will be combined
143
+ uint8_t* pCounterDataDst;
144
+ /// [out] The created counter data combiner
145
+ NVPA_CounterDataCombiner* pCounterDataCombiner;
146
+ } NVPW_CounterDataCombiner_Create_Params;
147
+ #define NVPW_CounterDataCombiner_Create_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CounterDataCombiner_Create_Params, pCounterDataCombiner)
148
+
149
+ NVPA_Status NVPW_CounterDataCombiner_Create(NVPW_CounterDataCombiner_Create_Params* pParams);
150
+
151
+ typedef struct NVPW_CounterDataCombiner_Destroy_Params
152
+ {
153
+ /// [in]
154
+ size_t structSize;
155
+ /// [in] assign to NULL
156
+ void* pPriv;
157
+ NVPA_CounterDataCombiner* pCounterDataCombiner;
158
+ } NVPW_CounterDataCombiner_Destroy_Params;
159
+ #define NVPW_CounterDataCombiner_Destroy_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CounterDataCombiner_Destroy_Params, pCounterDataCombiner)
160
+
161
+ NVPA_Status NVPW_CounterDataCombiner_Destroy(NVPW_CounterDataCombiner_Destroy_Params* pParams);
162
+
163
+ typedef struct NVPW_CounterDataCombiner_CreateRange_Params
164
+ {
165
+ /// [in]
166
+ size_t structSize;
167
+ /// [in] assign to NULL
168
+ void* pPriv;
169
+ NVPA_CounterDataCombiner* pCounterDataCombiner;
170
+ size_t numDescriptions;
171
+ const char* const* ppDescriptions;
172
+ /// [out]
173
+ size_t rangeIndexDst;
174
+ } NVPW_CounterDataCombiner_CreateRange_Params;
175
+ #define NVPW_CounterDataCombiner_CreateRange_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CounterDataCombiner_CreateRange_Params, rangeIndexDst)
176
+
177
+ NVPA_Status NVPW_CounterDataCombiner_CreateRange(NVPW_CounterDataCombiner_CreateRange_Params* pParams);
178
+
179
+ typedef struct NVPW_CounterDataCombiner_AccumulateIntoRange_Params
180
+ {
181
+ /// [in]
182
+ size_t structSize;
183
+ /// [in] assign to NULL
184
+ void* pPriv;
185
+ NVPA_CounterDataCombiner* pCounterDataCombiner;
186
+ size_t rangeIndexDst;
187
+ uint32_t dstMultiplier;
188
+ const uint8_t* pCounterDataSrc;
189
+ size_t rangeIndexSrc;
190
+ uint32_t srcMultiplier;
191
+ } NVPW_CounterDataCombiner_AccumulateIntoRange_Params;
192
+ #define NVPW_CounterDataCombiner_AccumulateIntoRange_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CounterDataCombiner_AccumulateIntoRange_Params, srcMultiplier)
193
+
194
+ NVPA_Status NVPW_CounterDataCombiner_AccumulateIntoRange(NVPW_CounterDataCombiner_AccumulateIntoRange_Params* pParams);
195
+
196
+ typedef struct NVPW_CounterDataCombiner_SumIntoRange_Params
197
+ {
198
+ /// [in]
199
+ size_t structSize;
200
+ /// [in] assign to NULL
201
+ void* pPriv;
202
+ NVPA_CounterDataCombiner* pCounterDataCombiner;
203
+ size_t rangeIndexDst;
204
+ const uint8_t* pCounterDataSrc;
205
+ size_t rangeIndexSrc;
206
+ } NVPW_CounterDataCombiner_SumIntoRange_Params;
207
+ #define NVPW_CounterDataCombiner_SumIntoRange_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CounterDataCombiner_SumIntoRange_Params, rangeIndexSrc)
208
+
209
+ NVPA_Status NVPW_CounterDataCombiner_SumIntoRange(NVPW_CounterDataCombiner_SumIntoRange_Params* pParams);
210
+
211
+ typedef struct NVPW_CounterDataCombiner_WeightedSumIntoRange_Params
212
+ {
213
+ /// [in]
214
+ size_t structSize;
215
+ /// [in] assign to NULL
216
+ void* pPriv;
217
+ NVPA_CounterDataCombiner* pCounterDataCombiner;
218
+ size_t rangeIndexDst;
219
+ double dstMultiplier;
220
+ const uint8_t* pCounterDataSrc;
221
+ size_t rangeIndexSrc;
222
+ double srcMultiplier;
223
+ } NVPW_CounterDataCombiner_WeightedSumIntoRange_Params;
224
+ #define NVPW_CounterDataCombiner_WeightedSumIntoRange_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CounterDataCombiner_WeightedSumIntoRange_Params, srcMultiplier)
225
+
226
+ NVPA_Status NVPW_CounterDataCombiner_WeightedSumIntoRange(NVPW_CounterDataCombiner_WeightedSumIntoRange_Params* pParams);
227
+
228
+ /**
229
+ * @}
230
+ ******************************************************************************/
231
+
232
+ /***************************************************************************//**
233
+ * @name Metrics Configuration
234
+ * @{
235
+ */
236
+
237
+ typedef struct NVPA_RawMetricsConfig NVPA_RawMetricsConfig;
238
+
239
+ typedef struct NVPA_RawMetricRequest
240
+ {
241
+ /// [in]
242
+ size_t structSize;
243
+ /// [in] assign to NULL
244
+ void* pPriv;
245
+ /// in
246
+ const char* pMetricName;
247
+ /// in
248
+ NVPA_Bool isolated;
249
+ /// in; ignored by AddMetric but observed by CounterData initialization
250
+ NVPA_Bool keepInstances;
251
+ } NVPA_RawMetricRequest;
252
+ #define NVPA_RAW_METRIC_REQUEST_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPA_RawMetricRequest, keepInstances)
253
+
254
+ typedef struct NVPW_GetSupportedChipNames_Params
255
+ {
256
+ /// [in]
257
+ size_t structSize;
258
+ /// [in] assign to NULL
259
+ void* pPriv;
260
+ /// [out]
261
+ const char* const* ppChipNames;
262
+ /// [out]
263
+ size_t numChipNames;
264
+ } NVPW_GetSupportedChipNames_Params;
265
+ #define NVPW_GetSupportedChipNames_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_GetSupportedChipNames_Params, numChipNames)
266
+
267
+ NVPA_Status NVPW_GetSupportedChipNames(NVPW_GetSupportedChipNames_Params* pParams);
268
+
269
+ typedef struct NVPW_RawMetricsConfig_Destroy_Params
270
+ {
271
+ /// [in]
272
+ size_t structSize;
273
+ /// [in] assign to NULL
274
+ void* pPriv;
275
+ NVPA_RawMetricsConfig* pRawMetricsConfig;
276
+ } NVPW_RawMetricsConfig_Destroy_Params;
277
+ #define NVPW_RawMetricsConfig_Destroy_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_RawMetricsConfig_Destroy_Params, pRawMetricsConfig)
278
+
279
+ NVPA_Status NVPW_RawMetricsConfig_Destroy(NVPW_RawMetricsConfig_Destroy_Params* pParams);
280
+
281
+ typedef struct NVPW_RawMetricsConfig_SetCounterAvailability_Params
282
+ {
283
+ /// [in]
284
+ size_t structSize;
285
+ /// [in] assign to NULL
286
+ void* pPriv;
287
+ NVPA_RawMetricsConfig* pRawMetricsConfig;
288
+ /// [in] buffer with counter availability image
289
+ const uint8_t* pCounterAvailabilityImage;
290
+ } NVPW_RawMetricsConfig_SetCounterAvailability_Params;
291
+ #define NVPW_RawMetricsConfig_SetCounterAvailability_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_RawMetricsConfig_SetCounterAvailability_Params, pCounterAvailabilityImage)
292
+
293
+ NVPA_Status NVPW_RawMetricsConfig_SetCounterAvailability(NVPW_RawMetricsConfig_SetCounterAvailability_Params* pParams);
294
+
295
+ typedef struct NVPW_RawMetricsConfig_BeginPassGroup_Params
296
+ {
297
+ /// [in]
298
+ size_t structSize;
299
+ /// [in] assign to NULL
300
+ void* pPriv;
301
+ NVPA_RawMetricsConfig* pRawMetricsConfig;
302
+ size_t maxPassCount;
303
+ } NVPW_RawMetricsConfig_BeginPassGroup_Params;
304
+ #define NVPW_RawMetricsConfig_BeginPassGroup_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_RawMetricsConfig_BeginPassGroup_Params, maxPassCount)
305
+
306
+ NVPA_Status NVPW_RawMetricsConfig_BeginPassGroup(NVPW_RawMetricsConfig_BeginPassGroup_Params* pParams);
307
+
308
+ typedef struct NVPW_RawMetricsConfig_EndPassGroup_Params
309
+ {
310
+ /// [in]
311
+ size_t structSize;
312
+ /// [in] assign to NULL
313
+ void* pPriv;
314
+ NVPA_RawMetricsConfig* pRawMetricsConfig;
315
+ } NVPW_RawMetricsConfig_EndPassGroup_Params;
316
+ #define NVPW_RawMetricsConfig_EndPassGroup_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_RawMetricsConfig_EndPassGroup_Params, pRawMetricsConfig)
317
+
318
+ NVPA_Status NVPW_RawMetricsConfig_EndPassGroup(NVPW_RawMetricsConfig_EndPassGroup_Params* pParams);
319
+
320
+ typedef struct NVPW_RawMetricsConfig_GetNumMetrics_Params
321
+ {
322
+ /// [in]
323
+ size_t structSize;
324
+ /// [in] assign to NULL
325
+ void* pPriv;
326
+ const NVPA_RawMetricsConfig* pRawMetricsConfig;
327
+ /// [out]
328
+ size_t numMetrics;
329
+ } NVPW_RawMetricsConfig_GetNumMetrics_Params;
330
+ #define NVPW_RawMetricsConfig_GetNumMetrics_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_RawMetricsConfig_GetNumMetrics_Params, numMetrics)
331
+
332
+ NVPA_Status NVPW_RawMetricsConfig_GetNumMetrics(NVPW_RawMetricsConfig_GetNumMetrics_Params* pParams);
333
+
334
+ typedef struct NVPW_RawMetricsConfig_GetMetricProperties_Params
335
+ {
336
+ /// [in]
337
+ size_t structSize;
338
+ /// [in] assign to NULL
339
+ void* pPriv;
340
+ const NVPA_RawMetricsConfig* pRawMetricsConfig;
341
+ size_t metricIndex;
342
+ /// [out]
343
+ const char* pMetricName;
344
+ /// [out]
345
+ NVPA_Bool supportsPipelined;
346
+ /// [out]
347
+ NVPA_Bool supportsIsolated;
348
+ } NVPW_RawMetricsConfig_GetMetricProperties_Params;
349
+ #define NVPW_RawMetricsConfig_GetMetricProperties_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_RawMetricsConfig_GetMetricProperties_Params, supportsIsolated)
350
+
351
+ NVPA_Status NVPW_RawMetricsConfig_GetMetricProperties(NVPW_RawMetricsConfig_GetMetricProperties_Params* pParams);
352
+
353
+ typedef struct NVPW_RawMetricsConfig_GetMetricProperties_V2_Params
354
+ {
355
+ /// [in]
356
+ size_t structSize;
357
+ /// [in] assign to NULL
358
+ void* pPriv;
359
+ const NVPA_RawMetricsConfig* pRawMetricsConfig;
360
+ size_t metricIndex;
361
+ /// [out]
362
+ const char* pMetricName;
363
+ } NVPW_RawMetricsConfig_GetMetricProperties_V2_Params;
364
+ #define NVPW_RawMetricsConfig_GetMetricProperties_V2_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_RawMetricsConfig_GetMetricProperties_V2_Params, pMetricName)
365
+
366
+ NVPA_Status NVPW_RawMetricsConfig_GetMetricProperties_V2(NVPW_RawMetricsConfig_GetMetricProperties_V2_Params* pParams);
367
+
368
+ typedef struct NVPW_RawMetricsConfig_AddMetrics_Params
369
+ {
370
+ /// [in]
371
+ size_t structSize;
372
+ /// [in] assign to NULL
373
+ void* pPriv;
374
+ NVPA_RawMetricsConfig* pRawMetricsConfig;
375
+ const NVPA_RawMetricRequest* pRawMetricRequests;
376
+ size_t numMetricRequests;
377
+ } NVPW_RawMetricsConfig_AddMetrics_Params;
378
+ #define NVPW_RawMetricsConfig_AddMetrics_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_RawMetricsConfig_AddMetrics_Params, numMetricRequests)
379
+
380
+ NVPA_Status NVPW_RawMetricsConfig_AddMetrics(NVPW_RawMetricsConfig_AddMetrics_Params* pParams);
381
+
382
+ typedef struct NVPW_RawMetricsConfig_IsAddMetricsPossible_Params
383
+ {
384
+ /// [in]
385
+ size_t structSize;
386
+ /// [in] assign to NULL
387
+ void* pPriv;
388
+ const NVPA_RawMetricsConfig* pRawMetricsConfig;
389
+ const NVPA_RawMetricRequest* pRawMetricRequests;
390
+ size_t numMetricRequests;
391
+ /// [out]
392
+ NVPA_Bool isPossible;
393
+ } NVPW_RawMetricsConfig_IsAddMetricsPossible_Params;
394
+ #define NVPW_RawMetricsConfig_IsAddMetricsPossible_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_RawMetricsConfig_IsAddMetricsPossible_Params, isPossible)
395
+
396
+ NVPA_Status NVPW_RawMetricsConfig_IsAddMetricsPossible(NVPW_RawMetricsConfig_IsAddMetricsPossible_Params* pParams);
397
+
398
+ typedef struct NVPW_RawMetricsConfig_GenerateConfigImage_Params
399
+ {
400
+ /// [in]
401
+ size_t structSize;
402
+ /// [in] assign to NULL
403
+ void* pPriv;
404
+ NVPA_RawMetricsConfig* pRawMetricsConfig;
405
+ /// [in] If true, all existing pass groups may be merged to reduce number of passes.
406
+ /// If merge was successful, distribution of counters in passes may be updated as a side-effect. The effects
407
+ /// will be persistent in pRawMetricsConfig.
408
+ NVPA_Bool mergeAllPassGroups;
409
+ } NVPW_RawMetricsConfig_GenerateConfigImage_Params;
410
+ #define NVPW_RawMetricsConfig_GenerateConfigImage_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_RawMetricsConfig_GenerateConfigImage_Params, mergeAllPassGroups)
411
+
412
+ /// This API may fail if called inside a pass group with `mergeAllPassGroups` = true.
413
+ NVPA_Status NVPW_RawMetricsConfig_GenerateConfigImage(NVPW_RawMetricsConfig_GenerateConfigImage_Params* pParams);
414
+
415
+ typedef struct NVPW_RawMetricsConfig_GetConfigImage_Params
416
+ {
417
+ /// [in]
418
+ size_t structSize;
419
+ /// [in] assign to NULL
420
+ void* pPriv;
421
+ const NVPA_RawMetricsConfig* pRawMetricsConfig;
422
+ /// [in] Number of bytes allocated for pBuffer
423
+ size_t bytesAllocated;
424
+ /// [out] [optional] Buffer receiving the config image
425
+ uint8_t* pBuffer;
426
+ /// [out] Count of bytes that would be copied into pBuffer
427
+ size_t bytesCopied;
428
+ } NVPW_RawMetricsConfig_GetConfigImage_Params;
429
+ #define NVPW_RawMetricsConfig_GetConfigImage_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_RawMetricsConfig_GetConfigImage_Params, bytesCopied)
430
+
431
+ NVPA_Status NVPW_RawMetricsConfig_GetConfigImage(NVPW_RawMetricsConfig_GetConfigImage_Params* pParams);
432
+
433
+ typedef struct NVPW_RawMetricsConfig_GetNumPasses_Params
434
+ {
435
+ /// [in]
436
+ size_t structSize;
437
+ /// [in] assign to NULL
438
+ void* pPriv;
439
+ const NVPA_RawMetricsConfig* pRawMetricsConfig;
440
+ /// [out]
441
+ size_t numPipelinedPasses;
442
+ /// [out]
443
+ size_t numIsolatedPasses;
444
+ } NVPW_RawMetricsConfig_GetNumPasses_Params;
445
+ #define NVPW_RawMetricsConfig_GetNumPasses_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_RawMetricsConfig_GetNumPasses_Params, numIsolatedPasses)
446
+
447
+ /// Total num passes = numPipelinedPasses + numIsolatedPasses * numNestingLevels
448
+ NVPA_Status NVPW_RawMetricsConfig_GetNumPasses(NVPW_RawMetricsConfig_GetNumPasses_Params* pParams);
449
+
450
+ typedef struct NVPW_RawMetricsConfig_GetNumPasses_V2_Params
451
+ {
452
+ /// [in]
453
+ size_t structSize;
454
+ /// [in] assign to NULL
455
+ void* pPriv;
456
+ /// [in]
457
+ const NVPA_RawMetricsConfig* pRawMetricsConfig;
458
+ /// [out]
459
+ size_t numPasses;
460
+ } NVPW_RawMetricsConfig_GetNumPasses_V2_Params;
461
+ #define NVPW_RawMetricsConfig_GetNumPasses_V2_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_RawMetricsConfig_GetNumPasses_V2_Params, numPasses)
462
+
463
+ /// Total num passes = numPasses * numNestingLevels
464
+ NVPA_Status NVPW_RawMetricsConfig_GetNumPasses_V2(NVPW_RawMetricsConfig_GetNumPasses_V2_Params* pParams);
465
+
466
+ /**
467
+ * @}
468
+ ******************************************************************************/
469
+
470
+ /***************************************************************************//**
471
+ * @name CounterData Creation
472
+ * @{
473
+ */
474
+
475
+ typedef struct NVPA_CounterDataBuilder NVPA_CounterDataBuilder;
476
+
477
+ typedef struct NVPW_CounterDataBuilder_Create_Params
478
+ {
479
+ /// [in]
480
+ size_t structSize;
481
+ /// [in] assign to NULL
482
+ void* pPriv;
483
+ /// [out]
484
+ NVPA_CounterDataBuilder* pCounterDataBuilder;
485
+ const char* pChipName;
486
+ } NVPW_CounterDataBuilder_Create_Params;
487
+ #define NVPW_CounterDataBuilder_Create_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CounterDataBuilder_Create_Params, pChipName)
488
+
489
+ NVPA_Status NVPW_CounterDataBuilder_Create(NVPW_CounterDataBuilder_Create_Params* pParams);
490
+
491
+ typedef struct NVPW_CounterDataBuilder_Destroy_Params
492
+ {
493
+ /// [in]
494
+ size_t structSize;
495
+ /// [in] assign to NULL
496
+ void* pPriv;
497
+ NVPA_CounterDataBuilder* pCounterDataBuilder;
498
+ } NVPW_CounterDataBuilder_Destroy_Params;
499
+ #define NVPW_CounterDataBuilder_Destroy_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CounterDataBuilder_Destroy_Params, pCounterDataBuilder)
500
+
501
+ NVPA_Status NVPW_CounterDataBuilder_Destroy(NVPW_CounterDataBuilder_Destroy_Params* pParams);
502
+
503
+ typedef struct NVPW_CounterDataBuilder_AddMetrics_Params
504
+ {
505
+ /// [in]
506
+ size_t structSize;
507
+ /// [in] assign to NULL
508
+ void* pPriv;
509
+ NVPA_CounterDataBuilder* pCounterDataBuilder;
510
+ const NVPA_RawMetricRequest* pRawMetricRequests;
511
+ size_t numMetricRequests;
512
+ } NVPW_CounterDataBuilder_AddMetrics_Params;
513
+ #define NVPW_CounterDataBuilder_AddMetrics_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CounterDataBuilder_AddMetrics_Params, numMetricRequests)
514
+
515
+ NVPA_Status NVPW_CounterDataBuilder_AddMetrics(NVPW_CounterDataBuilder_AddMetrics_Params* pParams);
516
+
517
+ typedef struct NVPW_CounterDataBuilder_GetCounterDataPrefix_Params
518
+ {
519
+ /// [in]
520
+ size_t structSize;
521
+ /// [in] assign to NULL
522
+ void* pPriv;
523
+ NVPA_CounterDataBuilder* pCounterDataBuilder;
524
+ /// [in] Number of bytes allocated for pBuffer
525
+ size_t bytesAllocated;
526
+ /// [out] [optional] Buffer receiving the counter data prefix
527
+ uint8_t* pBuffer;
528
+ /// [out] Count of bytes that would be copied to pBuffer
529
+ size_t bytesCopied;
530
+ } NVPW_CounterDataBuilder_GetCounterDataPrefix_Params;
531
+ #define NVPW_CounterDataBuilder_GetCounterDataPrefix_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CounterDataBuilder_GetCounterDataPrefix_Params, bytesCopied)
532
+
533
+ NVPA_Status NVPW_CounterDataBuilder_GetCounterDataPrefix(NVPW_CounterDataBuilder_GetCounterDataPrefix_Params* pParams);
534
+
535
+ /**
536
+ * @}
537
+ ******************************************************************************/
538
+
539
+ /***************************************************************************//**
540
+ * @name MetricsContext - metric configuration and evaluation
541
+ * @{
542
+ */
543
+
544
+ /// 'NVPA_MetricsContext' and its APIs are deprecated, please use 'NVPW_MetricsEvaluator' and its APIs instead.
545
+ typedef struct NVPA_MetricsContext NVPA_MetricsContext;
546
+
547
+ typedef enum NVPA_MetricDetailLevel
548
+ {
549
+ NVPA_METRIC_DETAIL_LEVEL_INVALID,
550
+ NVPA_METRIC_DETAIL_LEVEL_GPU,
551
+ NVPA_METRIC_DETAIL_LEVEL_ALL,
552
+ NVPA_METRIC_DETAIL_LEVEL_GPU_AND_LEAF_INSTANCES,
553
+ NVPA_METRIC_DETAIL_LEVEL__COUNT
554
+ } NVPA_MetricDetailLevel;
555
+
556
+ typedef struct NVPW_MetricsContext_Destroy_Params
557
+ {
558
+ /// [in]
559
+ size_t structSize;
560
+ /// [in] assign to NULL
561
+ void* pPriv;
562
+ NVPA_MetricsContext* pMetricsContext;
563
+ } NVPW_MetricsContext_Destroy_Params;
564
+ #define NVPW_MetricsContext_Destroy_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_Destroy_Params, pMetricsContext)
565
+
566
+ NVPA_Status NVPW_MetricsContext_Destroy(NVPW_MetricsContext_Destroy_Params* pParams);
567
+
568
+ typedef struct NVPW_MetricsContext_RunScript_Params
569
+ {
570
+ /// [in]
571
+ size_t structSize;
572
+ /// [in] assign to NULL
573
+ void* pPriv;
574
+ NVPA_MetricsContext* pMetricsContext;
575
+ /// in : if true, upon error, calls PyErr_Print() which causes exceptions to be logged to stderr
576
+ NVPA_Bool printErrors;
577
+ /// in : the script source code
578
+ const char* pSource;
579
+ /// in : the filename reported in stack traces; if NULL, uses an auto-generated name
580
+ const char* pFileName;
581
+ } NVPW_MetricsContext_RunScript_Params;
582
+ #define NVPW_MetricsContext_RunScript_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_RunScript_Params, pFileName)
583
+
584
+ /// Runs code in the metrics module. Additional metrics can be added through this interface.
585
+ /// If printErrors is true, calls PyErr_Print() which causes exceptions to be logged to stderr.
586
+ /// Equivalent to:
587
+ /// exec(source, metrics.__dict__, metrics.__dict__)
588
+ NVPA_Status NVPW_MetricsContext_RunScript(NVPW_MetricsContext_RunScript_Params* pParams);
589
+
590
+ typedef struct NVPW_MetricsContext_ExecScript_Begin_Params
591
+ {
592
+ /// [in]
593
+ size_t structSize;
594
+ /// [in] assign to NULL
595
+ void* pPriv;
596
+ NVPA_MetricsContext* pMetricsContext;
597
+ /// in : if true, treats pSource as a statement to be eval'd; otherwise, calls exec.
598
+ NVPA_Bool isStatement;
599
+ /// in : if true, upon error, calls PyErr_Print() which causes exceptions to be logged to stderr
600
+ NVPA_Bool printErrors;
601
+ /// in : the script source code
602
+ const char* pSource;
603
+ /// in : the filename reported in stack traces; if NULL, uses an auto-generated name
604
+ const char* pFileName;
605
+ /// out: if isStatement, points at a string form of the evaluation; if !isStatement, points at
606
+ /// str(locals()['result'])
607
+ const char* pResultStr;
608
+ } NVPW_MetricsContext_ExecScript_Begin_Params;
609
+ #define NVPW_MetricsContext_ExecScript_Begin_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_ExecScript_Begin_Params, pResultStr)
610
+
611
+ /// Executes a script in the metrics module, but does not modify its contents (for ordinary queries).
612
+ /// Equivalent to one of:
613
+ /// eval(source, metrics.__dict__, {}) # isStatement true
614
+ /// exec(source, metrics.__dict__, {}) # isStatement false
615
+ NVPA_Status NVPW_MetricsContext_ExecScript_Begin(NVPW_MetricsContext_ExecScript_Begin_Params* pParams);
616
+
617
+ typedef struct NVPW_MetricsContext_ExecScript_End_Params
618
+ {
619
+ /// [in]
620
+ size_t structSize;
621
+ /// [in] assign to NULL
622
+ void* pPriv;
623
+ NVPA_MetricsContext* pMetricsContext;
624
+ } NVPW_MetricsContext_ExecScript_End_Params;
625
+ #define NVPW_MetricsContext_ExecScript_End_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_ExecScript_End_Params, pMetricsContext)
626
+
627
+ /// Cleans up memory internally allocated by NVPW_MetricsContext_ExecScript_Begin.
628
+ NVPA_Status NVPW_MetricsContext_ExecScript_End(NVPW_MetricsContext_ExecScript_End_Params* pParams);
629
+
630
+ typedef struct NVPW_MetricsContext_GetCounterNames_Begin_Params
631
+ {
632
+ /// [in]
633
+ size_t structSize;
634
+ /// [in] assign to NULL
635
+ void* pPriv;
636
+ NVPA_MetricsContext* pMetricsContext;
637
+ /// [out]
638
+ size_t numCounters;
639
+ /// [out]
640
+ const char* const* ppCounterNames;
641
+ } NVPW_MetricsContext_GetCounterNames_Begin_Params;
642
+ #define NVPW_MetricsContext_GetCounterNames_Begin_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_GetCounterNames_Begin_Params, ppCounterNames)
643
+
644
+ /// Outputs (size, pointer) to an array of "const char* pCounterName". The lifetime of the array is tied to
645
+ /// MetricsContext. The names are sorted.
646
+ /// Impl: lazily creates list
647
+ NVPA_Status NVPW_MetricsContext_GetCounterNames_Begin(NVPW_MetricsContext_GetCounterNames_Begin_Params* pParams);
648
+
649
+ typedef struct NVPW_MetricsContext_GetCounterNames_End_Params
650
+ {
651
+ /// [in]
652
+ size_t structSize;
653
+ /// [in] assign to NULL
654
+ void* pPriv;
655
+ NVPA_MetricsContext* pMetricsContext;
656
+ } NVPW_MetricsContext_GetCounterNames_End_Params;
657
+ #define NVPW_MetricsContext_GetCounterNames_End_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_GetCounterNames_End_Params, pMetricsContext)
658
+
659
+ /// Cleans up memory internally allocated by NVPW_MetricsContext_GetCounterNames_Begin.
660
+ NVPA_Status NVPW_MetricsContext_GetCounterNames_End(NVPW_MetricsContext_GetCounterNames_End_Params* pParams);
661
+
662
+ typedef struct NVPW_MetricsContext_GetThroughputNames_Begin_Params
663
+ {
664
+ /// [in]
665
+ size_t structSize;
666
+ /// [in] assign to NULL
667
+ void* pPriv;
668
+ NVPA_MetricsContext* pMetricsContext;
669
+ /// [out]
670
+ size_t numThroughputs;
671
+ /// [out]
672
+ const char* const* ppThroughputNames;
673
+ } NVPW_MetricsContext_GetThroughputNames_Begin_Params;
674
+ #define NVPW_MetricsContext_GetThroughputNames_Begin_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_GetThroughputNames_Begin_Params, ppThroughputNames)
675
+
676
+ /// Outputs (size, pointer) to an array of "const char* pThroughputName". The lifetime of the array is tied to
677
+ /// MetricsContext. The names are sorted.
678
+ /// Impl: lazily creates list
679
+ NVPA_Status NVPW_MetricsContext_GetThroughputNames_Begin(NVPW_MetricsContext_GetThroughputNames_Begin_Params* pParams);
680
+
681
+ typedef struct NVPW_MetricsContext_GetThroughputNames_End_Params
682
+ {
683
+ /// [in]
684
+ size_t structSize;
685
+ /// [in] assign to NULL
686
+ void* pPriv;
687
+ NVPA_MetricsContext* pMetricsContext;
688
+ } NVPW_MetricsContext_GetThroughputNames_End_Params;
689
+ #define NVPW_MetricsContext_GetThroughputNames_End_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_GetThroughputNames_End_Params, pMetricsContext)
690
+
691
+ /// Cleans up memory internally allocated by NVPW_MetricsContext_GetThroughputNames_Begin.
692
+ NVPA_Status NVPW_MetricsContext_GetThroughputNames_End(NVPW_MetricsContext_GetThroughputNames_End_Params* pParams);
693
+
694
+ typedef struct NVPW_MetricsContext_GetRatioNames_Begin_Params
695
+ {
696
+ /// [in]
697
+ size_t structSize;
698
+ /// [in] assign to NULL
699
+ void* pPriv;
700
+ NVPA_MetricsContext* pMetricsContext;
701
+ /// [out]
702
+ size_t numRatios;
703
+ /// [out]
704
+ const char* const* ppRatioNames;
705
+ } NVPW_MetricsContext_GetRatioNames_Begin_Params;
706
+ #define NVPW_MetricsContext_GetRatioNames_Begin_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_GetRatioNames_Begin_Params, ppRatioNames)
707
+
708
+ /// Outputs (size, pointer) to an array of "const char* pRatioName". The lifetime of the array is tied to
709
+ /// MetricsContext. The names are sorted.
710
+ /// Impl: lazily creates list
711
+ NVPA_Status NVPW_MetricsContext_GetRatioNames_Begin(NVPW_MetricsContext_GetRatioNames_Begin_Params* pParams);
712
+
713
+ typedef struct NVPW_MetricsContext_GetRatioNames_End_Params
714
+ {
715
+ /// [in]
716
+ size_t structSize;
717
+ /// [in] assign to NULL
718
+ void* pPriv;
719
+ NVPA_MetricsContext* pMetricsContext;
720
+ } NVPW_MetricsContext_GetRatioNames_End_Params;
721
+ #define NVPW_MetricsContext_GetRatioNames_End_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_GetRatioNames_End_Params, pMetricsContext)
722
+
723
+ /// Cleans up memory internally allocated by NVPW_MetricsContext_GetCounterNames_Begin.
724
+ NVPA_Status NVPW_MetricsContext_GetRatioNames_End(NVPW_MetricsContext_GetRatioNames_End_Params* pParams);
725
+
726
+ typedef struct NVPW_MetricsContext_GetMetricNames_Begin_Params
727
+ {
728
+ /// [in]
729
+ size_t structSize;
730
+ /// [in] assign to NULL
731
+ void* pPriv;
732
+ NVPA_MetricsContext* pMetricsContext;
733
+ /// out: number of elements in array ppMetricNames
734
+ size_t numMetrics;
735
+ /// out: pointer to array of 'const char* pMetricName'
736
+ const char* const* ppMetricNames;
737
+ /// in : if true, doesn't enumerate \<metric\>.peak_{burst, sustained}
738
+ NVPA_Bool hidePeakSubMetrics;
739
+ /// in : if true, doesn't enumerate \<metric\>.per_{active,elapsed,region,frame}_cycle
740
+ NVPA_Bool hidePerCycleSubMetrics;
741
+ /// in : if true, doesn't enumerate \<metric\>.pct_of_peak_{burst,sustained}_{active,elapsed,region,frame}
742
+ NVPA_Bool hidePctOfPeakSubMetrics;
743
+ /// in : if false, enumerate \<unit\>__throughput.pct_of_peak_sustained_elapsed even if hidePctOfPeakSubMetrics
744
+ /// is true
745
+ NVPA_Bool hidePctOfPeakSubMetricsOnThroughputs;
746
+ } NVPW_MetricsContext_GetMetricNames_Begin_Params;
747
+ #define NVPW_MetricsContext_GetMetricNames_Begin_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_GetMetricNames_Begin_Params, hidePctOfPeakSubMetricsOnThroughputs)
748
+
749
+ /// Outputs (size, pointer) to an array of "const char* pMetricName". The lifetime of the array is tied to
750
+ /// MetricsContext. The names are sorted.
751
+ /// Enumerates all metrics at all levels. Includes:
752
+ /// * counter.{sum,avg,min,max}
753
+ /// * throughput.{avg,min,max}
754
+ /// * \<metric\>.peak_{burst, sustained}
755
+ /// * \<metric\>.per_{active,elapsed,region,frame}_cycle
756
+ /// * \<metric\>.pct_of_peak_{burst,sustained}_{active,elapsed,region,frame}
757
+ /// * \<metric\>.per.{other, other_pct}
758
+ NVPA_Status NVPW_MetricsContext_GetMetricNames_Begin(NVPW_MetricsContext_GetMetricNames_Begin_Params* pParams);
759
+
760
+ typedef struct NVPW_MetricsContext_GetMetricNames_End_Params
761
+ {
762
+ /// [in]
763
+ size_t structSize;
764
+ /// [in] assign to NULL
765
+ void* pPriv;
766
+ NVPA_MetricsContext* pMetricsContext;
767
+ } NVPW_MetricsContext_GetMetricNames_End_Params;
768
+ #define NVPW_MetricsContext_GetMetricNames_End_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_GetMetricNames_End_Params, pMetricsContext)
769
+
770
+ /// Cleans up memory internally allocated by NVPW_MetricsContext_GetMetricNames_Begin.
771
+ NVPA_Status NVPW_MetricsContext_GetMetricNames_End(NVPW_MetricsContext_GetMetricNames_End_Params* pParams);
772
+
773
+ typedef struct NVPW_MetricsContext_GetThroughputBreakdown_Begin_Params
774
+ {
775
+ /// [in]
776
+ size_t structSize;
777
+ /// [in] assign to NULL
778
+ void* pPriv;
779
+ NVPA_MetricsContext* pMetricsContext;
780
+ const char* pThroughputName;
781
+ const char* const* ppCounterNames;
782
+ const char* const* ppSubThroughputNames;
783
+ } NVPW_MetricsContext_GetThroughputBreakdown_Begin_Params;
784
+ #define NVPW_MetricsContext_GetThroughputBreakdown_Begin_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_GetThroughputBreakdown_Begin_Params, ppSubThroughputNames)
785
+
786
+ /// After this function returns, the lifetimes of strings pointed to by {ppCounterNames, ppSubThroughputNames,
787
+ /// ppSubMetricNames} are guaranteed until NVPW_MetricsContext_GetThroughputBreakdown_End, or until pMetricsContext
788
+ /// is destroyed
789
+ NVPA_Status NVPW_MetricsContext_GetThroughputBreakdown_Begin(NVPW_MetricsContext_GetThroughputBreakdown_Begin_Params* pParams);
790
+
791
+ typedef struct NVPW_MetricsContext_GetThroughputBreakdown_End_Params
792
+ {
793
+ /// [in]
794
+ size_t structSize;
795
+ /// [in] assign to NULL
796
+ void* pPriv;
797
+ NVPA_MetricsContext* pMetricsContext;
798
+ } NVPW_MetricsContext_GetThroughputBreakdown_End_Params;
799
+ #define NVPW_MetricsContext_GetThroughputBreakdown_End_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_GetThroughputBreakdown_End_Params, pMetricsContext)
800
+
801
+ /// Cleans up memory internally allocated by NVPW_MetricsContext_GetThroughputBreakdown_Begin.
802
+ NVPA_Status NVPW_MetricsContext_GetThroughputBreakdown_End(NVPW_MetricsContext_GetThroughputBreakdown_End_Params* pParams);
803
+
804
+ typedef struct NVPW_MetricsContext_GetMetricProperties_Begin_Params
805
+ {
806
+ /// [in]
807
+ size_t structSize;
808
+ /// [in] assign to NULL
809
+ void* pPriv;
810
+ NVPA_MetricsContext* pMetricsContext;
811
+ const char* pMetricName;
812
+ /// out
813
+ const char* pDescription;
814
+ /// out
815
+ const char* pDimUnits;
816
+ /// out: a NULL-terminated array of pointers to RawMetric names that can be passed to
817
+ /// NVPW_RawMetricsConfig_AddMetrics()
818
+ const char** ppRawMetricDependencies;
819
+ /// out: metric.peak_burst.value.gpu
820
+ double gpuBurstRate;
821
+ /// out: metric.peak_sustained.value.gpu
822
+ double gpuSustainedRate;
823
+ /// out: a NULL-terminated array of pointers to RawMetric names that can be passed to
824
+ /// NVPW_RawMetricsConfig_AddMetrics().
825
+ const char** ppOptionalRawMetricDependencies;
826
+ } NVPW_MetricsContext_GetMetricProperties_Begin_Params;
827
+ #define NVPW_MetricsContext_GetMetricProperties_Begin_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_GetMetricProperties_Begin_Params, ppOptionalRawMetricDependencies)
828
+
829
+ /// After this function returns, the lifetimes of strings pointed to by pMetricProperties or
830
+ /// ppOptionalRawMetricDependencies are guaranteed until NVPW_MetricsContext_GetMetricProperties_End, or until
831
+ /// pMetricsContext is destroyed.
832
+ NVPA_Status NVPW_MetricsContext_GetMetricProperties_Begin(NVPW_MetricsContext_GetMetricProperties_Begin_Params* pParams);
833
+
834
+ typedef struct NVPW_MetricsContext_GetMetricProperties_End_Params
835
+ {
836
+ /// [in]
837
+ size_t structSize;
838
+ /// [in] assign to NULL
839
+ void* pPriv;
840
+ NVPA_MetricsContext* pMetricsContext;
841
+ } NVPW_MetricsContext_GetMetricProperties_End_Params;
842
+ #define NVPW_MetricsContext_GetMetricProperties_End_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_GetMetricProperties_End_Params, pMetricsContext)
843
+
844
+ /// Cleans up memory internally allocated by NVPW_MetricsContext_GetMetricProperties_Begin.
845
+ NVPA_Status NVPW_MetricsContext_GetMetricProperties_End(NVPW_MetricsContext_GetMetricProperties_End_Params* pParams);
846
+
847
+ typedef struct NVPW_MetricsContext_SetCounterData_Params
848
+ {
849
+ /// [in]
850
+ size_t structSize;
851
+ /// [in] assign to NULL
852
+ void* pPriv;
853
+ NVPA_MetricsContext* pMetricsContext;
854
+ const uint8_t* pCounterDataImage;
855
+ size_t rangeIndex;
856
+ NVPA_Bool isolated;
857
+ } NVPW_MetricsContext_SetCounterData_Params;
858
+ #define NVPW_MetricsContext_SetCounterData_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_SetCounterData_Params, isolated)
859
+
860
+ /// Sets data for subsequent evaluation calls.
861
+ /// Only one (CounterData, range, isolated) set of counters can be active at a time; subsequent calls will overwrite
862
+ /// previous calls' data.
863
+ NVPA_Status NVPW_MetricsContext_SetCounterData(NVPW_MetricsContext_SetCounterData_Params* pParams);
864
+
865
+ typedef struct NVPW_MetricsContext_SetUserData_Params
866
+ {
867
+ /// [in]
868
+ size_t structSize;
869
+ /// [in] assign to NULL
870
+ void* pPriv;
871
+ NVPA_MetricsContext* pMetricsContext;
872
+ /// duration in ns of user defined frame
873
+ double frameDuration;
874
+ /// duration in ns of user defined region
875
+ double regionDuration;
876
+ } NVPW_MetricsContext_SetUserData_Params;
877
+ #define NVPW_MetricsContext_SetUserData_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_SetUserData_Params, regionDuration)
878
+
879
+ /// Sets user data for subsequent evaluation calls.
880
+ NVPA_Status NVPW_MetricsContext_SetUserData(NVPW_MetricsContext_SetUserData_Params* pParams);
881
+
882
+ typedef struct NVPW_MetricsContext_EvaluateToGpuValues_Params
883
+ {
884
+ /// [in]
885
+ size_t structSize;
886
+ /// [in] assign to NULL
887
+ void* pPriv;
888
+ NVPA_MetricsContext* pMetricsContext;
889
+ size_t numMetrics;
890
+ const char* const* ppMetricNames;
891
+ /// [out]
892
+ double* pMetricValues;
893
+ } NVPW_MetricsContext_EvaluateToGpuValues_Params;
894
+ #define NVPW_MetricsContext_EvaluateToGpuValues_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_EvaluateToGpuValues_Params, pMetricValues)
895
+
896
+ /// Evaluate multiple metrics to retrieve their GPU values.
897
+ NVPA_Status NVPW_MetricsContext_EvaluateToGpuValues(NVPW_MetricsContext_EvaluateToGpuValues_Params* pParams);
898
+
899
+ typedef struct NVPW_MetricsContext_GetMetricSuffix_Begin_Params
900
+ {
901
+ /// [in]
902
+ size_t structSize;
903
+ /// [in] assign to NULL
904
+ void* pPriv;
905
+ NVPA_MetricsContext* pMetricsContext;
906
+ /// in: pointer to the metric name
907
+ const char* pMetricName;
908
+ /// out: number of elements in array ppSuffixes
909
+ size_t numSuffixes;
910
+ /// out: pointer to array of 'const char* pSuffixes'
911
+ const char* const* ppSuffixes;
912
+ /// in : if true, doesn't enumerate \<metric\>.peak_{burst, sustained}
913
+ NVPA_Bool hidePeakSubMetrics;
914
+ /// in : if true, doesn't enumerate \<metric\>.per_{active,elapsed,region,frame}_cycle
915
+ NVPA_Bool hidePerCycleSubMetrics;
916
+ /// in : if true, doesn't enumerate \<metric\>.pct_of_peak_{burst,sustained}_{active,elapsed,region,frame}
917
+ NVPA_Bool hidePctOfPeakSubMetrics;
918
+ /// in : if false, enumerate \<unit\>__throughput.pct_of_peak_sustained_elapsed even if hidePctOfPeakSubMetrics
919
+ /// is true
920
+ NVPA_Bool hidePctOfPeakSubMetricsOnThroughputs;
921
+ } NVPW_MetricsContext_GetMetricSuffix_Begin_Params;
922
+ #define NVPW_MetricsContext_GetMetricSuffix_Begin_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_GetMetricSuffix_Begin_Params, hidePctOfPeakSubMetricsOnThroughputs)
923
+
924
+ /// Outputs (size, pointer) to an array of "const char* pSuffixes". The lifetime of the array is tied to
925
+ /// MetricsContext.
926
+ /// return all the suffixes the metric has. the possible suffixes include:
927
+ /// * counter.{sum,avg,min,max}
928
+ /// * throughput.{avg,min,max}
929
+ /// * \<metric\>.peak_{burst, sustained}
930
+ /// * \<metric\>.per_{active,elapsed,region,frame}_cycle
931
+ /// * \<metric\>.pct_of_peak_{burst,sustained}_{active,elapsed,region,frame}
932
+ /// * \<metric\>.per.{other, other_pct}
933
+ NVPA_Status NVPW_MetricsContext_GetMetricSuffix_Begin(NVPW_MetricsContext_GetMetricSuffix_Begin_Params* pParams);
934
+
935
+ typedef struct NVPW_MetricsContext_GetMetricSuffix_End_Params
936
+ {
937
+ /// [in]
938
+ size_t structSize;
939
+ /// [in] assign to NULL
940
+ void* pPriv;
941
+ NVPA_MetricsContext* pMetricsContext;
942
+ } NVPW_MetricsContext_GetMetricSuffix_End_Params;
943
+ #define NVPW_MetricsContext_GetMetricSuffix_End_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_GetMetricSuffix_End_Params, pMetricsContext)
944
+
945
+ /// Cleans up memory internally allocated by NVPW_MetricsContext_GetMetricSuffix_Begin.
946
+ NVPA_Status NVPW_MetricsContext_GetMetricSuffix_End(NVPW_MetricsContext_GetMetricSuffix_End_Params* pParams);
947
+
948
+ typedef struct NVPW_MetricsContext_GetMetricBaseNames_Begin_Params
949
+ {
950
+ /// [in]
951
+ size_t structSize;
952
+ /// [in] assign to NULL
953
+ void* pPriv;
954
+ NVPA_MetricsContext* pMetricsContext;
955
+ /// out: number of elements in array pMetricsBaseNames
956
+ size_t numMetricBaseNames;
957
+ /// out: pointer to array of 'const char* pMetricsBaseName'
958
+ const char* const* ppMetricBaseNames;
959
+ } NVPW_MetricsContext_GetMetricBaseNames_Begin_Params;
960
+ #define NVPW_MetricsContext_GetMetricBaseNames_Begin_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_GetMetricBaseNames_Begin_Params, ppMetricBaseNames)
961
+
962
+ /// Outputs (size, pointer) to an array of "const char* ppMetricBaseNames". The lifetime of the array is tied to
963
+ /// MetricsContext.
964
+ /// return all the metric base names.
965
+ NVPA_Status NVPW_MetricsContext_GetMetricBaseNames_Begin(NVPW_MetricsContext_GetMetricBaseNames_Begin_Params* pParams);
966
+
967
+ typedef struct NVPW_MetricsContext_GetMetricBaseNames_End_Params
968
+ {
969
+ /// [in]
970
+ size_t structSize;
971
+ /// [in] assign to NULL
972
+ void* pPriv;
973
+ NVPA_MetricsContext* pMetricsContext;
974
+ } NVPW_MetricsContext_GetMetricBaseNames_End_Params;
975
+ #define NVPW_MetricsContext_GetMetricBaseNames_End_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_GetMetricBaseNames_End_Params, pMetricsContext)
976
+
977
+ /// Cleans up memory internally allocated by NVPW_MetricsContext_GetMetricBaseNames_Begin.
978
+ NVPA_Status NVPW_MetricsContext_GetMetricBaseNames_End(NVPW_MetricsContext_GetMetricBaseNames_End_Params* pParams);
979
+
980
+ /**
981
+ * @}
982
+ ******************************************************************************/
983
+
984
+ /***************************************************************************//**
985
+ * @name Metrics Evaluator
986
+ * @{
987
+ */
988
+
989
+ typedef struct NVPW_MetricsEvaluator NVPW_MetricsEvaluator;
990
+
991
+ #ifndef NVPW_DIM_UNIT_DEFINED
992
+ #define NVPW_DIM_UNIT_DEFINED
993
+ typedef enum NVPW_DimUnitName
994
+ {
995
+ NVPW_DIM_UNIT_INVALID = 3518299157,
996
+ NVPW_DIM_UNIT_UNITLESS = 2126137902,
997
+ NVPW_DIM_UNIT_ATTRIBUTES = 3776338729,
998
+ NVPW_DIM_UNIT_BYTES = 3797850191,
999
+ NVPW_DIM_UNIT_CTAS = 1960564139,
1000
+ NVPW_DIM_UNIT_DRAM_CYCLES = 2650981327,
1001
+ NVPW_DIM_UNIT_FBP_CYCLES = 1785238957,
1002
+ NVPW_DIM_UNIT_FE_OPS = 2919159083,
1003
+ NVPW_DIM_UNIT_GPC_CYCLES = 1222631184,
1004
+ NVPW_DIM_UNIT_IDC_REQUESTS = 2012649669,
1005
+ NVPW_DIM_UNIT_INSTRUCTIONS = 1418625543,
1006
+ NVPW_DIM_UNIT_L1DATA_BANK_ACCESSES = 1479493682,
1007
+ NVPW_DIM_UNIT_L1DATA_BANK_CONFLICTS = 3433170787,
1008
+ NVPW_DIM_UNIT_L1TEX_REQUESTS = 1306473767,
1009
+ NVPW_DIM_UNIT_L1TEX_TAGS = 26573010,
1010
+ NVPW_DIM_UNIT_L1TEX_WAVEFRONTS = 129373765,
1011
+ NVPW_DIM_UNIT_L2_REQUESTS = 1143695106,
1012
+ NVPW_DIM_UNIT_L2_SECTORS = 3424101564,
1013
+ NVPW_DIM_UNIT_L2_TAGS = 3755612781,
1014
+ NVPW_DIM_UNIT_NANOSECONDS = 3047500672,
1015
+ NVPW_DIM_UNIT_NVLRX_CYCLES = 4059934930,
1016
+ NVPW_DIM_UNIT_NVLTX_CYCLES = 1814350488,
1017
+ NVPW_DIM_UNIT_PCIE_CYCLES = 1230450943,
1018
+ NVPW_DIM_UNIT_PERCENT = 1284354694,
1019
+ NVPW_DIM_UNIT_PIXELS = 4227616663,
1020
+ NVPW_DIM_UNIT_PIXEL_SHADER_BARRIERS = 3705502518,
1021
+ NVPW_DIM_UNIT_PRIMITIVES = 2373084002,
1022
+ NVPW_DIM_UNIT_QUADS = 1539753497,
1023
+ NVPW_DIM_UNIT_REGISTERS = 2837260947,
1024
+ NVPW_DIM_UNIT_SAMPLES = 746046551,
1025
+ NVPW_DIM_UNIT_SECONDS = 1164825258,
1026
+ NVPW_DIM_UNIT_SYS_CYCLES = 3310821688,
1027
+ NVPW_DIM_UNIT_TEXELS = 1293214069,
1028
+ NVPW_DIM_UNIT_THREADS = 164261907,
1029
+ NVPW_DIM_UNIT_VERTICES = 1873662209,
1030
+ NVPW_DIM_UNIT_WARPS = 97951949,
1031
+ NVPW_DIM_UNIT_WORKLOADS = 1728142656
1032
+ } NVPW_DimUnitName;
1033
+ #endif //NVPW_DIM_UNIT_DEFINED
1034
+
1035
+ #ifndef NVPW_HW_UNIT_DEFINED
1036
+ #define NVPW_HW_UNIT_DEFINED
1037
+ typedef enum NVPW_HwUnit
1038
+ {
1039
+ NVPW_HW_UNIT_INVALID = 3498035701,
1040
+ NVPW_HW_UNIT_CROP = 2872137846,
1041
+ NVPW_HW_UNIT_DRAM = 1662616918,
1042
+ NVPW_HW_UNIT_DRAMC = 1401232876,
1043
+ NVPW_HW_UNIT_FBP = 2947194306,
1044
+ NVPW_HW_UNIT_FBPA = 690045803,
1045
+ NVPW_HW_UNIT_FE = 2204924321,
1046
+ NVPW_HW_UNIT_GPC = 1911735839,
1047
+ NVPW_HW_UNIT_GPU = 1014363534,
1048
+ NVPW_HW_UNIT_GR = 2933618517,
1049
+ NVPW_HW_UNIT_IDC = 842765289,
1050
+ NVPW_HW_UNIT_L1TEX = 893940957,
1051
+ NVPW_HW_UNIT_LTS = 2333266697,
1052
+ NVPW_HW_UNIT_NVLRX = 3091684901,
1053
+ NVPW_HW_UNIT_NVLTX = 869679659,
1054
+ NVPW_HW_UNIT_PCIE = 3433264174,
1055
+ NVPW_HW_UNIT_PDA = 345193251,
1056
+ NVPW_HW_UNIT_PES = 804128425,
1057
+ NVPW_HW_UNIT_PROP = 3339255507,
1058
+ NVPW_HW_UNIT_RASTER = 187932504,
1059
+ NVPW_HW_UNIT_SM = 724224710,
1060
+ NVPW_HW_UNIT_SMSP = 2837616917,
1061
+ NVPW_HW_UNIT_SYS = 768990063,
1062
+ NVPW_HW_UNIT_TPC = 1889024613,
1063
+ NVPW_HW_UNIT_VAF = 753670509,
1064
+ NVPW_HW_UNIT_VPC = 275561583,
1065
+ NVPW_HW_UNIT_ZROP = 979500456
1066
+ } NVPW_HwUnit;
1067
+ #endif //NVPW_HW_UNIT_DEFINED
1068
+
1069
+ typedef enum NVPW_RollupOp
1070
+ {
1071
+ NVPW_ROLLUP_OP_AVG = 0,
1072
+ NVPW_ROLLUP_OP_MAX,
1073
+ NVPW_ROLLUP_OP_MIN,
1074
+ NVPW_ROLLUP_OP_SUM,
1075
+ NVPW_ROLLUP_OP__COUNT
1076
+ } NVPW_RollupOp;
1077
+
1078
+ typedef enum NVPW_MetricType
1079
+ {
1080
+ NVPW_METRIC_TYPE_COUNTER = 0,
1081
+ NVPW_METRIC_TYPE_RATIO,
1082
+ NVPW_METRIC_TYPE_THROUGHPUT,
1083
+ NVPW_METRIC_TYPE__COUNT
1084
+ } NVPW_MetricType;
1085
+
1086
+ typedef enum NVPW_Submetric
1087
+ {
1088
+ NVPW_SUBMETRIC_NONE = 0,
1089
+ NVPW_SUBMETRIC_PEAK_SUSTAINED = 1,
1090
+ NVPW_SUBMETRIC_PEAK_SUSTAINED_ACTIVE = 2,
1091
+ NVPW_SUBMETRIC_PEAK_SUSTAINED_ACTIVE_PER_SECOND = 3,
1092
+ NVPW_SUBMETRIC_PEAK_SUSTAINED_ELAPSED = 4,
1093
+ NVPW_SUBMETRIC_PEAK_SUSTAINED_ELAPSED_PER_SECOND = 5,
1094
+ NVPW_SUBMETRIC_PEAK_SUSTAINED_FRAME = 6,
1095
+ NVPW_SUBMETRIC_PEAK_SUSTAINED_FRAME_PER_SECOND = 7,
1096
+ NVPW_SUBMETRIC_PEAK_SUSTAINED_REGION = 8,
1097
+ NVPW_SUBMETRIC_PEAK_SUSTAINED_REGION_PER_SECOND = 9,
1098
+ NVPW_SUBMETRIC_PER_CYCLE_ACTIVE = 10,
1099
+ NVPW_SUBMETRIC_PER_CYCLE_ELAPSED = 11,
1100
+ NVPW_SUBMETRIC_PER_CYCLE_IN_FRAME = 12,
1101
+ NVPW_SUBMETRIC_PER_CYCLE_IN_REGION = 13,
1102
+ NVPW_SUBMETRIC_PER_SECOND = 14,
1103
+ NVPW_SUBMETRIC_PCT_OF_PEAK_SUSTAINED_ACTIVE = 15,
1104
+ NVPW_SUBMETRIC_PCT_OF_PEAK_SUSTAINED_ELAPSED = 16,
1105
+ NVPW_SUBMETRIC_PCT_OF_PEAK_SUSTAINED_FRAME = 17,
1106
+ NVPW_SUBMETRIC_PCT_OF_PEAK_SUSTAINED_REGION = 18,
1107
+ NVPW_SUBMETRIC_MAX_RATE = 19,
1108
+ NVPW_SUBMETRIC_PCT = 20,
1109
+ NVPW_SUBMETRIC_RATIO = 21,
1110
+ NVPW_SUBMETRIC__COUNT
1111
+ } NVPW_Submetric;
1112
+
1113
+ typedef struct NVPW_MetricEvalRequest
1114
+ {
1115
+ /// the metric index as in 'NVPW_MetricsEvaluator_GetMetricNames'
1116
+ size_t metricIndex;
1117
+ /// one of 'NVPW_MetricType'
1118
+ uint8_t metricType;
1119
+ /// one of 'NVPW_RollupOp', required for Counter and Throughput, doesn't apply to Ratio
1120
+ uint8_t rollupOp;
1121
+ /// one of 'NVPW_Submetric', required for Ratio and Throughput, optional for Counter
1122
+ uint16_t submetric;
1123
+ } NVPW_MetricEvalRequest;
1124
+ #define NVPW_MetricEvalRequest_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricEvalRequest, submetric)
1125
+
1126
+ typedef struct NVPW_DimUnitFactor
1127
+ {
1128
+ /// one of 'NVPW_DimUnitName'
1129
+ uint32_t dimUnit;
1130
+ int8_t exponent;
1131
+ } NVPW_DimUnitFactor;
1132
+ #define NVPW_DimUnitFactor_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_DimUnitFactor, exponent)
1133
+
1134
+ typedef struct NVPW_MetricsEvaluator_Destroy_Params
1135
+ {
1136
+ /// [in]
1137
+ size_t structSize;
1138
+ /// [in] assign to NULL
1139
+ void* pPriv;
1140
+ /// [in]
1141
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
1142
+ } NVPW_MetricsEvaluator_Destroy_Params;
1143
+ #define NVPW_MetricsEvaluator_Destroy_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_Destroy_Params, pMetricsEvaluator)
1144
+
1145
+ NVPA_Status NVPW_MetricsEvaluator_Destroy(NVPW_MetricsEvaluator_Destroy_Params* pParams);
1146
+
1147
+ typedef struct NVPW_MetricsEvaluator_GetMetricNames_Params
1148
+ {
1149
+ /// [in]
1150
+ size_t structSize;
1151
+ /// [in] assign to NULL
1152
+ void* pPriv;
1153
+ /// [in]
1154
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
1155
+ /// [in] one of 'NVPW_MetricType'
1156
+ uint8_t metricType;
1157
+ /// [out]
1158
+ const char* pMetricNames;
1159
+ /// [out]
1160
+ const size_t* pMetricNameBeginIndices;
1161
+ /// [out]
1162
+ size_t numMetrics;
1163
+ } NVPW_MetricsEvaluator_GetMetricNames_Params;
1164
+ #define NVPW_MetricsEvaluator_GetMetricNames_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_GetMetricNames_Params, numMetrics)
1165
+
1166
+ NVPA_Status NVPW_MetricsEvaluator_GetMetricNames(NVPW_MetricsEvaluator_GetMetricNames_Params* pParams);
1167
+
1168
+ typedef struct NVPW_MetricsEvaluator_GetMetricTypeAndIndex_Params
1169
+ {
1170
+ /// [in]
1171
+ size_t structSize;
1172
+ /// [in] assign to NULL
1173
+ void* pPriv;
1174
+ /// [in]
1175
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
1176
+ /// [in] can be either a base metric or a metric
1177
+ const char* pMetricName;
1178
+ /// [out] one of 'NVPW_MetricType'
1179
+ uint8_t metricType;
1180
+ /// [out] the metric index as in 'NVPW_MetricsEvaluator_GetMetricNames'
1181
+ size_t metricIndex;
1182
+ } NVPW_MetricsEvaluator_GetMetricTypeAndIndex_Params;
1183
+ #define NVPW_MetricsEvaluator_GetMetricTypeAndIndex_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_GetMetricTypeAndIndex_Params, metricIndex)
1184
+
1185
+ NVPA_Status NVPW_MetricsEvaluator_GetMetricTypeAndIndex(NVPW_MetricsEvaluator_GetMetricTypeAndIndex_Params* pParams);
1186
+
1187
+ typedef struct NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params
1188
+ {
1189
+ /// [in]
1190
+ size_t structSize;
1191
+ /// [in] assign to NULL
1192
+ void* pPriv;
1193
+ /// [in]
1194
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
1195
+ /// [in]
1196
+ const char* pMetricName;
1197
+ /// [inout] 'pMetricEvalRequest' is in, '*pMetricEvalRequest' is out
1198
+ struct NVPW_MetricEvalRequest* pMetricEvalRequest;
1199
+ /// [in] set to 'NVPW_MetricEvalRequest_STRUCT_SIZE'
1200
+ size_t metricEvalRequestStructSize;
1201
+ } NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params;
1202
+ #define NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params, metricEvalRequestStructSize)
1203
+
1204
+ NVPA_Status NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest(NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params* pParams);
1205
+
1206
+ typedef struct NVPW_MetricsEvaluator_HwUnitToString_Params
1207
+ {
1208
+ /// [in]
1209
+ size_t structSize;
1210
+ /// [in] assign to NULL
1211
+ void* pPriv;
1212
+ /// [in]
1213
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
1214
+ /// [in] one of 'NVPW_HwUnit'
1215
+ uint32_t hwUnit;
1216
+ /// [out]
1217
+ const char* pHwUnitName;
1218
+ } NVPW_MetricsEvaluator_HwUnitToString_Params;
1219
+ #define NVPW_MetricsEvaluator_HwUnitToString_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_HwUnitToString_Params, pHwUnitName)
1220
+
1221
+ NVPA_Status NVPW_MetricsEvaluator_HwUnitToString(NVPW_MetricsEvaluator_HwUnitToString_Params* pParams);
1222
+
1223
+ typedef struct NVPW_MetricsEvaluator_GetCounterProperties_Params
1224
+ {
1225
+ /// [in]
1226
+ size_t structSize;
1227
+ /// [in] assign to NULL
1228
+ void* pPriv;
1229
+ /// [in]
1230
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
1231
+ /// [in] the metric index as in 'NVPW_MetricsEvaluator_GetMetricNames'
1232
+ size_t counterIndex;
1233
+ /// [out]
1234
+ const char* pDescription;
1235
+ /// [out] one of 'NVPW_HwUnit'
1236
+ uint32_t hwUnit;
1237
+ } NVPW_MetricsEvaluator_GetCounterProperties_Params;
1238
+ #define NVPW_MetricsEvaluator_GetCounterProperties_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_GetCounterProperties_Params, hwUnit)
1239
+
1240
+ NVPA_Status NVPW_MetricsEvaluator_GetCounterProperties(NVPW_MetricsEvaluator_GetCounterProperties_Params* pParams);
1241
+
1242
+ typedef struct NVPW_MetricsEvaluator_GetRatioMetricProperties_Params
1243
+ {
1244
+ /// [in]
1245
+ size_t structSize;
1246
+ /// [in] assign to NULL
1247
+ void* pPriv;
1248
+ /// [in]
1249
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
1250
+ /// [in] the metric index as in 'NVPW_MetricsEvaluator_GetMetricNames'
1251
+ size_t ratioMetricIndex;
1252
+ /// [out]
1253
+ const char* pDescription;
1254
+ /// [out]
1255
+ uint64_t hwUnit;
1256
+ } NVPW_MetricsEvaluator_GetRatioMetricProperties_Params;
1257
+ #define NVPW_MetricsEvaluator_GetRatioMetricProperties_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_GetRatioMetricProperties_Params, hwUnit)
1258
+
1259
+ NVPA_Status NVPW_MetricsEvaluator_GetRatioMetricProperties(NVPW_MetricsEvaluator_GetRatioMetricProperties_Params* pParams);
1260
+
1261
+ typedef struct NVPW_MetricsEvaluator_GetThroughputMetricProperties_Params
1262
+ {
1263
+ /// [in]
1264
+ size_t structSize;
1265
+ /// [in] assign to NULL
1266
+ void* pPriv;
1267
+ /// [in]
1268
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
1269
+ /// [in] the metric index as in 'NVPW_MetricsEvaluator_GetMetricNames'
1270
+ size_t throughputMetricIndex;
1271
+ /// [out]
1272
+ const char* pDescription;
1273
+ /// [out]
1274
+ uint32_t hwUnit;
1275
+ /// [out] number of constituent counters for the throughput metric
1276
+ size_t numCounters;
1277
+ /// [out] metric indices as in 'NVPW_MetricsEvaluator_GetMetricNames', valid if 'numCounters' > 0, otherwise
1278
+ /// returned as nullptr
1279
+ const size_t* pCounterIndices;
1280
+ /// [out] number of constituent sub-throughputs for the throughput metric
1281
+ size_t numSubThroughputs;
1282
+ /// [out] metric indices as in 'NVPW_MetricsEvaluator_GetMetricNames', valid if 'numSubThroughputs' > 0,
1283
+ /// otherwise returned as nullptr
1284
+ const size_t* pSubThroughputIndices;
1285
+ } NVPW_MetricsEvaluator_GetThroughputMetricProperties_Params;
1286
+ #define NVPW_MetricsEvaluator_GetThroughputMetricProperties_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_GetThroughputMetricProperties_Params, pSubThroughputIndices)
1287
+
1288
+ NVPA_Status NVPW_MetricsEvaluator_GetThroughputMetricProperties(NVPW_MetricsEvaluator_GetThroughputMetricProperties_Params* pParams);
1289
+
1290
+ typedef struct NVPW_MetricsEvaluator_GetSupportedSubmetrics_Params
1291
+ {
1292
+ /// [in]
1293
+ size_t structSize;
1294
+ /// [in] assign to NULL
1295
+ void* pPriv;
1296
+ /// [in]
1297
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
1298
+ /// [in] one of 'NVPW_MetricType'
1299
+ uint8_t metricType;
1300
+ /// [out] an array of 'NVPW_Submetric'
1301
+ const uint16_t* pSupportedSubmetrics;
1302
+ /// [out]
1303
+ size_t numSupportedSubmetrics;
1304
+ } NVPW_MetricsEvaluator_GetSupportedSubmetrics_Params;
1305
+ #define NVPW_MetricsEvaluator_GetSupportedSubmetrics_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_GetSupportedSubmetrics_Params, numSupportedSubmetrics)
1306
+
1307
+ NVPA_Status NVPW_MetricsEvaluator_GetSupportedSubmetrics(NVPW_MetricsEvaluator_GetSupportedSubmetrics_Params* pParams);
1308
+
1309
+ typedef struct NVPW_MetricsEvaluator_GetMetricRawDependencies_Params
1310
+ {
1311
+ /// [in]
1312
+ size_t structSize;
1313
+ /// [in] assign to NULL
1314
+ void* pPriv;
1315
+ /// [in]
1316
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
1317
+ /// [in]
1318
+ const struct NVPW_MetricEvalRequest* pMetricEvalRequests;
1319
+ /// [in]
1320
+ size_t numMetricEvalRequests;
1321
+ /// [in] set to 'NVPW_MetricEvalRequest_STRUCT_SIZE'
1322
+ size_t metricEvalRequestStructSize;
1323
+ /// [in] set to sizeof('NVPW_MetricEvalRequest')
1324
+ size_t metricEvalRequestStrideSize;
1325
+ /// [inout] 'ppRawDependencies' is in, '*ppRawDependencies' is out
1326
+ const char** ppRawDependencies;
1327
+ /// [inout] if 'ppRawDependencies' is NULL, number of raw dependencies available will be returned; otherwise it
1328
+ /// should be set to the number of elements allocated for 'ppRawDependencies', and on return, it will be
1329
+ /// overwritten by number of elements copied to 'ppRawDependencies'
1330
+ size_t numRawDependencies;
1331
+ /// [inout] 'ppOptionalRawDependencies' is in, '*ppOptionalRawDependencies' is out
1332
+ const char** ppOptionalRawDependencies;
1333
+ /// [inout] if 'ppOptionalRawDependencies' is NULL, number of optional raw dependencies available will be
1334
+ /// returned; otherwise it should be set to the number of elements allocated for 'ppOptionalRawDependencies',
1335
+ /// and on return, it will be overwritten by number of elements copied to 'ppOptionalRawDependencies'
1336
+ size_t numOptionalRawDependencies;
1337
+ } NVPW_MetricsEvaluator_GetMetricRawDependencies_Params;
1338
+ #define NVPW_MetricsEvaluator_GetMetricRawDependencies_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_GetMetricRawDependencies_Params, numOptionalRawDependencies)
1339
+
1340
+ NVPA_Status NVPW_MetricsEvaluator_GetMetricRawDependencies(NVPW_MetricsEvaluator_GetMetricRawDependencies_Params* pParams);
1341
+
1342
+ typedef struct NVPW_MetricsEvaluator_DimUnitToString_Params
1343
+ {
1344
+ /// [in]
1345
+ size_t structSize;
1346
+ /// [in] assign to NULL
1347
+ void* pPriv;
1348
+ /// [in]
1349
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
1350
+ /// [in] one of 'NVPW_DimUnitName'
1351
+ uint32_t dimUnit;
1352
+ /// [out]
1353
+ const char* pSingularName;
1354
+ /// [out]
1355
+ const char* pPluralName;
1356
+ } NVPW_MetricsEvaluator_DimUnitToString_Params;
1357
+ #define NVPW_MetricsEvaluator_DimUnitToString_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_DimUnitToString_Params, pPluralName)
1358
+
1359
+ NVPA_Status NVPW_MetricsEvaluator_DimUnitToString(NVPW_MetricsEvaluator_DimUnitToString_Params* pParams);
1360
+
1361
+ typedef struct NVPW_MetricsEvaluator_GetMetricDimUnits_Params
1362
+ {
1363
+ /// [in]
1364
+ size_t structSize;
1365
+ /// [in] assign to NULL
1366
+ void* pPriv;
1367
+ /// [in]
1368
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
1369
+ /// [in]
1370
+ const struct NVPW_MetricEvalRequest* pMetricEvalRequest;
1371
+ /// [in] set to 'NVPW_MetricEvalRequest_STRUCT_SIZE'
1372
+ size_t metricEvalRequestStructSize;
1373
+ /// [inout] 'pDimUnits' is in, '*pDimUnits' is out
1374
+ NVPW_DimUnitFactor* pDimUnits;
1375
+ /// [inout] if 'pDimUnits' is NULL, number of dim-units available will be returned; otherwise it should be set
1376
+ /// to the number of elements allocated for 'pDimUnits', and on return, it will be overwritten by number of
1377
+ /// elements copied to 'pDimUnits'
1378
+ size_t numDimUnits;
1379
+ /// [in] set to 'NVPW_DimUnitFactor_STRUCT_SIZE'
1380
+ size_t dimUnitFactorStructSize;
1381
+ } NVPW_MetricsEvaluator_GetMetricDimUnits_Params;
1382
+ #define NVPW_MetricsEvaluator_GetMetricDimUnits_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_GetMetricDimUnits_Params, dimUnitFactorStructSize)
1383
+
1384
+ NVPA_Status NVPW_MetricsEvaluator_GetMetricDimUnits(NVPW_MetricsEvaluator_GetMetricDimUnits_Params* pParams);
1385
+
1386
+ typedef struct NVPW_MetricsEvaluator_SetUserData_Params
1387
+ {
1388
+ /// [in]
1389
+ size_t structSize;
1390
+ /// [in] assign to NULL
1391
+ void* pPriv;
1392
+ /// [in]
1393
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
1394
+ /// [in] duration in ns of user defined frame
1395
+ double frameDuration;
1396
+ /// [in] duration in ns of user defined region
1397
+ double regionDuration;
1398
+ /// [in]
1399
+ NVPA_Bool isolated;
1400
+ } NVPW_MetricsEvaluator_SetUserData_Params;
1401
+ #define NVPW_MetricsEvaluator_SetUserData_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_SetUserData_Params, isolated)
1402
+
1403
+ NVPA_Status NVPW_MetricsEvaluator_SetUserData(NVPW_MetricsEvaluator_SetUserData_Params* pParams);
1404
+
1405
+ typedef struct NVPW_MetricsEvaluator_EvaluateToGpuValues_Params
1406
+ {
1407
+ /// [in]
1408
+ size_t structSize;
1409
+ /// [in] assign to NULL
1410
+ void* pPriv;
1411
+ /// [in]
1412
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
1413
+ /// [in]
1414
+ const struct NVPW_MetricEvalRequest* pMetricEvalRequests;
1415
+ /// [in]
1416
+ size_t numMetricEvalRequests;
1417
+ /// [in] set to 'NVPW_MetricEvalRequest_STRUCT_SIZE'
1418
+ size_t metricEvalRequestStructSize;
1419
+ /// [in] set to sizeof('NVPW_MetricEvalRequest')
1420
+ size_t metricEvalRequestStrideSize;
1421
+ /// [in]
1422
+ const uint8_t* pCounterDataImage;
1423
+ /// [in]
1424
+ size_t counterDataImageSize;
1425
+ /// [in]
1426
+ size_t rangeIndex;
1427
+ /// [in]
1428
+ NVPA_Bool isolated;
1429
+ /// [inout] 'pMetricValues' is in, '*pMetricValues' is out
1430
+ double* pMetricValues;
1431
+ } NVPW_MetricsEvaluator_EvaluateToGpuValues_Params;
1432
+ #define NVPW_MetricsEvaluator_EvaluateToGpuValues_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_EvaluateToGpuValues_Params, pMetricValues)
1433
+
1434
+ NVPA_Status NVPW_MetricsEvaluator_EvaluateToGpuValues(NVPW_MetricsEvaluator_EvaluateToGpuValues_Params* pParams);
1435
+
1436
+ typedef struct NVPW_MetricsEvaluator_SetDeviceAttributes_Params
1437
+ {
1438
+ /// [in]
1439
+ size_t structSize;
1440
+ /// [in] assign to NULL
1441
+ void* pPriv;
1442
+ /// [in]
1443
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
1444
+ /// [in]
1445
+ const uint8_t* pCounterDataImage;
1446
+ /// [in]
1447
+ size_t counterDataImageSize;
1448
+ } NVPW_MetricsEvaluator_SetDeviceAttributes_Params;
1449
+ #define NVPW_MetricsEvaluator_SetDeviceAttributes_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_SetDeviceAttributes_Params, counterDataImageSize)
1450
+
1451
+ NVPA_Status NVPW_MetricsEvaluator_SetDeviceAttributes(NVPW_MetricsEvaluator_SetDeviceAttributes_Params* pParams);
1452
+
1453
+ /**
1454
+ * @}
1455
+ ******************************************************************************/
1456
+
1457
+
1458
+ #endif // NVPERF_HOST_API_DEFINED
1459
+
1460
+
1461
+
1462
+
1463
+ #ifdef __cplusplus
1464
+ } // extern "C"
1465
+ #endif
1466
+
1467
+ #if defined(__GNUC__) && defined(NVPA_SHARED_LIB)
1468
+ #pragma GCC visibility pop
1469
+ #endif
1470
+
1471
+ #endif // NVPERF_HOST_H
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_runtime/__init__.py ADDED
File without changes
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cusolver/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (216 Bytes). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cusolver/include/__init__.py ADDED
File without changes
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cusolver/include/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (224 Bytes). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cusolver/include/cusolverMg.h ADDED
@@ -0,0 +1,318 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2019 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #if !defined(CUSOLVERMG_H_)
51
+ #define CUSOLVERMG_H_
52
+
53
+ #include <stdint.h>
54
+ #include "cusolverDn.h"
55
+
56
+ #if defined(__cplusplus)
57
+ extern "C" {
58
+ #endif /* __cplusplus */
59
+
60
+ struct cusolverMgContext;
61
+ typedef struct cusolverMgContext *cusolverMgHandle_t;
62
+
63
+ /**
64
+ * \beief This enum decides how 1D device Ids (or process ranks) get mapped to
65
+ * a 2D grid.
66
+ */
67
+ typedef enum {
68
+
69
+ CUDALIBMG_GRID_MAPPING_ROW_MAJOR = 1,
70
+ CUDALIBMG_GRID_MAPPING_COL_MAJOR = 0
71
+
72
+ } cusolverMgGridMapping_t;
73
+
74
+ /** \brief Opaque structure of the distributed grid */
75
+ typedef void *cudaLibMgGrid_t;
76
+ /** \brief Opaque structure of the distributed matrix descriptor */
77
+ typedef void *cudaLibMgMatrixDesc_t;
78
+
79
+ cusolverStatus_t CUSOLVERAPI cusolverMgCreate(cusolverMgHandle_t *handle);
80
+
81
+ cusolverStatus_t CUSOLVERAPI cusolverMgDestroy(cusolverMgHandle_t handle);
82
+
83
+ cusolverStatus_t CUSOLVERAPI cusolverMgDeviceSelect(
84
+ cusolverMgHandle_t handle,
85
+ int nbDevices,
86
+ int deviceId[]);
87
+
88
+ /**
89
+ * \brief Allocates resources related to the shared memory device grid.
90
+ * \param[out] grid the opaque data strcuture that holds the grid
91
+ * \param[in] numRowDevices number of devices in the row
92
+ * \param[in] numColDevices number of devices in the column
93
+ * \param[in] deviceId This array of size height * width stores the
94
+ * device-ids of the 2D grid; each entry must correspond to a valid
95
+ * gpu or to -1 (denoting CPU). \param[in] mapping whether the 2D grid is in
96
+ * row/column major \returns the status code
97
+ */
98
+ cusolverStatus_t CUSOLVERAPI cusolverMgCreateDeviceGrid(
99
+ cudaLibMgGrid_t * grid,
100
+ int32_t numRowDevices,
101
+ int32_t numColDevices,
102
+ const int32_t deviceId[],
103
+ cusolverMgGridMapping_t mapping);
104
+
105
+ /**
106
+ * \brief Releases the allocated resources related to the distributed grid.
107
+ * \param[in] grid the opaque data strcuture that holds the distributed grid
108
+ * \returns the status code
109
+ */
110
+ cusolverStatus_t CUSOLVERAPI cusolverMgDestroyGrid(cudaLibMgGrid_t grid);
111
+
112
+ /**
113
+ * \brief Allocates resources related to the distributed matrix descriptor.
114
+ * \param[out] desc the opaque data strcuture that holds the descriptor
115
+ * \param[in] numRows number of total rows
116
+ * \param[in] numCols number of total columns
117
+ * \param[in] rowBlockSize row block size
118
+ * \param[in] colBlockSize column block size
119
+ * \param[in] dataType the data type of each element in cudaDataType
120
+ * \param[in] grid the opaque data structure of the distributed grid
121
+ * \returns the status code
122
+ */
123
+ cusolverStatus_t CUSOLVERAPI cusolverMgCreateMatrixDesc(
124
+ cudaLibMgMatrixDesc_t *desc,
125
+ int64_t numRows,
126
+ int64_t numCols,
127
+ int64_t rowBlockSize,
128
+ int64_t colBlockSize,
129
+ cudaDataType dataType,
130
+ const cudaLibMgGrid_t grid);
131
+
132
+ /**
133
+ * \brief Releases the allocated resources related to the distributed matrix
134
+ * descriptor. \param[in] desc the opaque data strcuture that holds the
135
+ * descriptor \returns the status code
136
+ */
137
+ cusolverStatus_t CUSOLVERAPI
138
+ cusolverMgDestroyMatrixDesc(cudaLibMgMatrixDesc_t desc);
139
+
140
+ cusolverStatus_t CUSOLVERAPI cusolverMgSyevd_bufferSize(
141
+ cusolverMgHandle_t handle,
142
+ cusolverEigMode_t jobz,
143
+ cublasFillMode_t uplo,
144
+ int N,
145
+ void * array_d_A[],
146
+ int IA,
147
+ int JA,
148
+ cudaLibMgMatrixDesc_t descrA,
149
+ void * W,
150
+ cudaDataType dataTypeW,
151
+ cudaDataType computeType,
152
+ int64_t * lwork);
153
+
154
+ cusolverStatus_t CUSOLVERAPI cusolverMgSyevd(
155
+ cusolverMgHandle_t handle,
156
+ cusolverEigMode_t jobz,
157
+ cublasFillMode_t uplo,
158
+ int N,
159
+ void * array_d_A[],
160
+ int IA,
161
+ int JA,
162
+ cudaLibMgMatrixDesc_t descrA,
163
+ void * W,
164
+ cudaDataType dataTypeW,
165
+ cudaDataType computeType,
166
+ void * array_d_work[],
167
+ int64_t lwork,
168
+ int * info);
169
+
170
+ cusolverStatus_t CUSOLVERAPI cusolverMgGetrf_bufferSize(
171
+ cusolverMgHandle_t handle,
172
+ int M,
173
+ int N,
174
+ void * array_d_A[],
175
+ int IA,
176
+ int JA,
177
+ cudaLibMgMatrixDesc_t descrA,
178
+ int * array_d_IPIV[],
179
+ cudaDataType computeType,
180
+ int64_t * lwork);
181
+
182
+ cusolverStatus_t CUSOLVERAPI cusolverMgGetrf(
183
+ cusolverMgHandle_t handle,
184
+ int M,
185
+ int N,
186
+ void * array_d_A[],
187
+ int IA,
188
+ int JA,
189
+ cudaLibMgMatrixDesc_t descrA,
190
+ int * array_d_IPIV[],
191
+ cudaDataType computeType,
192
+ void * array_d_work[],
193
+ int64_t lwork,
194
+ int * info);
195
+
196
+ cusolverStatus_t CUSOLVERAPI cusolverMgGetrs_bufferSize(
197
+ cusolverMgHandle_t handle,
198
+ cublasOperation_t TRANS,
199
+ int N,
200
+ int NRHS,
201
+ void * array_d_A[],
202
+ int IA,
203
+ int JA,
204
+ cudaLibMgMatrixDesc_t descrA,
205
+ int * array_d_IPIV[],
206
+ void * array_d_B[],
207
+ int IB,
208
+ int JB,
209
+ cudaLibMgMatrixDesc_t descrB,
210
+ cudaDataType computeType,
211
+ int64_t * lwork);
212
+
213
+ cusolverStatus_t CUSOLVERAPI cusolverMgGetrs(
214
+ cusolverMgHandle_t handle,
215
+ cublasOperation_t TRANS,
216
+ int N,
217
+ int NRHS,
218
+ void * array_d_A[],
219
+ int IA,
220
+ int JA,
221
+ cudaLibMgMatrixDesc_t descrA,
222
+ int * array_d_IPIV[],
223
+ void * array_d_B[],
224
+ int IB,
225
+ int JB,
226
+ cudaLibMgMatrixDesc_t descrB,
227
+ cudaDataType computeType,
228
+ void * array_d_work[],
229
+ int64_t lwork,
230
+ int * info);
231
+
232
+ cusolverStatus_t CUSOLVERAPI cusolverMgPotrf_bufferSize(
233
+ cusolverMgHandle_t handle,
234
+ cublasFillMode_t uplo,
235
+ int N,
236
+ void * array_d_A[],
237
+ int IA,
238
+ int JA,
239
+ cudaLibMgMatrixDesc_t descrA,
240
+ cudaDataType computeType,
241
+ int64_t * lwork);
242
+
243
+ cusolverStatus_t CUSOLVERAPI cusolverMgPotrf(
244
+ cusolverMgHandle_t handle,
245
+ cublasFillMode_t uplo,
246
+ int N,
247
+ void * array_d_A[],
248
+ int IA,
249
+ int JA,
250
+ cudaLibMgMatrixDesc_t descrA,
251
+ cudaDataType computeType,
252
+ void * array_d_work[],
253
+ int64_t lwork,
254
+ int * h_info);
255
+
256
+ cusolverStatus_t CUSOLVERAPI cusolverMgPotrs_bufferSize(
257
+ cusolverMgHandle_t handle,
258
+ cublasFillMode_t uplo,
259
+ int n,
260
+ int nrhs,
261
+ void * array_d_A[],
262
+ int IA,
263
+ int JA,
264
+ cudaLibMgMatrixDesc_t descrA,
265
+ void * array_d_B[],
266
+ int IB,
267
+ int JB,
268
+ cudaLibMgMatrixDesc_t descrB,
269
+ cudaDataType computeType,
270
+ int64_t * lwork);
271
+
272
+ cusolverStatus_t CUSOLVERAPI cusolverMgPotrs(
273
+ cusolverMgHandle_t handle,
274
+ cublasFillMode_t uplo,
275
+ int n,
276
+ int nrhs,
277
+ void * array_d_A[],
278
+ int IA,
279
+ int JA,
280
+ cudaLibMgMatrixDesc_t descrA,
281
+ void * array_d_B[],
282
+ int IB,
283
+ int JB,
284
+ cudaLibMgMatrixDesc_t descrB,
285
+ cudaDataType computeType,
286
+ void * array_d_work[],
287
+ int64_t lwork,
288
+ int * h_info);
289
+
290
+ cusolverStatus_t CUSOLVERAPI cusolverMgPotri_bufferSize(
291
+ cusolverMgHandle_t handle,
292
+ cublasFillMode_t uplo,
293
+ int N,
294
+ void * array_d_A[],
295
+ int IA,
296
+ int JA,
297
+ cudaLibMgMatrixDesc_t descrA,
298
+ cudaDataType computeType,
299
+ int64_t * lwork);
300
+
301
+ cusolverStatus_t CUSOLVERAPI cusolverMgPotri(
302
+ cusolverMgHandle_t handle,
303
+ cublasFillMode_t uplo,
304
+ int N,
305
+ void * array_d_A[],
306
+ int IA,
307
+ int JA,
308
+ cudaLibMgMatrixDesc_t descrA,
309
+ cudaDataType computeType,
310
+ void * array_d_work[],
311
+ int64_t lwork,
312
+ int * h_info);
313
+
314
+ #if defined(__cplusplus)
315
+ }
316
+ #endif /* __cplusplus */
317
+
318
+ #endif // CUSOLVERMG_H_
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cusolver/include/cusolverRf.h ADDED
@@ -0,0 +1,339 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #if !defined(CUSOLVERRF_H_)
51
+ #define CUSOLVERRF_H_
52
+
53
+ #include "driver_types.h"
54
+ #include "cuComplex.h"
55
+ #include "cusolver_common.h"
56
+
57
+ #if defined(__cplusplus)
58
+ extern "C" {
59
+ #endif /* __cplusplus */
60
+
61
+ /* CUSOLVERRF mode */
62
+ typedef enum {
63
+ CUSOLVERRF_RESET_VALUES_FAST_MODE_OFF = 0, // default
64
+ CUSOLVERRF_RESET_VALUES_FAST_MODE_ON = 1
65
+ } cusolverRfResetValuesFastMode_t;
66
+
67
+ /* CUSOLVERRF matrix format */
68
+ typedef enum {
69
+ CUSOLVERRF_MATRIX_FORMAT_CSR = 0, // default
70
+ CUSOLVERRF_MATRIX_FORMAT_CSC = 1
71
+ } cusolverRfMatrixFormat_t;
72
+
73
+ /* CUSOLVERRF unit diagonal */
74
+ typedef enum {
75
+ CUSOLVERRF_UNIT_DIAGONAL_STORED_L = 0, // default
76
+ CUSOLVERRF_UNIT_DIAGONAL_STORED_U = 1,
77
+ CUSOLVERRF_UNIT_DIAGONAL_ASSUMED_L = 2,
78
+ CUSOLVERRF_UNIT_DIAGONAL_ASSUMED_U = 3
79
+ } cusolverRfUnitDiagonal_t;
80
+
81
+ /* CUSOLVERRF factorization algorithm */
82
+ typedef enum {
83
+ CUSOLVERRF_FACTORIZATION_ALG0 = 0, // default
84
+ CUSOLVERRF_FACTORIZATION_ALG1 = 1,
85
+ CUSOLVERRF_FACTORIZATION_ALG2 = 2,
86
+ } cusolverRfFactorization_t;
87
+
88
+ /* CUSOLVERRF triangular solve algorithm */
89
+ typedef enum {
90
+ CUSOLVERRF_TRIANGULAR_SOLVE_ALG1 = 1, // default
91
+ CUSOLVERRF_TRIANGULAR_SOLVE_ALG2 = 2,
92
+ CUSOLVERRF_TRIANGULAR_SOLVE_ALG3 = 3
93
+ } cusolverRfTriangularSolve_t;
94
+
95
+ /* CUSOLVERRF numeric boost report */
96
+ typedef enum {
97
+ CUSOLVERRF_NUMERIC_BOOST_NOT_USED = 0, // default
98
+ CUSOLVERRF_NUMERIC_BOOST_USED = 1
99
+ } cusolverRfNumericBoostReport_t;
100
+
101
+ /* Opaque structure holding CUSOLVERRF library common */
102
+ struct cusolverRfCommon;
103
+ typedef struct cusolverRfCommon* cusolverRfHandle_t;
104
+
105
+ /* CUSOLVERRF create (allocate memory) and destroy (free memory) in the handle
106
+ */
107
+ cusolverStatus_t CUSOLVERAPI cusolverRfCreate(cusolverRfHandle_t* handle);
108
+ cusolverStatus_t CUSOLVERAPI cusolverRfDestroy(cusolverRfHandle_t handle);
109
+
110
+ /* CUSOLVERRF set and get input format */
111
+ cusolverStatus_t CUSOLVERAPI cusolverRfGetMatrixFormat(
112
+ cusolverRfHandle_t handle,
113
+ cusolverRfMatrixFormat_t* format,
114
+ cusolverRfUnitDiagonal_t* diag);
115
+
116
+ cusolverStatus_t CUSOLVERAPI cusolverRfSetMatrixFormat(
117
+ cusolverRfHandle_t handle,
118
+ cusolverRfMatrixFormat_t format,
119
+ cusolverRfUnitDiagonal_t diag);
120
+
121
+ /* CUSOLVERRF set and get numeric properties */
122
+ cusolverStatus_t CUSOLVERAPI cusolverRfSetNumericProperties(
123
+ cusolverRfHandle_t handle,
124
+ double zero,
125
+ double boost);
126
+
127
+ cusolverStatus_t CUSOLVERAPI cusolverRfGetNumericProperties(
128
+ cusolverRfHandle_t handle,
129
+ double* zero,
130
+ double* boost);
131
+
132
+ cusolverStatus_t CUSOLVERAPI cusolverRfGetNumericBoostReport(
133
+ cusolverRfHandle_t handle,
134
+ cusolverRfNumericBoostReport_t* report);
135
+
136
+ /* CUSOLVERRF choose the triangular solve algorithm */
137
+ cusolverStatus_t CUSOLVERAPI cusolverRfSetAlgs(
138
+ cusolverRfHandle_t handle,
139
+ cusolverRfFactorization_t factAlg,
140
+ cusolverRfTriangularSolve_t solveAlg);
141
+
142
+ cusolverStatus_t CUSOLVERAPI cusolverRfGetAlgs(
143
+ cusolverRfHandle_t handle,
144
+ cusolverRfFactorization_t* factAlg,
145
+ cusolverRfTriangularSolve_t* solveAlg);
146
+
147
+ /* CUSOLVERRF set and get fast mode */
148
+ cusolverStatus_t CUSOLVERAPI cusolverRfGetResetValuesFastMode(
149
+ cusolverRfHandle_t handle,
150
+ cusolverRfResetValuesFastMode_t* fastMode);
151
+
152
+ cusolverStatus_t CUSOLVERAPI cusolverRfSetResetValuesFastMode(
153
+ cusolverRfHandle_t handle,
154
+ cusolverRfResetValuesFastMode_t fastMode);
155
+
156
+ /*** Non-Batched Routines ***/
157
+ /* CUSOLVERRF setup of internal structures from host or device memory */
158
+ cusolverStatus_t CUSOLVERAPI
159
+ cusolverRfSetupHost(/* Input (in the host memory) */
160
+ int n,
161
+ int nnzA,
162
+ int* h_csrRowPtrA,
163
+ int* h_csrColIndA,
164
+ double* h_csrValA,
165
+ int nnzL,
166
+ int* h_csrRowPtrL,
167
+ int* h_csrColIndL,
168
+ double* h_csrValL,
169
+ int nnzU,
170
+ int* h_csrRowPtrU,
171
+ int* h_csrColIndU,
172
+ double* h_csrValU,
173
+ int* h_P,
174
+ int* h_Q,
175
+ /* Output */
176
+ cusolverRfHandle_t handle);
177
+
178
+ cusolverStatus_t CUSOLVERAPI
179
+ cusolverRfSetupDevice(/* Input (in the device memory) */
180
+ int n,
181
+ int nnzA,
182
+ int* csrRowPtrA,
183
+ int* csrColIndA,
184
+ double* csrValA,
185
+ int nnzL,
186
+ int* csrRowPtrL,
187
+ int* csrColIndL,
188
+ double* csrValL,
189
+ int nnzU,
190
+ int* csrRowPtrU,
191
+ int* csrColIndU,
192
+ double* csrValU,
193
+ int* P,
194
+ int* Q,
195
+ /* Output */
196
+ cusolverRfHandle_t handle);
197
+
198
+ /* CUSOLVERRF update the matrix values (assuming the reordering, pivoting
199
+ and consequently the sparsity pattern of L and U did not change),
200
+ and zero out the remaining values. */
201
+ cusolverStatus_t CUSOLVERAPI
202
+ cusolverRfResetValues(/* Input (in the device memory) */
203
+ int n,
204
+ int nnzA,
205
+ int* csrRowPtrA,
206
+ int* csrColIndA,
207
+ double* csrValA,
208
+ int* P,
209
+ int* Q,
210
+ /* Output */
211
+ cusolverRfHandle_t handle);
212
+
213
+ /* CUSOLVERRF analysis (for parallelism) */
214
+ cusolverStatus_t CUSOLVERAPI cusolverRfAnalyze(cusolverRfHandle_t handle);
215
+
216
+ /* CUSOLVERRF re-factorization (for parallelism) */
217
+ cusolverStatus_t CUSOLVERAPI cusolverRfRefactor(cusolverRfHandle_t handle);
218
+
219
+ /* CUSOLVERRF extraction: Get L & U packed into a single matrix M */
220
+ cusolverStatus_t CUSOLVERAPI
221
+ cusolverRfAccessBundledFactorsDevice(/* Input */
222
+ cusolverRfHandle_t handle,
223
+ /* Output (in the host memory) */
224
+ int* nnzM,
225
+ /* Output (in the device memory) */
226
+ int** Mp,
227
+ int** Mi,
228
+ double** Mx);
229
+
230
+ cusolverStatus_t CUSOLVERAPI
231
+ cusolverRfExtractBundledFactorsHost(/* Input */
232
+ cusolverRfHandle_t handle,
233
+ /* Output (in the host memory) */
234
+ int* h_nnzM,
235
+ int** h_Mp,
236
+ int** h_Mi,
237
+ double** h_Mx);
238
+
239
+ /* CUSOLVERRF extraction: Get L & U individually */
240
+ cusolverStatus_t CUSOLVERAPI
241
+ cusolverRfExtractSplitFactorsHost(/* Input */
242
+ cusolverRfHandle_t handle,
243
+ /* Output (in the host memory) */
244
+ int* h_nnzL,
245
+ int** h_csrRowPtrL,
246
+ int** h_csrColIndL,
247
+ double** h_csrValL,
248
+ int* h_nnzU,
249
+ int** h_csrRowPtrU,
250
+ int** h_csrColIndU,
251
+ double** h_csrValU);
252
+
253
+ /* CUSOLVERRF (forward and backward triangular) solves */
254
+ cusolverStatus_t CUSOLVERAPI
255
+ cusolverRfSolve(/* Input (in the device memory) */
256
+ cusolverRfHandle_t handle,
257
+ int* P,
258
+ int* Q,
259
+ int nrhs, // only nrhs=1 is supported
260
+ double* Temp, // of size ldt*nrhs (ldt>=n)
261
+ int ldt,
262
+ /* Input/Output (in the device memory) */
263
+ double* XF,
264
+ /* Input */
265
+ int ldxf);
266
+
267
+ /*** Batched Routines ***/
268
+ /* CUSOLVERRF-batch setup of internal structures from host */
269
+ cusolverStatus_t CUSOLVERAPI
270
+ cusolverRfBatchSetupHost(/* Input (in the host memory)*/
271
+ int batchSize,
272
+ int n,
273
+ int nnzA,
274
+ int* h_csrRowPtrA,
275
+ int* h_csrColIndA,
276
+ double* h_csrValA_array[],
277
+ int nnzL,
278
+ int* h_csrRowPtrL,
279
+ int* h_csrColIndL,
280
+ double* h_csrValL,
281
+ int nnzU,
282
+ int* h_csrRowPtrU,
283
+ int* h_csrColIndU,
284
+ double* h_csrValU,
285
+ int* h_P,
286
+ int* h_Q,
287
+ /* Output (in the device memory) */
288
+ cusolverRfHandle_t handle);
289
+
290
+ /* CUSOLVERRF-batch update the matrix values (assuming the reordering,
291
+ pivoting and consequently the sparsity pattern of L and U did not change),
292
+ and zero out the remaining values. */
293
+ cusolverStatus_t CUSOLVERAPI
294
+ cusolverRfBatchResetValues(/* Input (in the device memory) */
295
+ int batchSize,
296
+ int n,
297
+ int nnzA,
298
+ int* csrRowPtrA,
299
+ int* csrColIndA,
300
+ double* csrValA_array[],
301
+ int* P,
302
+ int* Q,
303
+ /* Output */
304
+ cusolverRfHandle_t handle);
305
+
306
+ /* CUSOLVERRF-batch analysis (for parallelism) */
307
+ cusolverStatus_t CUSOLVERAPI
308
+ cusolverRfBatchAnalyze(cusolverRfHandle_t handle);
309
+
310
+ /* CUSOLVERRF-batch re-factorization (for parallelism) */
311
+ cusolverStatus_t CUSOLVERAPI
312
+ cusolverRfBatchRefactor(cusolverRfHandle_t handle);
313
+
314
+ /* CUSOLVERRF-batch (forward and backward triangular) solves */
315
+ cusolverStatus_t CUSOLVERAPI
316
+ cusolverRfBatchSolve(/* Input (in the device memory) */
317
+ cusolverRfHandle_t handle,
318
+ int* P,
319
+ int* Q,
320
+ int nrhs, // only nrhs=1 is supported
321
+ double* Temp, // of size 2*batchSize*(n*nrhs)
322
+ int ldt, // only ldt=n is supported
323
+ /* Input/Output (in the device memory) */
324
+ double* XF_array[],
325
+ /* Input */
326
+ int ldxf);
327
+
328
+ /* CUSOLVERRF-batch obtain the position of zero pivot */
329
+ cusolverStatus_t CUSOLVERAPI
330
+ cusolverRfBatchZeroPivot(/* Input */
331
+ cusolverRfHandle_t handle,
332
+ /* Output (in the host memory) */
333
+ int* position);
334
+
335
+ #if defined(__cplusplus)
336
+ }
337
+ #endif /* __cplusplus */
338
+
339
+ #endif /* CUSOLVERRF_H_ */
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cusolver/include/cusolverSp_LOWLEVEL_PREVIEW.h ADDED
@@ -0,0 +1,1107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2015 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #if !defined(CUSOLVERSP_LOWLEVEL_PREVIEW_H_)
51
+ #define CUSOLVERSP_LOWLEVEL_PREVIEW_H_
52
+
53
+ #include "cusolverSp.h"
54
+
55
+ #if defined(__cplusplus)
56
+ extern "C" {
57
+ #endif /* __cplusplus */
58
+
59
+ struct csrluInfoHost;
60
+ typedef struct csrluInfoHost *csrluInfoHost_t;
61
+
62
+ struct csrqrInfoHost;
63
+ typedef struct csrqrInfoHost *csrqrInfoHost_t;
64
+
65
+ struct csrcholInfoHost;
66
+ typedef struct csrcholInfoHost *csrcholInfoHost_t;
67
+
68
+ struct csrcholInfo;
69
+ typedef struct csrcholInfo *csrcholInfo_t;
70
+
71
+ /*
72
+ * Low level API for CPU LU
73
+ *
74
+ */
75
+ cusolverStatus_t CUSOLVERAPI
76
+ cusolverSpCreateCsrluInfoHost(csrluInfoHost_t *info);
77
+
78
+ cusolverStatus_t CUSOLVERAPI
79
+ cusolverSpDestroyCsrluInfoHost(csrluInfoHost_t info);
80
+
81
+ cusolverStatus_t CUSOLVERAPI cusolverSpXcsrluAnalysisHost(
82
+ cusolverSpHandle_t handle,
83
+ int n,
84
+ int nnzA,
85
+ const cusparseMatDescr_t descrA,
86
+ const int * csrRowPtrA,
87
+ const int * csrColIndA,
88
+ csrluInfoHost_t info);
89
+
90
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrluBufferInfoHost(
91
+ cusolverSpHandle_t handle,
92
+ int n,
93
+ int nnzA,
94
+ const cusparseMatDescr_t descrA,
95
+ const float * csrValA,
96
+ const int * csrRowPtrA,
97
+ const int * csrColIndA,
98
+ csrluInfoHost_t info,
99
+ size_t * internalDataInBytes,
100
+ size_t * workspaceInBytes);
101
+
102
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrluBufferInfoHost(
103
+ cusolverSpHandle_t handle,
104
+ int n,
105
+ int nnzA,
106
+ const cusparseMatDescr_t descrA,
107
+ const double * csrValA,
108
+ const int * csrRowPtrA,
109
+ const int * csrColIndA,
110
+ csrluInfoHost_t info,
111
+ size_t * internalDataInBytes,
112
+ size_t * workspaceInBytes);
113
+
114
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrluBufferInfoHost(
115
+ cusolverSpHandle_t handle,
116
+ int n,
117
+ int nnzA,
118
+ const cusparseMatDescr_t descrA,
119
+ const cuComplex * csrValA,
120
+ const int * csrRowPtrA,
121
+ const int * csrColIndA,
122
+ csrluInfoHost_t info,
123
+ size_t * internalDataInBytes,
124
+ size_t * workspaceInBytes);
125
+
126
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrluBufferInfoHost(
127
+ cusolverSpHandle_t handle,
128
+ int n,
129
+ int nnzA,
130
+ const cusparseMatDescr_t descrA,
131
+ const cuDoubleComplex * csrValA,
132
+ const int * csrRowPtrA,
133
+ const int * csrColIndA,
134
+ csrluInfoHost_t info,
135
+ size_t * internalDataInBytes,
136
+ size_t * workspaceInBytes);
137
+
138
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrluFactorHost(
139
+ cusolverSpHandle_t handle,
140
+ int n,
141
+ int nnzA,
142
+ const cusparseMatDescr_t descrA,
143
+ const float * csrValA,
144
+ const int * csrRowPtrA,
145
+ const int * csrColIndA,
146
+ csrluInfoHost_t info,
147
+ float pivot_threshold,
148
+ void * pBuffer);
149
+
150
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrluFactorHost(
151
+ cusolverSpHandle_t handle,
152
+ int n,
153
+ int nnzA,
154
+ const cusparseMatDescr_t descrA,
155
+ const double * csrValA,
156
+ const int * csrRowPtrA,
157
+ const int * csrColIndA,
158
+ csrluInfoHost_t info,
159
+ double pivot_threshold,
160
+ void * pBuffer);
161
+
162
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrluFactorHost(
163
+ cusolverSpHandle_t handle,
164
+ int n,
165
+ int nnzA,
166
+ const cusparseMatDescr_t descrA,
167
+ const cuComplex * csrValA,
168
+ const int * csrRowPtrA,
169
+ const int * csrColIndA,
170
+ csrluInfoHost_t info,
171
+ float pivot_threshold,
172
+ void * pBuffer);
173
+
174
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrluFactorHost(
175
+ cusolverSpHandle_t handle,
176
+ int n,
177
+ int nnzA,
178
+ const cusparseMatDescr_t descrA,
179
+ const cuDoubleComplex * csrValA,
180
+ const int * csrRowPtrA,
181
+ const int * csrColIndA,
182
+ csrluInfoHost_t info,
183
+ double pivot_threshold,
184
+ void * pBuffer);
185
+
186
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrluZeroPivotHost(
187
+ cusolverSpHandle_t handle,
188
+ csrluInfoHost_t info,
189
+ float tol,
190
+ int * position);
191
+
192
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrluZeroPivotHost(
193
+ cusolverSpHandle_t handle,
194
+ csrluInfoHost_t info,
195
+ double tol,
196
+ int * position);
197
+
198
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrluZeroPivotHost(
199
+ cusolverSpHandle_t handle,
200
+ csrluInfoHost_t info,
201
+ float tol,
202
+ int * position);
203
+
204
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrluZeroPivotHost(
205
+ cusolverSpHandle_t handle,
206
+ csrluInfoHost_t info,
207
+ double tol,
208
+ int * position);
209
+
210
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrluSolveHost(
211
+ cusolverSpHandle_t handle,
212
+ int n,
213
+ const float * b,
214
+ float * x,
215
+ csrluInfoHost_t info,
216
+ void * pBuffer);
217
+
218
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrluSolveHost(
219
+ cusolverSpHandle_t handle,
220
+ int n,
221
+ const double * b,
222
+ double * x,
223
+ csrluInfoHost_t info,
224
+ void * pBuffer);
225
+
226
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrluSolveHost(
227
+ cusolverSpHandle_t handle,
228
+ int n,
229
+ const cuComplex * b,
230
+ cuComplex * x,
231
+ csrluInfoHost_t info,
232
+ void * pBuffer);
233
+
234
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrluSolveHost(
235
+ cusolverSpHandle_t handle,
236
+ int n,
237
+ const cuDoubleComplex *b,
238
+ cuDoubleComplex * x,
239
+ csrluInfoHost_t info,
240
+ void * pBuffer);
241
+
242
+ cusolverStatus_t CUSOLVERAPI cusolverSpXcsrluNnzHost(
243
+ cusolverSpHandle_t handle,
244
+ int * nnzLRef,
245
+ int * nnzURef,
246
+ csrluInfoHost_t info);
247
+
248
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrluExtractHost(
249
+ cusolverSpHandle_t handle,
250
+ int * P,
251
+ int * Q,
252
+ const cusparseMatDescr_t descrL,
253
+ float * csrValL,
254
+ int * csrRowPtrL,
255
+ int * csrColIndL,
256
+ const cusparseMatDescr_t descrU,
257
+ float * csrValU,
258
+ int * csrRowPtrU,
259
+ int * csrColIndU,
260
+ csrluInfoHost_t info,
261
+ void * pBuffer);
262
+
263
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrluExtractHost(
264
+ cusolverSpHandle_t handle,
265
+ int * P,
266
+ int * Q,
267
+ const cusparseMatDescr_t descrL,
268
+ double * csrValL,
269
+ int * csrRowPtrL,
270
+ int * csrColIndL,
271
+ const cusparseMatDescr_t descrU,
272
+ double * csrValU,
273
+ int * csrRowPtrU,
274
+ int * csrColIndU,
275
+ csrluInfoHost_t info,
276
+ void * pBuffer);
277
+
278
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrluExtractHost(
279
+ cusolverSpHandle_t handle,
280
+ int * P,
281
+ int * Q,
282
+ const cusparseMatDescr_t descrL,
283
+ cuComplex * csrValL,
284
+ int * csrRowPtrL,
285
+ int * csrColIndL,
286
+ const cusparseMatDescr_t descrU,
287
+ cuComplex * csrValU,
288
+ int * csrRowPtrU,
289
+ int * csrColIndU,
290
+ csrluInfoHost_t info,
291
+ void * pBuffer);
292
+
293
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrluExtractHost(
294
+ cusolverSpHandle_t handle,
295
+ int * P,
296
+ int * Q,
297
+ const cusparseMatDescr_t descrL,
298
+ cuDoubleComplex * csrValL,
299
+ int * csrRowPtrL,
300
+ int * csrColIndL,
301
+ const cusparseMatDescr_t descrU,
302
+ cuDoubleComplex * csrValU,
303
+ int * csrRowPtrU,
304
+ int * csrColIndU,
305
+ csrluInfoHost_t info,
306
+ void * pBuffer);
307
+
308
+ /*
309
+ * Low level API for CPU QR
310
+ *
311
+ */
312
+ cusolverStatus_t CUSOLVERAPI
313
+ cusolverSpCreateCsrqrInfoHost(csrqrInfoHost_t *info);
314
+
315
+ cusolverStatus_t CUSOLVERAPI
316
+ cusolverSpDestroyCsrqrInfoHost(csrqrInfoHost_t info);
317
+
318
+ cusolverStatus_t CUSOLVERAPI cusolverSpXcsrqrAnalysisHost(
319
+ cusolverSpHandle_t handle,
320
+ int m,
321
+ int n,
322
+ int nnzA,
323
+ const cusparseMatDescr_t descrA,
324
+ const int * csrRowPtrA,
325
+ const int * csrColIndA,
326
+ csrqrInfoHost_t info);
327
+
328
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrqrBufferInfoHost(
329
+ cusolverSpHandle_t handle,
330
+ int m,
331
+ int n,
332
+ int nnzA,
333
+ const cusparseMatDescr_t descrA,
334
+ const float * csrValA,
335
+ const int * csrRowPtrA,
336
+ const int * csrColIndA,
337
+ csrqrInfoHost_t info,
338
+ size_t * internalDataInBytes,
339
+ size_t * workspaceInBytes);
340
+
341
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrqrBufferInfoHost(
342
+ cusolverSpHandle_t handle,
343
+ int m,
344
+ int n,
345
+ int nnzA,
346
+ const cusparseMatDescr_t descrA,
347
+ const double * csrValA,
348
+ const int * csrRowPtrA,
349
+ const int * csrColIndA,
350
+ csrqrInfoHost_t info,
351
+ size_t * internalDataInBytes,
352
+ size_t * workspaceInBytes);
353
+
354
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrqrBufferInfoHost(
355
+ cusolverSpHandle_t handle,
356
+ int m,
357
+ int n,
358
+ int nnzA,
359
+ const cusparseMatDescr_t descrA,
360
+ const cuComplex * csrValA,
361
+ const int * csrRowPtrA,
362
+ const int * csrColIndA,
363
+ csrqrInfoHost_t info,
364
+ size_t * internalDataInBytes,
365
+ size_t * workspaceInBytes);
366
+
367
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrqrBufferInfoHost(
368
+ cusolverSpHandle_t handle,
369
+ int m,
370
+ int n,
371
+ int nnzA,
372
+ const cusparseMatDescr_t descrA,
373
+ const cuDoubleComplex * csrValA,
374
+ const int * csrRowPtrA,
375
+ const int * csrColIndA,
376
+ csrqrInfoHost_t info,
377
+ size_t * internalDataInBytes,
378
+ size_t * workspaceInBytes);
379
+
380
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrqrSetupHost(
381
+ cusolverSpHandle_t handle,
382
+ int m,
383
+ int n,
384
+ int nnzA,
385
+ const cusparseMatDescr_t descrA,
386
+ const float * csrValA,
387
+ const int * csrRowPtrA,
388
+ const int * csrColIndA,
389
+ float mu,
390
+ csrqrInfoHost_t info);
391
+
392
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrqrSetupHost(
393
+ cusolverSpHandle_t handle,
394
+ int m,
395
+ int n,
396
+ int nnzA,
397
+ const cusparseMatDescr_t descrA,
398
+ const double * csrValA,
399
+ const int * csrRowPtrA,
400
+ const int * csrColIndA,
401
+ double mu,
402
+ csrqrInfoHost_t info);
403
+
404
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrqrSetupHost(
405
+ cusolverSpHandle_t handle,
406
+ int m,
407
+ int n,
408
+ int nnzA,
409
+ const cusparseMatDescr_t descrA,
410
+ const cuComplex * csrValA,
411
+ const int * csrRowPtrA,
412
+ const int * csrColIndA,
413
+ cuComplex mu,
414
+ csrqrInfoHost_t info);
415
+
416
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrqrSetupHost(
417
+ cusolverSpHandle_t handle,
418
+ int m,
419
+ int n,
420
+ int nnzA,
421
+ const cusparseMatDescr_t descrA,
422
+ const cuDoubleComplex * csrValA,
423
+ const int * csrRowPtrA,
424
+ const int * csrColIndA,
425
+ cuDoubleComplex mu,
426
+ csrqrInfoHost_t info);
427
+
428
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrqrFactorHost(
429
+ cusolverSpHandle_t handle,
430
+ int m,
431
+ int n,
432
+ int nnzA,
433
+ float * b,
434
+ float * x,
435
+ csrqrInfoHost_t info,
436
+ void * pBuffer);
437
+
438
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrqrFactorHost(
439
+ cusolverSpHandle_t handle,
440
+ int m,
441
+ int n,
442
+ int nnzA,
443
+ double * b,
444
+ double * x,
445
+ csrqrInfoHost_t info,
446
+ void * pBuffer);
447
+
448
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrqrFactorHost(
449
+ cusolverSpHandle_t handle,
450
+ int m,
451
+ int n,
452
+ int nnzA,
453
+ cuComplex * b,
454
+ cuComplex * x,
455
+ csrqrInfoHost_t info,
456
+ void * pBuffer);
457
+
458
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrqrFactorHost(
459
+ cusolverSpHandle_t handle,
460
+ int m,
461
+ int n,
462
+ int nnzA,
463
+ cuDoubleComplex * b,
464
+ cuDoubleComplex * x,
465
+ csrqrInfoHost_t info,
466
+ void * pBuffer);
467
+
468
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrqrZeroPivotHost(
469
+ cusolverSpHandle_t handle,
470
+ csrqrInfoHost_t info,
471
+ float tol,
472
+ int * position);
473
+
474
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrqrZeroPivotHost(
475
+ cusolverSpHandle_t handle,
476
+ csrqrInfoHost_t info,
477
+ double tol,
478
+ int * position);
479
+
480
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrqrZeroPivotHost(
481
+ cusolverSpHandle_t handle,
482
+ csrqrInfoHost_t info,
483
+ float tol,
484
+ int * position);
485
+
486
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrqrZeroPivotHost(
487
+ cusolverSpHandle_t handle,
488
+ csrqrInfoHost_t info,
489
+ double tol,
490
+ int * position);
491
+
492
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrqrSolveHost(
493
+ cusolverSpHandle_t handle,
494
+ int m,
495
+ int n,
496
+ float * b,
497
+ float * x,
498
+ csrqrInfoHost_t info,
499
+ void * pBuffer);
500
+
501
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrqrSolveHost(
502
+ cusolverSpHandle_t handle,
503
+ int m,
504
+ int n,
505
+ double * b,
506
+ double * x,
507
+ csrqrInfoHost_t info,
508
+ void * pBuffer);
509
+
510
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrqrSolveHost(
511
+ cusolverSpHandle_t handle,
512
+ int m,
513
+ int n,
514
+ cuComplex * b,
515
+ cuComplex * x,
516
+ csrqrInfoHost_t info,
517
+ void * pBuffer);
518
+
519
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrqrSolveHost(
520
+ cusolverSpHandle_t handle,
521
+ int m,
522
+ int n,
523
+ cuDoubleComplex * b,
524
+ cuDoubleComplex * x,
525
+ csrqrInfoHost_t info,
526
+ void * pBuffer);
527
+
528
+ /*
529
+ * Low level API for GPU QR
530
+ *
531
+ */
532
+ cusolverStatus_t CUSOLVERAPI cusolverSpXcsrqrAnalysis(
533
+ cusolverSpHandle_t handle,
534
+ int m,
535
+ int n,
536
+ int nnzA,
537
+ const cusparseMatDescr_t descrA,
538
+ const int * csrRowPtrA,
539
+ const int * csrColIndA,
540
+ csrqrInfo_t info);
541
+
542
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrqrBufferInfo(
543
+ cusolverSpHandle_t handle,
544
+ int m,
545
+ int n,
546
+ int nnzA,
547
+ const cusparseMatDescr_t descrA,
548
+ const float * csrValA,
549
+ const int * csrRowPtrA,
550
+ const int * csrColIndA,
551
+ csrqrInfo_t info,
552
+ size_t * internalDataInBytes,
553
+ size_t * workspaceInBytes);
554
+
555
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrqrBufferInfo(
556
+ cusolverSpHandle_t handle,
557
+ int m,
558
+ int n,
559
+ int nnzA,
560
+ const cusparseMatDescr_t descrA,
561
+ const double * csrValA,
562
+ const int * csrRowPtrA,
563
+ const int * csrColIndA,
564
+ csrqrInfo_t info,
565
+ size_t * internalDataInBytes,
566
+ size_t * workspaceInBytes);
567
+
568
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrqrBufferInfo(
569
+ cusolverSpHandle_t handle,
570
+ int m,
571
+ int n,
572
+ int nnzA,
573
+ const cusparseMatDescr_t descrA,
574
+ const cuComplex * csrValA,
575
+ const int * csrRowPtrA,
576
+ const int * csrColIndA,
577
+ csrqrInfo_t info,
578
+ size_t * internalDataInBytes,
579
+ size_t * workspaceInBytes);
580
+
581
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrqrBufferInfo(
582
+ cusolverSpHandle_t handle,
583
+ int m,
584
+ int n,
585
+ int nnzA,
586
+ const cusparseMatDescr_t descrA,
587
+ const cuDoubleComplex * csrValA,
588
+ const int * csrRowPtrA,
589
+ const int * csrColIndA,
590
+ csrqrInfo_t info,
591
+ size_t * internalDataInBytes,
592
+ size_t * workspaceInBytes);
593
+
594
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrqrSetup(
595
+ cusolverSpHandle_t handle,
596
+ int m,
597
+ int n,
598
+ int nnzA,
599
+ const cusparseMatDescr_t descrA,
600
+ const float * csrValA,
601
+ const int * csrRowPtrA,
602
+ const int * csrColIndA,
603
+ float mu,
604
+ csrqrInfo_t info);
605
+
606
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrqrSetup(
607
+ cusolverSpHandle_t handle,
608
+ int m,
609
+ int n,
610
+ int nnzA,
611
+ const cusparseMatDescr_t descrA,
612
+ const double * csrValA,
613
+ const int * csrRowPtrA,
614
+ const int * csrColIndA,
615
+ double mu,
616
+ csrqrInfo_t info);
617
+
618
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrqrSetup(
619
+ cusolverSpHandle_t handle,
620
+ int m,
621
+ int n,
622
+ int nnzA,
623
+ const cusparseMatDescr_t descrA,
624
+ const cuComplex * csrValA,
625
+ const int * csrRowPtrA,
626
+ const int * csrColIndA,
627
+ cuComplex mu,
628
+ csrqrInfo_t info);
629
+
630
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrqrSetup(
631
+ cusolverSpHandle_t handle,
632
+ int m,
633
+ int n,
634
+ int nnzA,
635
+ const cusparseMatDescr_t descrA,
636
+ const cuDoubleComplex * csrValA,
637
+ const int * csrRowPtrA,
638
+ const int * csrColIndA,
639
+ cuDoubleComplex mu,
640
+ csrqrInfo_t info);
641
+
642
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrqrFactor(
643
+ cusolverSpHandle_t handle,
644
+ int m,
645
+ int n,
646
+ int nnzA,
647
+ float * b,
648
+ float * x,
649
+ csrqrInfo_t info,
650
+ void * pBuffer);
651
+
652
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrqrFactor(
653
+ cusolverSpHandle_t handle,
654
+ int m,
655
+ int n,
656
+ int nnzA,
657
+ double * b,
658
+ double * x,
659
+ csrqrInfo_t info,
660
+ void * pBuffer);
661
+
662
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrqrFactor(
663
+ cusolverSpHandle_t handle,
664
+ int m,
665
+ int n,
666
+ int nnzA,
667
+ cuComplex * b,
668
+ cuComplex * x,
669
+ csrqrInfo_t info,
670
+ void * pBuffer);
671
+
672
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrqrFactor(
673
+ cusolverSpHandle_t handle,
674
+ int m,
675
+ int n,
676
+ int nnzA,
677
+ cuDoubleComplex * b,
678
+ cuDoubleComplex * x,
679
+ csrqrInfo_t info,
680
+ void * pBuffer);
681
+
682
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrqrZeroPivot(
683
+ cusolverSpHandle_t handle,
684
+ csrqrInfo_t info,
685
+ float tol,
686
+ int * position);
687
+
688
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrqrZeroPivot(
689
+ cusolverSpHandle_t handle,
690
+ csrqrInfo_t info,
691
+ double tol,
692
+ int * position);
693
+
694
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrqrZeroPivot(
695
+ cusolverSpHandle_t handle,
696
+ csrqrInfo_t info,
697
+ float tol,
698
+ int * position);
699
+
700
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrqrZeroPivot(
701
+ cusolverSpHandle_t handle,
702
+ csrqrInfo_t info,
703
+ double tol,
704
+ int * position);
705
+
706
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrqrSolve(
707
+ cusolverSpHandle_t handle,
708
+ int m,
709
+ int n,
710
+ float * b,
711
+ float * x,
712
+ csrqrInfo_t info,
713
+ void * pBuffer);
714
+
715
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrqrSolve(
716
+ cusolverSpHandle_t handle,
717
+ int m,
718
+ int n,
719
+ double * b,
720
+ double * x,
721
+ csrqrInfo_t info,
722
+ void * pBuffer);
723
+
724
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrqrSolve(
725
+ cusolverSpHandle_t handle,
726
+ int m,
727
+ int n,
728
+ cuComplex * b,
729
+ cuComplex * x,
730
+ csrqrInfo_t info,
731
+ void * pBuffer);
732
+
733
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrqrSolve(
734
+ cusolverSpHandle_t handle,
735
+ int m,
736
+ int n,
737
+ cuDoubleComplex * b,
738
+ cuDoubleComplex * x,
739
+ csrqrInfo_t info,
740
+ void * pBuffer);
741
+
742
+ /*
743
+ * Low level API for CPU Cholesky
744
+ *
745
+ */
746
+ cusolverStatus_t CUSOLVERAPI
747
+ cusolverSpCreateCsrcholInfoHost(csrcholInfoHost_t *info);
748
+
749
+ cusolverStatus_t CUSOLVERAPI
750
+ cusolverSpDestroyCsrcholInfoHost(csrcholInfoHost_t info);
751
+
752
+ cusolverStatus_t CUSOLVERAPI cusolverSpXcsrcholAnalysisHost(
753
+ cusolverSpHandle_t handle,
754
+ int n,
755
+ int nnzA,
756
+ const cusparseMatDescr_t descrA,
757
+ const int * csrRowPtrA,
758
+ const int * csrColIndA,
759
+ csrcholInfoHost_t info);
760
+
761
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrcholBufferInfoHost(
762
+ cusolverSpHandle_t handle,
763
+ int n,
764
+ int nnzA,
765
+ const cusparseMatDescr_t descrA,
766
+ const float * csrValA,
767
+ const int * csrRowPtrA,
768
+ const int * csrColIndA,
769
+ csrcholInfoHost_t info,
770
+ size_t * internalDataInBytes,
771
+ size_t * workspaceInBytes);
772
+
773
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrcholBufferInfoHost(
774
+ cusolverSpHandle_t handle,
775
+ int n,
776
+ int nnzA,
777
+ const cusparseMatDescr_t descrA,
778
+ const double * csrValA,
779
+ const int * csrRowPtrA,
780
+ const int * csrColIndA,
781
+ csrcholInfoHost_t info,
782
+ size_t * internalDataInBytes,
783
+ size_t * workspaceInBytes);
784
+
785
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrcholBufferInfoHost(
786
+ cusolverSpHandle_t handle,
787
+ int n,
788
+ int nnzA,
789
+ const cusparseMatDescr_t descrA,
790
+ const cuComplex * csrValA,
791
+ const int * csrRowPtrA,
792
+ const int * csrColIndA,
793
+ csrcholInfoHost_t info,
794
+ size_t * internalDataInBytes,
795
+ size_t * workspaceInBytes);
796
+
797
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrcholBufferInfoHost(
798
+ cusolverSpHandle_t handle,
799
+ int n,
800
+ int nnzA,
801
+ const cusparseMatDescr_t descrA,
802
+ const cuDoubleComplex * csrValA,
803
+ const int * csrRowPtrA,
804
+ const int * csrColIndA,
805
+ csrcholInfoHost_t info,
806
+ size_t * internalDataInBytes,
807
+ size_t * workspaceInBytes);
808
+
809
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrcholFactorHost(
810
+ cusolverSpHandle_t handle,
811
+ int n,
812
+ int nnzA,
813
+ const cusparseMatDescr_t descrA,
814
+ const float * csrValA,
815
+ const int * csrRowPtrA,
816
+ const int * csrColIndA,
817
+ csrcholInfoHost_t info,
818
+ void * pBuffer);
819
+
820
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrcholFactorHost(
821
+ cusolverSpHandle_t handle,
822
+ int n,
823
+ int nnzA,
824
+ const cusparseMatDescr_t descrA,
825
+ const double * csrValA,
826
+ const int * csrRowPtrA,
827
+ const int * csrColIndA,
828
+ csrcholInfoHost_t info,
829
+ void * pBuffer);
830
+
831
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrcholFactorHost(
832
+ cusolverSpHandle_t handle,
833
+ int n,
834
+ int nnzA,
835
+ const cusparseMatDescr_t descrA,
836
+ const cuComplex * csrValA,
837
+ const int * csrRowPtrA,
838
+ const int * csrColIndA,
839
+ csrcholInfoHost_t info,
840
+ void * pBuffer);
841
+
842
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrcholFactorHost(
843
+ cusolverSpHandle_t handle,
844
+ int n,
845
+ int nnzA,
846
+ const cusparseMatDescr_t descrA,
847
+ const cuDoubleComplex * csrValA,
848
+ const int * csrRowPtrA,
849
+ const int * csrColIndA,
850
+ csrcholInfoHost_t info,
851
+ void * pBuffer);
852
+
853
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrcholZeroPivotHost(
854
+ cusolverSpHandle_t handle,
855
+ csrcholInfoHost_t info,
856
+ float tol,
857
+ int * position);
858
+
859
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrcholZeroPivotHost(
860
+ cusolverSpHandle_t handle,
861
+ csrcholInfoHost_t info,
862
+ double tol,
863
+ int * position);
864
+
865
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrcholZeroPivotHost(
866
+ cusolverSpHandle_t handle,
867
+ csrcholInfoHost_t info,
868
+ float tol,
869
+ int * position);
870
+
871
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrcholZeroPivotHost(
872
+ cusolverSpHandle_t handle,
873
+ csrcholInfoHost_t info,
874
+ double tol,
875
+ int * position);
876
+
877
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrcholSolveHost(
878
+ cusolverSpHandle_t handle,
879
+ int n,
880
+ const float * b,
881
+ float * x,
882
+ csrcholInfoHost_t info,
883
+ void * pBuffer);
884
+
885
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrcholSolveHost(
886
+ cusolverSpHandle_t handle,
887
+ int n,
888
+ const double * b,
889
+ double * x,
890
+ csrcholInfoHost_t info,
891
+ void * pBuffer);
892
+
893
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrcholSolveHost(
894
+ cusolverSpHandle_t handle,
895
+ int n,
896
+ const cuComplex * b,
897
+ cuComplex * x,
898
+ csrcholInfoHost_t info,
899
+ void * pBuffer);
900
+
901
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrcholSolveHost(
902
+ cusolverSpHandle_t handle,
903
+ int n,
904
+ const cuDoubleComplex *b,
905
+ cuDoubleComplex * x,
906
+ csrcholInfoHost_t info,
907
+ void * pBuffer);
908
+
909
+ /*
910
+ * Low level API for GPU Cholesky
911
+ *
912
+ */
913
+ cusolverStatus_t CUSOLVERAPI cusolverSpCreateCsrcholInfo(csrcholInfo_t *info);
914
+
915
+ cusolverStatus_t CUSOLVERAPI cusolverSpDestroyCsrcholInfo(csrcholInfo_t info);
916
+
917
+ cusolverStatus_t CUSOLVERAPI cusolverSpXcsrcholAnalysis(
918
+ cusolverSpHandle_t handle,
919
+ int n,
920
+ int nnzA,
921
+ const cusparseMatDescr_t descrA,
922
+ const int * csrRowPtrA,
923
+ const int * csrColIndA,
924
+ csrcholInfo_t info);
925
+
926
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrcholBufferInfo(
927
+ cusolverSpHandle_t handle,
928
+ int n,
929
+ int nnzA,
930
+ const cusparseMatDescr_t descrA,
931
+ const float * csrValA,
932
+ const int * csrRowPtrA,
933
+ const int * csrColIndA,
934
+ csrcholInfo_t info,
935
+ size_t * internalDataInBytes,
936
+ size_t * workspaceInBytes);
937
+
938
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrcholBufferInfo(
939
+ cusolverSpHandle_t handle,
940
+ int n,
941
+ int nnzA,
942
+ const cusparseMatDescr_t descrA,
943
+ const double * csrValA,
944
+ const int * csrRowPtrA,
945
+ const int * csrColIndA,
946
+ csrcholInfo_t info,
947
+ size_t * internalDataInBytes,
948
+ size_t * workspaceInBytes);
949
+
950
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrcholBufferInfo(
951
+ cusolverSpHandle_t handle,
952
+ int n,
953
+ int nnzA,
954
+ const cusparseMatDescr_t descrA,
955
+ const cuComplex * csrValA,
956
+ const int * csrRowPtrA,
957
+ const int * csrColIndA,
958
+ csrcholInfo_t info,
959
+ size_t * internalDataInBytes,
960
+ size_t * workspaceInBytes);
961
+
962
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrcholBufferInfo(
963
+ cusolverSpHandle_t handle,
964
+ int n,
965
+ int nnzA,
966
+ const cusparseMatDescr_t descrA,
967
+ const cuDoubleComplex * csrValA,
968
+ const int * csrRowPtrA,
969
+ const int * csrColIndA,
970
+ csrcholInfo_t info,
971
+ size_t * internalDataInBytes,
972
+ size_t * workspaceInBytes);
973
+
974
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrcholFactor(
975
+ cusolverSpHandle_t handle,
976
+ int n,
977
+ int nnzA,
978
+ const cusparseMatDescr_t descrA,
979
+ const float * csrValA,
980
+ const int * csrRowPtrA,
981
+ const int * csrColIndA,
982
+ csrcholInfo_t info,
983
+ void * pBuffer);
984
+
985
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrcholFactor(
986
+ cusolverSpHandle_t handle,
987
+ int n,
988
+ int nnzA,
989
+ const cusparseMatDescr_t descrA,
990
+ const double * csrValA,
991
+ const int * csrRowPtrA,
992
+ const int * csrColIndA,
993
+ csrcholInfo_t info,
994
+ void * pBuffer);
995
+
996
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrcholFactor(
997
+ cusolverSpHandle_t handle,
998
+ int n,
999
+ int nnzA,
1000
+ const cusparseMatDescr_t descrA,
1001
+ const cuComplex * csrValA,
1002
+ const int * csrRowPtrA,
1003
+ const int * csrColIndA,
1004
+ csrcholInfo_t info,
1005
+ void * pBuffer);
1006
+
1007
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrcholFactor(
1008
+ cusolverSpHandle_t handle,
1009
+ int n,
1010
+ int nnzA,
1011
+ const cusparseMatDescr_t descrA,
1012
+ const cuDoubleComplex * csrValA,
1013
+ const int * csrRowPtrA,
1014
+ const int * csrColIndA,
1015
+ csrcholInfo_t info,
1016
+ void * pBuffer);
1017
+
1018
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrcholZeroPivot(
1019
+ cusolverSpHandle_t handle,
1020
+ csrcholInfo_t info,
1021
+ float tol,
1022
+ int * position);
1023
+
1024
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrcholZeroPivot(
1025
+ cusolverSpHandle_t handle,
1026
+ csrcholInfo_t info,
1027
+ double tol,
1028
+ int * position);
1029
+
1030
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrcholZeroPivot(
1031
+ cusolverSpHandle_t handle,
1032
+ csrcholInfo_t info,
1033
+ float tol,
1034
+ int * position);
1035
+
1036
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrcholZeroPivot(
1037
+ cusolverSpHandle_t handle,
1038
+ csrcholInfo_t info,
1039
+ double tol,
1040
+ int * position);
1041
+
1042
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrcholSolve(
1043
+ cusolverSpHandle_t handle,
1044
+ int n,
1045
+ const float * b,
1046
+ float * x,
1047
+ csrcholInfo_t info,
1048
+ void * pBuffer);
1049
+
1050
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrcholSolve(
1051
+ cusolverSpHandle_t handle,
1052
+ int n,
1053
+ const double * b,
1054
+ double * x,
1055
+ csrcholInfo_t info,
1056
+ void * pBuffer);
1057
+
1058
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrcholSolve(
1059
+ cusolverSpHandle_t handle,
1060
+ int n,
1061
+ const cuComplex * b,
1062
+ cuComplex * x,
1063
+ csrcholInfo_t info,
1064
+ void * pBuffer);
1065
+
1066
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrcholSolve(
1067
+ cusolverSpHandle_t handle,
1068
+ int n,
1069
+ const cuDoubleComplex *b,
1070
+ cuDoubleComplex * x,
1071
+ csrcholInfo_t info,
1072
+ void * pBuffer);
1073
+
1074
+ /*
1075
+ * "diag" is a device array of size N.
1076
+ * cusolverSp<t>csrcholDiag returns diag(L) to "diag" where A(P,P) = L*L**T
1077
+ * "diag" can estimate det(A) because det(A(P,P)) = det(A) = det(L)^2 if A =
1078
+ * L*L**T.
1079
+ *
1080
+ * cusolverSp<t>csrcholDiag must be called after cusolverSp<t>csrcholFactor.
1081
+ * otherwise "diag" is wrong.
1082
+ */
1083
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrcholDiag(
1084
+ cusolverSpHandle_t handle,
1085
+ csrcholInfo_t info,
1086
+ float * diag);
1087
+
1088
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrcholDiag(
1089
+ cusolverSpHandle_t handle,
1090
+ csrcholInfo_t info,
1091
+ double * diag);
1092
+
1093
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrcholDiag(
1094
+ cusolverSpHandle_t handle,
1095
+ csrcholInfo_t info,
1096
+ float * diag);
1097
+
1098
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrcholDiag(
1099
+ cusolverSpHandle_t handle,
1100
+ csrcholInfo_t info,
1101
+ double * diag);
1102
+
1103
+ #if defined(__cplusplus)
1104
+ }
1105
+ #endif /* __cplusplus */
1106
+
1107
+ #endif // CUSOLVERSP_LOWLEVEL_PREVIEW_H_
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cusolver/lib/__init__.py ADDED
File without changes
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nccl/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (212 Bytes). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nccl/include/nccl_net.h ADDED
@@ -0,0 +1,456 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*************************************************************************
2
+ * Copyright (c) 2017-2022, NVIDIA CORPORATION. All rights reserved.
3
+ *
4
+ * See LICENSE.txt for license information
5
+ ************************************************************************/
6
+
7
+ #ifndef NCCL_NET_H_
8
+ #define NCCL_NET_H_
9
+
10
+ #include "nccl.h"
11
+ #include "nccl_common.h"
12
+ #include "net_device.h"
13
+ #include <stdint.h>
14
+
15
+ #define NCCL_NET_HANDLE_MAXSIZE 128
16
+
17
+ #define NCCL_PTR_HOST 0x1
18
+ #define NCCL_PTR_CUDA 0x2
19
+ #define NCCL_PTR_DMABUF 0x4
20
+
21
+ // Maximum number of requests per comm object
22
+ #define NCCL_NET_MAX_REQUESTS 32
23
+
24
+ typedef struct {
25
+ char* name; // Used mostly for logging.
26
+ char* pciPath; // Path to the PCI device in /sys.
27
+ uint64_t guid; // Unique identifier for the NIC chip. Important for
28
+ // cards with multiple PCI functions (Physical or virtual).
29
+ int ptrSupport; // [NCCL_PTR_HOST|NCCL_PTR_CUDA|NCCL_PTR_DMABUF]
30
+ int regIsGlobal; // regMr is not tied to a particular comm
31
+ int speed; // Port speed in Mbps.
32
+ int port; // Port number.
33
+ float latency; // Network latency
34
+ int maxComms; // Maximum number of comms we can create
35
+ int maxRecvs; // Maximum number of grouped receives.
36
+ ncclNetDeviceType netDeviceType; // Network offload type
37
+ int netDeviceVersion; // Version number for network offload
38
+ } ncclNetProperties_v8_t;
39
+
40
+ typedef ncclNetProperties_v8_t ncclNetProperties_t;
41
+
42
+ typedef struct {
43
+ // Name of the network (mainly for logs)
44
+ const char* name;
45
+ // Initialize the network.
46
+ ncclResult_t (*init)(ncclDebugLogger_t logFunction);
47
+ // Return the number of adapters.
48
+ ncclResult_t (*devices)(int* ndev);
49
+ // Get various device properties.
50
+ ncclResult_t (*getProperties)(int dev, ncclNetProperties_v8_t* props);
51
+ // Create a receiving object and provide a handle to connect to it. The
52
+ // handle can be up to NCCL_NET_HANDLE_MAXSIZE bytes and will be exchanged
53
+ // between ranks to create a connection.
54
+ ncclResult_t (*listen)(int dev, void* handle, void** listenComm);
55
+ // Connect to a handle and return a sending comm object for that peer.
56
+ // This call must not block for the connection to be established, and instead
57
+ // should return successfully with sendComm == NULL with the expectation that
58
+ // it will be called again until sendComm != NULL.
59
+ // If *sendDevComm points to a valid object, then NCCL is requesting device offload for this connection
60
+ ncclResult_t (*connect)(int dev, void* handle, void** sendComm, ncclNetDeviceHandle_v8_t** sendDevComm);
61
+ // Finalize connection establishment after remote peer has called connect.
62
+ // This call must not block for the connection to be established, and instead
63
+ // should return successfully with recvComm == NULL with the expectation that
64
+ // it will be called again until recvComm != NULL.
65
+ // If *recvDevComm points to a valid object, then NCCL is requesting device offload for this connection
66
+ ncclResult_t (*accept)(void* listenComm, void** recvComm, ncclNetDeviceHandle_v8_t** recvDevComm);
67
+ // Register/Deregister memory. Comm can be either a sendComm or a recvComm.
68
+ // Type is either NCCL_PTR_HOST or NCCL_PTR_CUDA.
69
+ ncclResult_t (*regMr)(void* comm, void* data, size_t size, int type, void** mhandle);
70
+ /* DMA-BUF support */
71
+ ncclResult_t (*regMrDmaBuf)(void* comm, void* data, size_t size, int type, uint64_t offset, int fd, void** mhandle);
72
+ ncclResult_t (*deregMr)(void* comm, void* mhandle);
73
+ // Asynchronous send to a peer.
74
+ // May return request == NULL if the call cannot be performed (or would block)
75
+ ncclResult_t (*isend)(void* sendComm, void* data, int size, int tag, void* mhandle, void** request);
76
+ // Asynchronous recv from a peer.
77
+ // May return request == NULL if the call cannot be performed (or would block)
78
+ ncclResult_t (*irecv)(void* recvComm, int n, void** data, int* sizes, int* tags, void** mhandles, void** request);
79
+ // Perform a flush/fence to make sure all data received with NCCL_PTR_CUDA is
80
+ // visible to the GPU
81
+ ncclResult_t (*iflush)(void* recvComm, int n, void** data, int* sizes, void** mhandles, void** request);
82
+ // Test whether a request is complete. If size is not NULL, it returns the
83
+ // number of bytes sent/received.
84
+ ncclResult_t (*test)(void* request, int* done, int* sizes);
85
+ // Close and free send/recv comm objects
86
+ ncclResult_t (*closeSend)(void* sendComm);
87
+ ncclResult_t (*closeRecv)(void* recvComm);
88
+ ncclResult_t (*closeListen)(void* listenComm);
89
+
90
+ // Copy the given mhandle to a dptr in a format usable by this plugin's device code
91
+ ncclResult_t (*getDeviceMr)(void* comm, void* mhandle, void** dptr_mhandle);
92
+
93
+ // Notify the plugin that a recv has completed by the device
94
+ ncclResult_t (*irecvConsumed)(void* recvComm, int n, void* request);
95
+ } ncclNet_v8_t;
96
+
97
+ typedef ncclNet_v8_t ncclNet_t;
98
+
99
+ #define NCCL_NET_PLUGIN_SYMBOL ncclNetPlugin_v8
100
+
101
+ typedef struct {
102
+ void* mhandle;
103
+ void* address;
104
+ uint32_t size;
105
+ } ncclNetSGE_v8_t;
106
+
107
+ typedef struct {
108
+ // Name of the collective network (mainly for logs)
109
+ const char* name;
110
+ // Initialize the collective network.
111
+ ncclResult_t (*init)(ncclDebugLogger_t logFunction);
112
+ // Return the number of adapters capable of doing collective operations.
113
+ // If ndev returns 0, all other functions might be set to NULL.
114
+ ncclResult_t (*devices)(int* ndev);
115
+ // Get various device properties.
116
+ ncclResult_t (*getProperties)(int dev, ncclNetProperties_v8_t* props);
117
+ // Create a receiving object and provide a handle to connect to it. The
118
+ // handle can be up to NCCL_NET_HANDLE_MAXSIZE bytes and will be exchanged
119
+ // between ranks to create connections.
120
+ ncclResult_t (*listen)(int dev, void* handle, void** listenComm);
121
+ // Create a group for collective operations. handles have been created
122
+ // using listen() above. rank indicates caller's rank in the collective network.
123
+ ncclResult_t (*connect)(void* handles[], int nranks, int rank, void* listenComm, void** collComm);
124
+ // Returns whether a reduction operation on a data type is supported.
125
+ // 1 for supported, 0 otherwise.
126
+ ncclResult_t (*reduceSupport)(ncclDataType_t dataType, ncclRedOp_t redOp, int* supported);
127
+ // Register/Deregister memory. Type is either NCCL_PTR_HOST or NCCL_PTR_CUDA.
128
+ ncclResult_t (*regMr)(void* collComm, void* data, size_t size, int type, void** mhandle);
129
+ /* DMA-BUF support */
130
+ ncclResult_t (*regMrDmaBuf)(void* collComm, void* data, size_t size, int type, uint64_t offset, int fd, void** mhandle);
131
+ ncclResult_t (*deregMr)(void* collComm, void* mhandle);
132
+ // Performs an asynchronous allreduce operation on the collective group.
133
+ // May return request == NULL if the call cannot be performed (or would block).
134
+ ncclResult_t (*iallreduce)(void* collComm, void* sendData, void* recvData, int count,
135
+ ncclDataType_t dataType, ncclRedOp_t redOp, void* sendMhandle, void* recvMhandle, void** request);
136
+ ncclResult_t (*iallgather)(void* collComm, void* sendData, int nRecvParts, ncclNetSGE_v8_t* recvParts,
137
+ size_t bytesPerRank, size_t windowOffset, size_t windowBytes,
138
+ void* sendMhandle, void** request);
139
+ ncclResult_t (*ireducescatter)(void* collComm, int nSendParts, ncclNetSGE_v8_t* sendParts, void* recvData,
140
+ size_t bytesPerRank, size_t windowOffset, size_t windowBytes,
141
+ ncclDataType_t dataType, ncclRedOp_t redOp,
142
+ void* recvMhandle, void** request);
143
+ // Perform a flush/fence to make sure all data received with NCCL_PTR_CUDA is
144
+ // visible to the GPU
145
+ ncclResult_t (*iflush)(void* collComm, void* data, int size, void* mhandle, void** request);
146
+ // Test whether a request is complete. If size is not NULL, it returns the
147
+ // number of bytes sent/received.
148
+ ncclResult_t (*test)(void* request, int* done, int* size);
149
+ // Close and free collective comm objects
150
+ ncclResult_t (*closeColl)(void* collComm);
151
+ ncclResult_t (*closeListen)(void* listenComm);
152
+ } ncclCollNet_v8_t;
153
+
154
+ typedef ncclCollNet_v8_t ncclCollNet_t;
155
+
156
+ #define NCCL_COLLNET_PLUGIN_SYMBOL ncclCollNetPlugin_v8
157
+
158
+ typedef struct {
159
+ char* name; // Used mostly for logging.
160
+ char* pciPath; // Path to the PCI device in /sys.
161
+ uint64_t guid; // Unique identifier for the NIC chip. Important for
162
+ // cards with multiple PCI functions (Physical or virtual).
163
+ int ptrSupport; // [NCCL_PTR_HOST|NCCL_PTR_CUDA|NCCL_PTR_DMABUF]
164
+ int speed; // Port speed in Mbps.
165
+ int port; // Port number.
166
+ float latency; // Network latency
167
+ int maxComms; // Maximum number of comms we can create
168
+ int maxRecvs; // Maximum number of grouped receives.
169
+ ncclNetDeviceType netDeviceType; // Network offload type
170
+ int netDeviceVersion; // Version number for network offload
171
+ } ncclNetProperties_v7_t;
172
+
173
+ typedef struct {
174
+ // Name of the network (mainly for logs)
175
+ const char* name;
176
+ // Initialize the network.
177
+ ncclResult_t (*init)(ncclDebugLogger_t logFunction);
178
+ // Return the number of adapters.
179
+ ncclResult_t (*devices)(int* ndev);
180
+ // Get various device properties.
181
+ ncclResult_t (*getProperties)(int dev, ncclNetProperties_v7_t* props);
182
+ // Create a receiving object and provide a handle to connect to it. The
183
+ // handle can be up to NCCL_NET_HANDLE_MAXSIZE bytes and will be exchanged
184
+ // between ranks to create a connection.
185
+ ncclResult_t (*listen)(int dev, void* handle, void** listenComm);
186
+ // Connect to a handle and return a sending comm object for that peer.
187
+ // This call must not block for the connection to be established, and instead
188
+ // should return successfully with sendComm == NULL with the expectation that
189
+ // it will be called again until sendComm != NULL.
190
+ // If *sendDevComm points to a valid object, then NCCL is requesting device offload for this connection
191
+ ncclResult_t (*connect)(int dev, void* handle, void** sendComm, ncclNetDeviceHandle_v7_t** sendDevComm);
192
+ // Finalize connection establishment after remote peer has called connect.
193
+ // This call must not block for the connection to be established, and instead
194
+ // should return successfully with recvComm == NULL with the expectation that
195
+ // it will be called again until recvComm != NULL.
196
+ // If *recvDevComm points to a valid object, then NCCL is requesting device offload for this connection
197
+ ncclResult_t (*accept)(void* listenComm, void** recvComm, ncclNetDeviceHandle_v7_t** recvDevComm);
198
+ // Register/Deregister memory. Comm can be either a sendComm or a recvComm.
199
+ // Type is either NCCL_PTR_HOST or NCCL_PTR_CUDA.
200
+ ncclResult_t (*regMr)(void* comm, void* data, int size, int type, void** mhandle);
201
+ /* DMA-BUF support */
202
+ ncclResult_t (*regMrDmaBuf)(void* comm, void* data, size_t size, int type, uint64_t offset, int fd, void** mhandle);
203
+ ncclResult_t (*deregMr)(void* comm, void* mhandle);
204
+ // Asynchronous send to a peer.
205
+ // May return request == NULL if the call cannot be performed (or would block)
206
+ ncclResult_t (*isend)(void* sendComm, void* data, int size, int tag, void* mhandle, void** request);
207
+ // Asynchronous recv from a peer.
208
+ // May return request == NULL if the call cannot be performed (or would block)
209
+ ncclResult_t (*irecv)(void* recvComm, int n, void** data, int* sizes, int* tags, void** mhandles, void** request);
210
+ // Perform a flush/fence to make sure all data received with NCCL_PTR_CUDA is
211
+ // visible to the GPU
212
+ ncclResult_t (*iflush)(void* recvComm, int n, void** data, int* sizes, void** mhandles, void** request);
213
+ // Test whether a request is complete. If size is not NULL, it returns the
214
+ // number of bytes sent/received.
215
+ ncclResult_t (*test)(void* request, int* done, int* sizes);
216
+ // Close and free send/recv comm objects
217
+ ncclResult_t (*closeSend)(void* sendComm);
218
+ ncclResult_t (*closeRecv)(void* recvComm);
219
+ ncclResult_t (*closeListen)(void* listenComm);
220
+
221
+ // Copy the given mhandle to a dptr in a format usable by this plugin's device code
222
+ ncclResult_t (*getDeviceMr)(void* comm, void* mhandle, void** dptr_mhandle);
223
+
224
+ // Notify the plugin that a recv has completed by the device
225
+ ncclResult_t (*irecvConsumed)(void* recvComm, int n, void* request);
226
+ } ncclNet_v7_t;
227
+
228
+ typedef struct {
229
+ // Name of the collective network (mainly for logs)
230
+ const char* name;
231
+ // Initialize the collective network.
232
+ ncclResult_t (*init)(ncclDebugLogger_t logFunction);
233
+ // Return the number of adapters capable of doing collective operations.
234
+ // If ndev returns 0, all other functions might be set to NULL.
235
+ ncclResult_t (*devices)(int* ndev);
236
+ // Get various device properties.
237
+ ncclResult_t (*getProperties)(int dev, ncclNetProperties_v7_t* props);
238
+ // Create a receiving object and provide a handle to connect to it. The
239
+ // handle can be up to NCCL_NET_HANDLE_MAXSIZE bytes and will be exchanged
240
+ // between ranks to create connections.
241
+ ncclResult_t (*listen)(int dev, void* handle, void** listenComm);
242
+ // Create a group for collective operations. handles have been created
243
+ // using listen() above. rank indicates caller's rank in the collective network.
244
+ ncclResult_t (*connect)(void* handles[], int nranks, int rank, void* listenComm, void** collComm);
245
+ // Returns whether a reduction operation on a data type is supported.
246
+ // 1 for supported, 0 otherwise.
247
+ ncclResult_t (*reduceSupport)(ncclDataType_t dataType, ncclRedOp_t redOp, int* supported);
248
+ // Register/Deregister memory. Type is either NCCL_PTR_HOST or NCCL_PTR_CUDA.
249
+ ncclResult_t (*regMr)(void* collComm, void* data, int size, int type, void** mhandle);
250
+ /* DMA-BUF support */
251
+ ncclResult_t (*regMrDmaBuf)(void* collComm, void* data, size_t size, int type, uint64_t offset, int fd, void** mhandle);
252
+ ncclResult_t (*deregMr)(void* collComm, void* mhandle);
253
+ // Performs an asynchronous allreduce operation on the collective group.
254
+ // May return request == NULL if the call cannot be performed (or would block).
255
+ ncclResult_t (*iallreduce)(void* collComm, void* sendData, void* recvData, int count,
256
+ ncclDataType_t dataType, ncclRedOp_t redOp, void* sendMhandle, void* recvMhandle, void** request);
257
+ // Perform a flush/fence to make sure all data received with NCCL_PTR_CUDA is
258
+ // visible to the GPU
259
+ ncclResult_t (*iflush)(void* collComm, void* data, int size, void* mhandle, void** request);
260
+ // Test whether a request is complete. If size is not NULL, it returns the
261
+ // number of bytes sent/received.
262
+ ncclResult_t (*test)(void* request, int* done, int* size);
263
+ // Close and free collective comm objects
264
+ ncclResult_t (*closeColl)(void* collComm);
265
+ ncclResult_t (*closeListen)(void* listenComm);
266
+ } ncclCollNet_v7_t;
267
+
268
+ #define NCCL_NET_MAX_REQUESTS_V6 8
269
+
270
+ // v6 struct for backwards compatibility
271
+ typedef struct {
272
+ char* name; // Used mostly for logging.
273
+ char* pciPath; // Path to the PCI device in /sys.
274
+ uint64_t guid; // Unique identifier for the NIC chip. Important for
275
+ // cards with multiple PCI functions (Physical or virtual).
276
+ int ptrSupport; // [NCCL_PTR_HOST|NCCL_PTR_CUDA|NCCL_PTR_DMABUF]
277
+ int speed; // Port speed in Mbps.
278
+ int port; // Port number.
279
+ float latency; // Network latency
280
+ int maxComms; // Maximum number of comms we can create
281
+ int maxRecvs; // Maximum number of grouped receives.
282
+ } ncclNetProperties_v6_t;
283
+
284
+ typedef struct {
285
+ // Name of the network (mainly for logs)
286
+ const char* name;
287
+ // Initialize the network.
288
+ ncclResult_t (*init)(ncclDebugLogger_t logFunction);
289
+ // Return the number of adapters.
290
+ ncclResult_t (*devices)(int* ndev);
291
+ // Get various device properties.
292
+ ncclResult_t (*getProperties)(int dev, ncclNetProperties_v6_t* props);
293
+ // Create a receiving object and provide a handle to connect to it. The
294
+ // handle can be up to NCCL_NET_HANDLE_MAXSIZE bytes and will be exchanged
295
+ // between ranks to create a connection.
296
+ ncclResult_t (*listen)(int dev, void* handle, void** listenComm);
297
+ // Connect to a handle and return a sending comm object for that peer.
298
+ // This call must not block for the connection to be established, and instead
299
+ // should return successfully with sendComm == NULL with the expectation that
300
+ // it will be called again until sendComm != NULL.
301
+ ncclResult_t (*connect)(int dev, void* handle, void** sendComm);
302
+ // Finalize connection establishment after remote peer has called connect.
303
+ // This call must not block for the connection to be established, and instead
304
+ // should return successfully with recvComm == NULL with the expectation that
305
+ // it will be called again until recvComm != NULL.
306
+ ncclResult_t (*accept)(void* listenComm, void** recvComm);
307
+ // Register/Deregister memory. Comm can be either a sendComm or a recvComm.
308
+ // Type is either NCCL_PTR_HOST or NCCL_PTR_CUDA.
309
+ ncclResult_t (*regMr)(void* comm, void* data, int size, int type, void** mhandle);
310
+ /* DMA-BUF support */
311
+ ncclResult_t (*regMrDmaBuf)(void* comm, void* data, size_t size, int type, uint64_t offset, int fd, void** mhandle);
312
+ ncclResult_t (*deregMr)(void* comm, void* mhandle);
313
+ // Asynchronous send to a peer.
314
+ // May return request == NULL if the call cannot be performed (or would block)
315
+ ncclResult_t (*isend)(void* sendComm, void* data, int size, int tag, void* mhandle, void** request);
316
+ // Asynchronous recv from a peer.
317
+ // May return request == NULL if the call cannot be performed (or would block)
318
+ ncclResult_t (*irecv)(void* recvComm, int n, void** data, int* sizes, int* tags, void** mhandles, void** request);
319
+ // Perform a flush/fence to make sure all data received with NCCL_PTR_CUDA is
320
+ // visible to the GPU
321
+ ncclResult_t (*iflush)(void* recvComm, int n, void** data, int* sizes, void** mhandles, void** request);
322
+ // Test whether a request is complete. If size is not NULL, it returns the
323
+ // number of bytes sent/received.
324
+ ncclResult_t (*test)(void* request, int* done, int* sizes);
325
+ // Close and free send/recv comm objects
326
+ ncclResult_t (*closeSend)(void* sendComm);
327
+ ncclResult_t (*closeRecv)(void* recvComm);
328
+ ncclResult_t (*closeListen)(void* listenComm);
329
+ } ncclNet_v6_t;
330
+
331
+ typedef struct {
332
+ // Name of the collective network (mainly for logs)
333
+ const char* name;
334
+ // Initialize the collective network.
335
+ ncclResult_t (*init)(ncclDebugLogger_t logFunction);
336
+ // Return the number of adapters capable of doing collective operations.
337
+ // If ndev returns 0, all other functions might be set to NULL.
338
+ ncclResult_t (*devices)(int* ndev);
339
+ // Get various device properties.
340
+ ncclResult_t (*getProperties)(int dev, ncclNetProperties_v6_t* props);
341
+ // Create a receiving object and provide a handle to connect to it. The
342
+ // handle can be up to NCCL_NET_HANDLE_MAXSIZE bytes and will be exchanged
343
+ // between ranks to create connections.
344
+ ncclResult_t (*listen)(int dev, void* handle, void** listenComm);
345
+ // Create a group for collective operations. handles have been created
346
+ // using listen() above. rank indicates caller's rank in the collective network.
347
+ ncclResult_t (*connect)(void* handles[], int nranks, int rank, void* listenComm, void** collComm);
348
+ // Returns whether a reduction operation on a data type is supported.
349
+ // 1 for supported, 0 otherwise.
350
+ ncclResult_t (*reduceSupport)(ncclDataType_t dataType, ncclRedOp_t redOp, int* supported);
351
+ // Register/Deregister memory. Type is either NCCL_PTR_HOST or NCCL_PTR_CUDA.
352
+ ncclResult_t (*regMr)(void* collComm, void* data, int size, int type, void** mhandle);
353
+ /* DMA-BUF support */
354
+ ncclResult_t (*regMrDmaBuf)(void* collComm, void* data, size_t size, int type, uint64_t offset, int fd, void** mhandle);
355
+ ncclResult_t (*deregMr)(void* collComm, void* mhandle);
356
+ // Performs an asynchronous allreduce operation on the collective group.
357
+ // May return request == NULL if the call cannot be performed (or would block).
358
+ ncclResult_t (*iallreduce)(void* collComm, void* sendData, void* recvData, int count,
359
+ ncclDataType_t dataType, ncclRedOp_t redOp, void* sendMhandle, void* recvMhandle, void** request);
360
+ // Perform a flush/fence to make sure all data received with NCCL_PTR_CUDA is
361
+ // visible to the GPU
362
+ ncclResult_t (*iflush)(void* collComm, void* data, int size, void* mhandle, void** request);
363
+ // Test whether a request is complete. If size is not NULL, it returns the
364
+ // number of bytes sent/received.
365
+ ncclResult_t (*test)(void* request, int* done, int* size);
366
+ // Close and free collective comm objects
367
+ ncclResult_t (*closeColl)(void* collComm);
368
+ ncclResult_t (*closeListen)(void* listenComm);
369
+ } ncclCollNet_v6_t;
370
+
371
+ // v5 struct for backwards compatibility
372
+ typedef struct {
373
+ // Name of the network (mainly for logs)
374
+ const char* name;
375
+ // Initialize the network.
376
+ ncclResult_t (*init)(ncclDebugLogger_t logFunction);
377
+ // Return the number of adapters.
378
+ ncclResult_t (*devices)(int* ndev);
379
+ // Get various device properties.
380
+ ncclResult_t (*getProperties)(int dev, ncclNetProperties_v6_t* props);
381
+ // Create a receiving object and provide a handle to connect to it. The
382
+ // handle can be up to NCCL_NET_HANDLE_MAXSIZE bytes and will be exchanged
383
+ // between ranks to create a connection.
384
+ ncclResult_t (*listen)(int dev, void* handle, void** listenComm);
385
+ // Connect to a handle and return a sending comm object for that peer.
386
+ // This call must not block for the connection to be established, and instead
387
+ // should return successfully with sendComm == NULL with the expectation that
388
+ // it will be called again until sendComm != NULL.
389
+ ncclResult_t (*connect)(int dev, void* handle, void** sendComm);
390
+ // Finalize connection establishment after remote peer has called connect.
391
+ // This call must not block for the connection to be established, and instead
392
+ // should return successfully with recvComm == NULL with the expectation that
393
+ // it will be called again until recvComm != NULL.
394
+ ncclResult_t (*accept)(void* listenComm, void** recvComm);
395
+ // Register/Deregister memory. Comm can be either a sendComm or a recvComm.
396
+ // Type is either NCCL_PTR_HOST or NCCL_PTR_CUDA.
397
+ ncclResult_t (*regMr)(void* comm, void* data, int size, int type, void** mhandle);
398
+ ncclResult_t (*deregMr)(void* comm, void* mhandle);
399
+ // Asynchronous send to a peer.
400
+ // May return request == NULL if the call cannot be performed (or would block)
401
+ ncclResult_t (*isend)(void* sendComm, void* data, int size, int tag, void* mhandle, void** request);
402
+ // Asynchronous recv from a peer.
403
+ // May return request == NULL if the call cannot be performed (or would block)
404
+ ncclResult_t (*irecv)(void* recvComm, int n, void** data, int* sizes, int* tags, void** mhandles, void** request);
405
+ // Perform a flush/fence to make sure all data received with NCCL_PTR_CUDA is
406
+ // visible to the GPU
407
+ ncclResult_t (*iflush)(void* recvComm, int n, void** data, int* sizes, void** mhandles, void** request);
408
+ // Test whether a request is complete. If size is not NULL, it returns the
409
+ // number of bytes sent/received.
410
+ ncclResult_t (*test)(void* request, int* done, int* sizes);
411
+ // Close and free send/recv comm objects
412
+ ncclResult_t (*closeSend)(void* sendComm);
413
+ ncclResult_t (*closeRecv)(void* recvComm);
414
+ ncclResult_t (*closeListen)(void* listenComm);
415
+ } ncclNet_v5_t;
416
+
417
+ // v5 struct for backwards compatibility
418
+ typedef struct {
419
+ // Name of the collective network (mainly for logs)
420
+ const char* name;
421
+ // Initialize the collective network.
422
+ ncclResult_t (*init)(ncclDebugLogger_t logFunction);
423
+ // Return the number of adapters capable of doing collective operations.
424
+ // If ndev returns 0, all other functions might be set to NULL.
425
+ ncclResult_t (*devices)(int* ndev);
426
+ // Get various device properties.
427
+ ncclResult_t (*getProperties)(int dev, ncclNetProperties_v6_t* props);
428
+ // Create a receiving object and provide a handle to connect to it. The
429
+ // handle can be up to NCCL_NET_HANDLE_MAXSIZE bytes and will be exchanged
430
+ // between ranks to create connections.
431
+ ncclResult_t (*listen)(int dev, void* handle, void** listenComm);
432
+ // Create a group for collective operations. handles have been created
433
+ // using listen() above. rank indicates caller's rank in the collective network.
434
+ ncclResult_t (*connect)(void* handles[], int nranks, int rank, void* listenComm, void** collComm);
435
+ // Returns whether a reduction operation on a data type is supported.
436
+ // 1 for supported, 0 otherwise.
437
+ ncclResult_t (*reduceSupport)(ncclDataType_t dataType, ncclRedOp_t redOp, int* supported);
438
+ // Register/Deregister memory. Type is either NCCL_PTR_HOST or NCCL_PTR_CUDA.
439
+ ncclResult_t (*regMr)(void* collComm, void* data, int size, int type, void** mhandle);
440
+ ncclResult_t (*deregMr)(void* collComm, void* mhandle);
441
+ // Performs an asynchronous allreduce operation on the collective group.
442
+ // May return request == NULL if the call cannot be performed (or would block).
443
+ ncclResult_t (*iallreduce)(void* collComm, void* sendData, void* recvData, int count,
444
+ ncclDataType_t dataType, ncclRedOp_t redOp, void* sendMhandle, void* recvMhandle, void** request);
445
+ // Perform a flush/fence to make sure all data received with NCCL_PTR_CUDA is
446
+ // visible to the GPU
447
+ ncclResult_t (*iflush)(void* collComm, void* data, int size, void* mhandle, void** request);
448
+ // Test whether a request is complete. If size is not NULL, it returns the
449
+ // number of bytes sent/received.
450
+ ncclResult_t (*test)(void* request, int* done, int* size);
451
+ // Close and free collective comm objects
452
+ ncclResult_t (*closeColl)(void* collComm);
453
+ ncclResult_t (*closeListen)(void* listenComm);
454
+ } ncclCollNet_v5_t;
455
+
456
+ #endif // end include guard
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvtx3/nvToolsExtCudaRt.h ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2009-2016 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO USER:
5
+ *
6
+ * This source code is subject to NVIDIA ownership rights under U.S. and
7
+ * international Copyright laws.
8
+ *
9
+ * This software and the information contained herein is PROPRIETARY and
10
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and conditions
11
+ * of a form of NVIDIA software license agreement.
12
+ *
13
+ * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
14
+ * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
15
+ * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
16
+ * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
17
+ * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
18
+ * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
19
+ * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
20
+ * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
21
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
22
+ * OR PERFORMANCE OF THIS SOURCE CODE.
23
+ *
24
+ * U.S. Government End Users. This source code is a "commercial item" as
25
+ * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
26
+ * "commercial computer software" and "commercial computer software
27
+ * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
28
+ * and is provided to the U.S. Government only as a commercial end item.
29
+ * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
30
+ * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
31
+ * source code with only those rights set forth herein.
32
+ *
33
+ * Any use of this source code in individual and commercial software must
34
+ * include, in the user documentation and internal comments to the code,
35
+ * the above Disclaimer and U.S. Government End Users Notice.
36
+ */
37
+
38
+ #include "nvToolsExt.h"
39
+
40
+ #include "cuda.h"
41
+ #include "driver_types.h"
42
+
43
+ #ifndef NVTOOLSEXT_CUDART_V3
44
+ #define NVTOOLSEXT_CUDART_V3
45
+
46
+ #ifdef __cplusplus
47
+ extern "C" {
48
+ #endif /* __cplusplus */
49
+
50
+ /* ========================================================================= */
51
+ /** \name Functions for CUDA Resource Naming
52
+ */
53
+ /** \addtogroup RESOURCE_NAMING
54
+ * \section RESOURCE_NAMING_CUDART CUDA Runtime Resource Naming
55
+ *
56
+ * This section covers the API functions that allow to annotate CUDA resources
57
+ * with user-provided names.
58
+ *
59
+ * @{
60
+ */
61
+
62
+ /* ------------------------------------------------------------------------- */
63
+ /* \cond SHOW_HIDDEN
64
+ * \brief Used to build a non-colliding value for resource types separated class
65
+ * \version \NVTX_VERSION_2
66
+ */
67
+ #define NVTX_RESOURCE_CLASS_CUDART 5
68
+ /** \endcond */
69
+
70
+ /* ------------------------------------------------------------------------- */
71
+ /** \brief Resource types for CUDART
72
+ */
73
+ typedef enum nvtxResourceCUDARTType_t
74
+ {
75
+ NVTX_RESOURCE_TYPE_CUDART_DEVICE = NVTX_RESOURCE_MAKE_TYPE(CUDART, 0), /* int device */
76
+ NVTX_RESOURCE_TYPE_CUDART_STREAM = NVTX_RESOURCE_MAKE_TYPE(CUDART, 1), /* cudaStream_t */
77
+ NVTX_RESOURCE_TYPE_CUDART_EVENT = NVTX_RESOURCE_MAKE_TYPE(CUDART, 2), /* cudaEvent_t */
78
+ } nvtxResourceCUDARTType_t;
79
+
80
+
81
+ /* ------------------------------------------------------------------------- */
82
+ /** \brief Annotates a CUDA device.
83
+ *
84
+ * Allows the user to associate a CUDA device with a user-provided name.
85
+ *
86
+ * \param device - The id of the CUDA device to name.
87
+ * \param name - The name of the CUDA device.
88
+ *
89
+ * \version \NVTX_VERSION_1
90
+ * @{ */
91
+ NVTX_DECLSPEC void NVTX_API nvtxNameCudaDeviceA(int device, const char* name);
92
+ NVTX_DECLSPEC void NVTX_API nvtxNameCudaDeviceW(int device, const wchar_t* name);
93
+ /** @} */
94
+
95
+ /* ------------------------------------------------------------------------- */
96
+ /** \brief Annotates a CUDA stream.
97
+ *
98
+ * Allows the user to associate a CUDA stream with a user-provided name.
99
+ *
100
+ * \param stream - The handle of the CUDA stream to name.
101
+ * \param name - The name of the CUDA stream.
102
+ *
103
+ * \version \NVTX_VERSION_1
104
+ * @{ */
105
+ NVTX_DECLSPEC void NVTX_API nvtxNameCudaStreamA(cudaStream_t stream, const char* name);
106
+ NVTX_DECLSPEC void NVTX_API nvtxNameCudaStreamW(cudaStream_t stream, const wchar_t* name);
107
+ /** @} */
108
+
109
+ /* ------------------------------------------------------------------------- */
110
+ /** \brief Annotates a CUDA event.
111
+ *
112
+ * Allows the user to associate a CUDA event with a user-provided name.
113
+ *
114
+ * \param event - The handle of the CUDA event to name.
115
+ * \param name - The name of the CUDA event.
116
+ *
117
+ * \version \NVTX_VERSION_1
118
+ * @{ */
119
+ NVTX_DECLSPEC void NVTX_API nvtxNameCudaEventA(cudaEvent_t event, const char* name);
120
+ NVTX_DECLSPEC void NVTX_API nvtxNameCudaEventW(cudaEvent_t event, const wchar_t* name);
121
+ /** @} */
122
+
123
+ /** @} */ /* END RESOURCE_NAMING */
124
+
125
+ /* ========================================================================= */
126
+ #ifdef UNICODE
127
+ #define nvtxNameCudaDevice nvtxNameCudaDeviceW
128
+ #define nvtxNameCudaStream nvtxNameCudaStreamW
129
+ #define nvtxNameCudaEvent nvtxNameCudaEventW
130
+ #else
131
+ #define nvtxNameCudaDevice nvtxNameCudaDeviceA
132
+ #define nvtxNameCudaStream nvtxNameCudaStreamA
133
+ #define nvtxNameCudaEvent nvtxNameCudaEventA
134
+ #endif
135
+
136
+ #ifdef __cplusplus
137
+ }
138
+ #endif /* __cplusplus */
139
+
140
+ #ifndef NVTX_NO_IMPL
141
+ #define NVTX_IMPL_GUARD_CUDART /* Ensure other headers cannot included directly */
142
+ #include "nvtxDetail/nvtxImplCudaRt_v3.h"
143
+ #undef NVTX_IMPL_GUARD_CUDART
144
+ #endif /*NVTX_NO_IMPL*/
145
+
146
+ #endif /* NVTOOLSEXT_CUDART_V3 */
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvtx3/nvtxDetail/nvtxImplCore.h ADDED
@@ -0,0 +1,299 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ NVTX_DECLSPEC void NVTX_API nvtxMarkEx(const nvtxEventAttributes_t* eventAttrib)
2
+ {
3
+ #ifndef NVTX_DISABLE
4
+ nvtxMarkEx_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkEx_impl_fnptr;
5
+ if(local!=0)
6
+ (*local)(eventAttrib);
7
+ #endif /*NVTX_DISABLE*/
8
+ }
9
+
10
+ NVTX_DECLSPEC void NVTX_API nvtxMarkA(const char* message)
11
+ {
12
+ #ifndef NVTX_DISABLE
13
+ nvtxMarkA_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkA_impl_fnptr;
14
+ if(local!=0)
15
+ (*local)(message);
16
+ #endif /*NVTX_DISABLE*/
17
+ }
18
+
19
+ NVTX_DECLSPEC void NVTX_API nvtxMarkW(const wchar_t* message)
20
+ {
21
+ #ifndef NVTX_DISABLE
22
+ nvtxMarkW_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkW_impl_fnptr;
23
+ if(local!=0)
24
+ (*local)(message);
25
+ #endif /*NVTX_DISABLE*/
26
+ }
27
+
28
+ NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxRangeStartEx(const nvtxEventAttributes_t* eventAttrib)
29
+ {
30
+ #ifndef NVTX_DISABLE
31
+ nvtxRangeStartEx_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartEx_impl_fnptr;
32
+ if(local!=0)
33
+ return (*local)(eventAttrib);
34
+ else
35
+ #endif /*NVTX_DISABLE*/
36
+ return (nvtxRangeId_t)0;
37
+ }
38
+
39
+ NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxRangeStartA(const char* message)
40
+ {
41
+ #ifndef NVTX_DISABLE
42
+ nvtxRangeStartA_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartA_impl_fnptr;
43
+ if(local!=0)
44
+ return (*local)(message);
45
+ else
46
+ #endif /*NVTX_DISABLE*/
47
+ return (nvtxRangeId_t)0;
48
+ }
49
+
50
+ NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxRangeStartW(const wchar_t* message)
51
+ {
52
+ #ifndef NVTX_DISABLE
53
+ nvtxRangeStartW_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartW_impl_fnptr;
54
+ if(local!=0)
55
+ return (*local)(message);
56
+ else
57
+ #endif /*NVTX_DISABLE*/
58
+ return (nvtxRangeId_t)0;
59
+ }
60
+
61
+ NVTX_DECLSPEC void NVTX_API nvtxRangeEnd(nvtxRangeId_t id)
62
+ {
63
+ #ifndef NVTX_DISABLE
64
+ nvtxRangeEnd_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeEnd_impl_fnptr;
65
+ if(local!=0)
66
+ (*local)(id);
67
+ #endif /*NVTX_DISABLE*/
68
+ }
69
+
70
+ NVTX_DECLSPEC int NVTX_API nvtxRangePushEx(const nvtxEventAttributes_t* eventAttrib)
71
+ {
72
+ #ifndef NVTX_DISABLE
73
+ nvtxRangePushEx_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushEx_impl_fnptr;
74
+ if(local!=0)
75
+ return (*local)(eventAttrib);
76
+ else
77
+ #endif /*NVTX_DISABLE*/
78
+ return (int)NVTX_NO_PUSH_POP_TRACKING;
79
+ }
80
+
81
+ NVTX_DECLSPEC int NVTX_API nvtxRangePushA(const char* message)
82
+ {
83
+ #ifndef NVTX_DISABLE
84
+ nvtxRangePushA_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushA_impl_fnptr;
85
+ if(local!=0)
86
+ return (*local)(message);
87
+ else
88
+ #endif /*NVTX_DISABLE*/
89
+ return (int)NVTX_NO_PUSH_POP_TRACKING;
90
+ }
91
+
92
+ NVTX_DECLSPEC int NVTX_API nvtxRangePushW(const wchar_t* message)
93
+ {
94
+ #ifndef NVTX_DISABLE
95
+ nvtxRangePushW_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushW_impl_fnptr;
96
+ if(local!=0)
97
+ return (*local)(message);
98
+ else
99
+ #endif /*NVTX_DISABLE*/
100
+ return (int)NVTX_NO_PUSH_POP_TRACKING;
101
+ }
102
+
103
+ NVTX_DECLSPEC int NVTX_API nvtxRangePop(void)
104
+ {
105
+ #ifndef NVTX_DISABLE
106
+ nvtxRangePop_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePop_impl_fnptr;
107
+ if(local!=0)
108
+ return (*local)();
109
+ else
110
+ #endif /*NVTX_DISABLE*/
111
+ return (int)NVTX_NO_PUSH_POP_TRACKING;
112
+ }
113
+
114
+ NVTX_DECLSPEC void NVTX_API nvtxNameCategoryA(uint32_t category, const char* name)
115
+ {
116
+ #ifndef NVTX_DISABLE
117
+ nvtxNameCategoryA_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryA_impl_fnptr;
118
+ if(local!=0)
119
+ (*local)(category, name);
120
+ #endif /*NVTX_DISABLE*/
121
+ }
122
+
123
+ NVTX_DECLSPEC void NVTX_API nvtxNameCategoryW(uint32_t category, const wchar_t* name)
124
+ {
125
+ #ifndef NVTX_DISABLE
126
+ nvtxNameCategoryW_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryW_impl_fnptr;
127
+ if(local!=0)
128
+ (*local)(category, name);
129
+ #endif /*NVTX_DISABLE*/
130
+ }
131
+
132
+ NVTX_DECLSPEC void NVTX_API nvtxNameOsThreadA(uint32_t threadId, const char* name)
133
+ {
134
+ #ifndef NVTX_DISABLE
135
+ nvtxNameOsThreadA_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadA_impl_fnptr;
136
+ if(local!=0)
137
+ (*local)(threadId, name);
138
+ #endif /*NVTX_DISABLE*/
139
+ }
140
+
141
+ NVTX_DECLSPEC void NVTX_API nvtxNameOsThreadW(uint32_t threadId, const wchar_t* name)
142
+ {
143
+ #ifndef NVTX_DISABLE
144
+ nvtxNameOsThreadW_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadW_impl_fnptr;
145
+ if(local!=0)
146
+ (*local)(threadId, name);
147
+ #endif /*NVTX_DISABLE*/
148
+ }
149
+
150
+ NVTX_DECLSPEC void NVTX_API nvtxDomainMarkEx(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib)
151
+ {
152
+ #ifndef NVTX_DISABLE
153
+ nvtxDomainMarkEx_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainMarkEx_impl_fnptr;
154
+ if(local!=0)
155
+ (*local)(domain, eventAttrib);
156
+ #endif /*NVTX_DISABLE*/
157
+ }
158
+
159
+ NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxDomainRangeStartEx(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib)
160
+ {
161
+ #ifndef NVTX_DISABLE
162
+ nvtxDomainRangeStartEx_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeStartEx_impl_fnptr;
163
+ if(local!=0)
164
+ return (*local)(domain, eventAttrib);
165
+ else
166
+ #endif /*NVTX_DISABLE*/
167
+ return (nvtxRangeId_t)0;
168
+ }
169
+
170
+ NVTX_DECLSPEC void NVTX_API nvtxDomainRangeEnd(nvtxDomainHandle_t domain, nvtxRangeId_t id)
171
+ {
172
+ #ifndef NVTX_DISABLE
173
+ nvtxDomainRangeEnd_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeEnd_impl_fnptr;
174
+ if(local!=0)
175
+ (*local)(domain, id);
176
+ #endif /*NVTX_DISABLE*/
177
+ }
178
+
179
+ NVTX_DECLSPEC int NVTX_API nvtxDomainRangePushEx(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib)
180
+ {
181
+ #ifndef NVTX_DISABLE
182
+ nvtxDomainRangePushEx_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePushEx_impl_fnptr;
183
+ if(local!=0)
184
+ return (*local)(domain, eventAttrib);
185
+ else
186
+ #endif /*NVTX_DISABLE*/
187
+ return (int)NVTX_NO_PUSH_POP_TRACKING;
188
+ }
189
+
190
+ NVTX_DECLSPEC int NVTX_API nvtxDomainRangePop(nvtxDomainHandle_t domain)
191
+ {
192
+ #ifndef NVTX_DISABLE
193
+ nvtxDomainRangePop_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePop_impl_fnptr;
194
+ if(local!=0)
195
+ return (*local)(domain);
196
+ else
197
+ #endif /*NVTX_DISABLE*/
198
+ return (int)NVTX_NO_PUSH_POP_TRACKING;
199
+ }
200
+
201
+ NVTX_DECLSPEC nvtxResourceHandle_t NVTX_API nvtxDomainResourceCreate(nvtxDomainHandle_t domain, nvtxResourceAttributes_t* attribs)
202
+ {
203
+ #ifndef NVTX_DISABLE
204
+ nvtxDomainResourceCreate_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceCreate_impl_fnptr;
205
+ if(local!=0)
206
+ return (*local)(domain, attribs);
207
+ else
208
+ #endif /*NVTX_DISABLE*/
209
+ return (nvtxResourceHandle_t)0;
210
+ }
211
+
212
+ NVTX_DECLSPEC void NVTX_API nvtxDomainResourceDestroy(nvtxResourceHandle_t resource)
213
+ {
214
+ #ifndef NVTX_DISABLE
215
+ nvtxDomainResourceDestroy_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceDestroy_impl_fnptr;
216
+ if(local!=0)
217
+ (*local)(resource);
218
+ #endif /*NVTX_DISABLE*/
219
+ }
220
+
221
+ NVTX_DECLSPEC void NVTX_API nvtxDomainNameCategoryA(nvtxDomainHandle_t domain, uint32_t category, const char* name)
222
+ {
223
+ #ifndef NVTX_DISABLE
224
+ nvtxDomainNameCategoryA_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryA_impl_fnptr;
225
+ if(local!=0)
226
+ (*local)(domain, category, name);
227
+ #endif /*NVTX_DISABLE*/
228
+ }
229
+
230
+ NVTX_DECLSPEC void NVTX_API nvtxDomainNameCategoryW(nvtxDomainHandle_t domain, uint32_t category, const wchar_t* name)
231
+ {
232
+ #ifndef NVTX_DISABLE
233
+ nvtxDomainNameCategoryW_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryW_impl_fnptr;
234
+ if(local!=0)
235
+ (*local)(domain, category, name);
236
+ #endif /*NVTX_DISABLE*/
237
+ }
238
+
239
+ NVTX_DECLSPEC nvtxStringHandle_t NVTX_API nvtxDomainRegisterStringA(nvtxDomainHandle_t domain, const char* string)
240
+ {
241
+ #ifndef NVTX_DISABLE
242
+ nvtxDomainRegisterStringA_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringA_impl_fnptr;
243
+ if(local!=0)
244
+ return (*local)(domain, string);
245
+ else
246
+ #endif /*NVTX_DISABLE*/
247
+ return (nvtxStringHandle_t)0;
248
+ }
249
+
250
+ NVTX_DECLSPEC nvtxStringHandle_t NVTX_API nvtxDomainRegisterStringW(nvtxDomainHandle_t domain, const wchar_t* string)
251
+ {
252
+ #ifndef NVTX_DISABLE
253
+ nvtxDomainRegisterStringW_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringW_impl_fnptr;
254
+ if(local!=0)
255
+ return (*local)(domain, string);
256
+ else
257
+ #endif /*NVTX_DISABLE*/
258
+ return (nvtxStringHandle_t)0;
259
+ }
260
+
261
+ NVTX_DECLSPEC nvtxDomainHandle_t NVTX_API nvtxDomainCreateA(const char* message)
262
+ {
263
+ #ifndef NVTX_DISABLE
264
+ nvtxDomainCreateA_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateA_impl_fnptr;
265
+ if(local!=0)
266
+ return (*local)(message);
267
+ else
268
+ #endif /*NVTX_DISABLE*/
269
+ return (nvtxDomainHandle_t)0;
270
+ }
271
+
272
+ NVTX_DECLSPEC nvtxDomainHandle_t NVTX_API nvtxDomainCreateW(const wchar_t* message)
273
+ {
274
+ #ifndef NVTX_DISABLE
275
+ nvtxDomainCreateW_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateW_impl_fnptr;
276
+ if(local!=0)
277
+ return (*local)(message);
278
+ else
279
+ #endif /*NVTX_DISABLE*/
280
+ return (nvtxDomainHandle_t)0;
281
+ }
282
+
283
+ NVTX_DECLSPEC void NVTX_API nvtxDomainDestroy(nvtxDomainHandle_t domain)
284
+ {
285
+ #ifndef NVTX_DISABLE
286
+ nvtxDomainDestroy_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainDestroy_impl_fnptr;
287
+ if(local!=0)
288
+ (*local)(domain);
289
+ #endif /*NVTX_DISABLE*/
290
+ }
291
+
292
+ NVTX_DECLSPEC void NVTX_API nvtxInitialize(const void* reserved)
293
+ {
294
+ #ifndef NVTX_DISABLE
295
+ nvtxInitialize_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxInitialize_impl_fnptr;
296
+ if(local!=0)
297
+ (*local)(reserved);
298
+ #endif /*NVTX_DISABLE*/
299
+ }
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvtx3/nvtxDetail/nvtxImplCudaRt_v3.h ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* This file was procedurally generated! Do not modify this file by hand. */
2
+
3
+ /*
4
+ * Copyright 2009-2016 NVIDIA Corporation. All rights reserved.
5
+ *
6
+ * NOTICE TO USER:
7
+ *
8
+ * This source code is subject to NVIDIA ownership rights under U.S. and
9
+ * international Copyright laws.
10
+ *
11
+ * This software and the information contained herein is PROPRIETARY and
12
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and conditions
13
+ * of a form of NVIDIA software license agreement.
14
+ *
15
+ * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
16
+ * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
17
+ * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
18
+ * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
19
+ * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
20
+ * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
21
+ * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
22
+ * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
23
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
24
+ * OR PERFORMANCE OF THIS SOURCE CODE.
25
+ *
26
+ * U.S. Government End Users. This source code is a "commercial item" as
27
+ * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
28
+ * "commercial computer software" and "commercial computer software
29
+ * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
30
+ * and is provided to the U.S. Government only as a commercial end item.
31
+ * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
32
+ * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
33
+ * source code with only those rights set forth herein.
34
+ *
35
+ * Any use of this source code in individual and commercial software must
36
+ * include, in the user documentation and internal comments to the code,
37
+ * the above Disclaimer and U.S. Government End Users Notice.
38
+ */
39
+
40
+ #ifndef NVTX_IMPL_GUARD_CUDART
41
+ #error Never include this file directly -- it is automatically included by nvToolsExtCudaRt.h (except when NVTX_NO_IMPL is defined).
42
+ #endif
43
+
44
+ #ifdef __cplusplus
45
+ extern "C" {
46
+ #endif /* __cplusplus */
47
+
48
+ //typedef void (NVTX_API * nvtxNameCudaDeviceA_impl_fntype)(int device, const char* name);
49
+ //typedef void (NVTX_API * nvtxNameCudaDeviceW_impl_fntype)(int device, const wchar_t* name);
50
+ typedef void (NVTX_API * nvtxNameCudaStreamA_impl_fntype)(cudaStream_t stream, const char* name);
51
+ typedef void (NVTX_API * nvtxNameCudaStreamW_impl_fntype)(cudaStream_t stream, const wchar_t* name);
52
+ typedef void (NVTX_API * nvtxNameCudaEventA_impl_fntype)(cudaEvent_t event, const char* name);
53
+ typedef void (NVTX_API * nvtxNameCudaEventW_impl_fntype)(cudaEvent_t event, const wchar_t* name);
54
+
55
+ NVTX_DECLSPEC void NVTX_API nvtxNameCudaDeviceA(int device, const char* name)
56
+ {
57
+ #ifndef NVTX_DISABLE
58
+ nvtxNameCudaDeviceA_impl_fntype local = (nvtxNameCudaDeviceA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceA_impl_fnptr;
59
+ if(local!=0)
60
+ (*local)(device, name);
61
+ #endif /*NVTX_DISABLE*/
62
+ }
63
+
64
+ NVTX_DECLSPEC void NVTX_API nvtxNameCudaDeviceW(int device, const wchar_t* name)
65
+ {
66
+ #ifndef NVTX_DISABLE
67
+ nvtxNameCudaDeviceW_impl_fntype local = (nvtxNameCudaDeviceW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceW_impl_fnptr;
68
+ if(local!=0)
69
+ (*local)(device, name);
70
+ #endif /*NVTX_DISABLE*/
71
+ }
72
+
73
+ NVTX_DECLSPEC void NVTX_API nvtxNameCudaStreamA(cudaStream_t stream, const char* name)
74
+ {
75
+ #ifndef NVTX_DISABLE
76
+ nvtxNameCudaStreamA_impl_fntype local = (nvtxNameCudaStreamA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamA_impl_fnptr;
77
+ if(local!=0)
78
+ (*local)(stream, name);
79
+ #endif /*NVTX_DISABLE*/
80
+ }
81
+
82
+ NVTX_DECLSPEC void NVTX_API nvtxNameCudaStreamW(cudaStream_t stream, const wchar_t* name)
83
+ {
84
+ #ifndef NVTX_DISABLE
85
+ nvtxNameCudaStreamW_impl_fntype local = (nvtxNameCudaStreamW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamW_impl_fnptr;
86
+ if(local!=0)
87
+ (*local)(stream, name);
88
+ #endif /*NVTX_DISABLE*/
89
+ }
90
+
91
+ NVTX_DECLSPEC void NVTX_API nvtxNameCudaEventA(cudaEvent_t event, const char* name)
92
+ {
93
+ #ifndef NVTX_DISABLE
94
+ nvtxNameCudaEventA_impl_fntype local = (nvtxNameCudaEventA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventA_impl_fnptr;
95
+ if(local!=0)
96
+ (*local)(event, name);
97
+ #endif /*NVTX_DISABLE*/
98
+ }
99
+
100
+ NVTX_DECLSPEC void NVTX_API nvtxNameCudaEventW(cudaEvent_t event, const wchar_t* name)
101
+ {
102
+ #ifndef NVTX_DISABLE
103
+ nvtxNameCudaEventW_impl_fntype local = (nvtxNameCudaEventW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventW_impl_fnptr;
104
+ if(local!=0)
105
+ (*local)(event, name);
106
+ #endif /*NVTX_DISABLE*/
107
+ }
108
+
109
+ #ifdef __cplusplus
110
+ } /* extern "C" */
111
+ #endif /* __cplusplus */
112
+
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvtx3/nvtxDetail/nvtxImplCuda_v3.h ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* This file was procedurally generated! Do not modify this file by hand. */
2
+
3
+ /*
4
+ * Copyright 2009-2016 NVIDIA Corporation. All rights reserved.
5
+ *
6
+ * NOTICE TO USER:
7
+ *
8
+ * This source code is subject to NVIDIA ownership rights under U.S. and
9
+ * international Copyright laws.
10
+ *
11
+ * This software and the information contained herein is PROPRIETARY and
12
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and conditions
13
+ * of a form of NVIDIA software license agreement.
14
+ *
15
+ * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
16
+ * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
17
+ * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
18
+ * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
19
+ * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
20
+ * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
21
+ * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
22
+ * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
23
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
24
+ * OR PERFORMANCE OF THIS SOURCE CODE.
25
+ *
26
+ * U.S. Government End Users. This source code is a "commercial item" as
27
+ * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
28
+ * "commercial computer software" and "commercial computer software
29
+ * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
30
+ * and is provided to the U.S. Government only as a commercial end item.
31
+ * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
32
+ * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
33
+ * source code with only those rights set forth herein.
34
+ *
35
+ * Any use of this source code in individual and commercial software must
36
+ * include, in the user documentation and internal comments to the code,
37
+ * the above Disclaimer and U.S. Government End Users Notice.
38
+ */
39
+
40
+ #ifndef NVTX_IMPL_GUARD_CUDA
41
+ #error Never include this file directly -- it is automatically included by nvToolsExtCuda.h (except when NVTX_NO_IMPL is defined).
42
+ #endif
43
+
44
+
45
+ #ifdef __cplusplus
46
+ extern "C" {
47
+ #endif /* __cplusplus */
48
+
49
+ typedef void (NVTX_API * nvtxNameCuDeviceA_impl_fntype)(CUdevice device, const char* name);
50
+ typedef void (NVTX_API * nvtxNameCuDeviceW_impl_fntype)(CUdevice device, const wchar_t* name);
51
+ typedef void (NVTX_API * nvtxNameCuContextA_impl_fntype)(CUcontext context, const char* name);
52
+ typedef void (NVTX_API * nvtxNameCuContextW_impl_fntype)(CUcontext context, const wchar_t* name);
53
+ typedef void (NVTX_API * nvtxNameCuStreamA_impl_fntype)(CUstream stream, const char* name);
54
+ typedef void (NVTX_API * nvtxNameCuStreamW_impl_fntype)(CUstream stream, const wchar_t* name);
55
+ typedef void (NVTX_API * nvtxNameCuEventA_impl_fntype)(CUevent event, const char* name);
56
+ typedef void (NVTX_API * nvtxNameCuEventW_impl_fntype)(CUevent event, const wchar_t* name);
57
+
58
+ NVTX_DECLSPEC void NVTX_API nvtxNameCuDeviceA(CUdevice device, const char* name)
59
+ {
60
+ #ifndef NVTX_DISABLE
61
+ nvtxNameCuDeviceA_impl_fntype local = (nvtxNameCuDeviceA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceA_impl_fnptr;
62
+ if(local!=0)
63
+ (*local)(device, name);
64
+ #endif /*NVTX_DISABLE*/
65
+ }
66
+
67
+ NVTX_DECLSPEC void NVTX_API nvtxNameCuDeviceW(CUdevice device, const wchar_t* name)
68
+ {
69
+ #ifndef NVTX_DISABLE
70
+ nvtxNameCuDeviceW_impl_fntype local = (nvtxNameCuDeviceW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceW_impl_fnptr;
71
+ if(local!=0)
72
+ (*local)(device, name);
73
+ #endif /*NVTX_DISABLE*/
74
+ }
75
+
76
+ NVTX_DECLSPEC void NVTX_API nvtxNameCuContextA(CUcontext context, const char* name)
77
+ {
78
+ #ifndef NVTX_DISABLE
79
+ nvtxNameCuContextA_impl_fntype local = (nvtxNameCuContextA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextA_impl_fnptr;
80
+ if(local!=0)
81
+ (*local)(context, name);
82
+ #endif /*NVTX_DISABLE*/
83
+ }
84
+
85
+ NVTX_DECLSPEC void NVTX_API nvtxNameCuContextW(CUcontext context, const wchar_t* name)
86
+ {
87
+ #ifndef NVTX_DISABLE
88
+ nvtxNameCuContextW_impl_fntype local = (nvtxNameCuContextW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextW_impl_fnptr;
89
+ if(local!=0)
90
+ (*local)(context, name);
91
+ #endif /*NVTX_DISABLE*/
92
+ }
93
+
94
+ NVTX_DECLSPEC void NVTX_API nvtxNameCuStreamA(CUstream stream, const char* name)
95
+ {
96
+ #ifndef NVTX_DISABLE
97
+ nvtxNameCuStreamA_impl_fntype local = (nvtxNameCuStreamA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamA_impl_fnptr;
98
+ if(local!=0)
99
+ (*local)(stream, name);
100
+ #endif /*NVTX_DISABLE*/
101
+ }
102
+
103
+ NVTX_DECLSPEC void NVTX_API nvtxNameCuStreamW(CUstream stream, const wchar_t* name)
104
+ {
105
+ #ifndef NVTX_DISABLE
106
+ nvtxNameCuStreamW_impl_fntype local = (nvtxNameCuStreamW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamW_impl_fnptr;
107
+ if(local!=0)
108
+ (*local)(stream, name);
109
+ #endif /*NVTX_DISABLE*/
110
+ }
111
+
112
+ NVTX_DECLSPEC void NVTX_API nvtxNameCuEventA(CUevent event, const char* name)
113
+ {
114
+ #ifndef NVTX_DISABLE
115
+ nvtxNameCuEventA_impl_fntype local = (nvtxNameCuEventA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventA_impl_fnptr;
116
+ if(local!=0)
117
+ (*local)(event, name);
118
+ #endif /*NVTX_DISABLE*/
119
+ }
120
+
121
+ NVTX_DECLSPEC void NVTX_API nvtxNameCuEventW(CUevent event, const wchar_t* name)
122
+ {
123
+ #ifndef NVTX_DISABLE
124
+ nvtxNameCuEventW_impl_fntype local = (nvtxNameCuEventW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventW_impl_fnptr;
125
+ if(local!=0)
126
+ (*local)(event, name);
127
+ #endif /*NVTX_DISABLE*/
128
+ }
129
+
130
+ #ifdef __cplusplus
131
+ } /* extern "C" */
132
+ #endif /* __cplusplus */
133
+
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvtx3/nvtxDetail/nvtxImplOpenCL_v3.h ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* This file was procedurally generated! Do not modify this file by hand. */
2
+
3
+ /*
4
+ * Copyright 2009-2016 NVIDIA Corporation. All rights reserved.
5
+ *
6
+ * NOTICE TO USER:
7
+ *
8
+ * This source code is subject to NVIDIA ownership rights under U.S. and
9
+ * international Copyright laws.
10
+ *
11
+ * This software and the information contained herein is PROPRIETARY and
12
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and conditions
13
+ * of a form of NVIDIA software license agreement.
14
+ *
15
+ * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
16
+ * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
17
+ * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
18
+ * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
19
+ * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
20
+ * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
21
+ * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
22
+ * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
23
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
24
+ * OR PERFORMANCE OF THIS SOURCE CODE.
25
+ *
26
+ * U.S. Government End Users. This source code is a "commercial item" as
27
+ * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
28
+ * "commercial computer software" and "commercial computer software
29
+ * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
30
+ * and is provided to the U.S. Government only as a commercial end item.
31
+ * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
32
+ * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
33
+ * source code with only those rights set forth herein.
34
+ *
35
+ * Any use of this source code in individual and commercial software must
36
+ * include, in the user documentation and internal comments to the code,
37
+ * the above Disclaimer and U.S. Government End Users Notice.
38
+ */
39
+
40
+ #ifndef NVTX_IMPL_GUARD_OPENCL
41
+ #error Never include this file directly -- it is automatically included by nvToolsExtCuda.h (except when NVTX_NO_IMPL is defined).
42
+ #endif
43
+
44
+
45
+ #ifdef __cplusplus
46
+ extern "C" {
47
+ #endif /* __cplusplus */
48
+
49
+ typedef void (NVTX_API * nvtxNameClDeviceA_impl_fntype)(cl_device_id device, const char* name);
50
+ typedef void (NVTX_API * nvtxNameClDeviceW_impl_fntype)(cl_device_id device, const wchar_t* name);
51
+ typedef void (NVTX_API * nvtxNameClContextA_impl_fntype)(cl_context context, const char* name);
52
+ typedef void (NVTX_API * nvtxNameClContextW_impl_fntype)(cl_context context, const wchar_t* name);
53
+ typedef void (NVTX_API * nvtxNameClCommandQueueA_impl_fntype)(cl_command_queue command_queue, const char* name);
54
+ typedef void (NVTX_API * nvtxNameClCommandQueueW_impl_fntype)(cl_command_queue command_queue, const wchar_t* name);
55
+ typedef void (NVTX_API * nvtxNameClMemObjectA_impl_fntype)(cl_mem memobj, const char* name);
56
+ typedef void (NVTX_API * nvtxNameClMemObjectW_impl_fntype)(cl_mem memobj, const wchar_t* name);
57
+ typedef void (NVTX_API * nvtxNameClSamplerA_impl_fntype)(cl_sampler sampler, const char* name);
58
+ typedef void (NVTX_API * nvtxNameClSamplerW_impl_fntype)(cl_sampler sampler, const wchar_t* name);
59
+ typedef void (NVTX_API * nvtxNameClProgramA_impl_fntype)(cl_program program, const char* name);
60
+ typedef void (NVTX_API * nvtxNameClProgramW_impl_fntype)(cl_program program, const wchar_t* name);
61
+ typedef void (NVTX_API * nvtxNameClEventA_impl_fntype)(cl_event evnt, const char* name);
62
+ typedef void (NVTX_API * nvtxNameClEventW_impl_fntype)(cl_event evnt, const wchar_t* name);
63
+
64
+ NVTX_DECLSPEC void NVTX_API nvtxNameClDeviceA(cl_device_id device, const char* name)
65
+ {
66
+ #ifndef NVTX_DISABLE
67
+ nvtxNameClDeviceA_impl_fntype local = (nvtxNameClDeviceA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceA_impl_fnptr;
68
+ if(local!=0)
69
+ (*local)(device, name);
70
+ #endif /*NVTX_DISABLE*/
71
+ }
72
+
73
+ NVTX_DECLSPEC void NVTX_API nvtxNameClDeviceW(cl_device_id device, const wchar_t* name)
74
+ {
75
+ #ifndef NVTX_DISABLE
76
+ nvtxNameClDeviceW_impl_fntype local = (nvtxNameClDeviceW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceW_impl_fnptr;
77
+ if(local!=0)
78
+ (*local)(device, name);
79
+ #endif /*NVTX_DISABLE*/
80
+ }
81
+
82
+ NVTX_DECLSPEC void NVTX_API nvtxNameClContextA(cl_context context, const char* name)
83
+ {
84
+ #ifndef NVTX_DISABLE
85
+ nvtxNameClContextA_impl_fntype local = (nvtxNameClContextA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextA_impl_fnptr;
86
+ if(local!=0)
87
+ (*local)(context, name);
88
+ #endif /*NVTX_DISABLE*/
89
+ }
90
+
91
+ NVTX_DECLSPEC void NVTX_API nvtxNameClContextW(cl_context context, const wchar_t* name)
92
+ {
93
+ #ifndef NVTX_DISABLE
94
+ nvtxNameClContextW_impl_fntype local = (nvtxNameClContextW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextW_impl_fnptr;
95
+ if(local!=0)
96
+ (*local)(context, name);
97
+ #endif /*NVTX_DISABLE*/
98
+ }
99
+
100
+ NVTX_DECLSPEC void NVTX_API nvtxNameClCommandQueueA(cl_command_queue command_queue, const char* name)
101
+ {
102
+ #ifndef NVTX_DISABLE
103
+ nvtxNameClCommandQueueA_impl_fntype local = (nvtxNameClCommandQueueA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueA_impl_fnptr;
104
+ if(local!=0)
105
+ (*local)(command_queue, name);
106
+ #endif /*NVTX_DISABLE*/
107
+ }
108
+
109
+ NVTX_DECLSPEC void NVTX_API nvtxNameClCommandQueueW(cl_command_queue command_queue, const wchar_t* name)
110
+ {
111
+ #ifndef NVTX_DISABLE
112
+ nvtxNameClCommandQueueW_impl_fntype local = (nvtxNameClCommandQueueW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueW_impl_fnptr;
113
+ if(local!=0)
114
+ (*local)(command_queue, name);
115
+ #endif /*NVTX_DISABLE*/
116
+ }
117
+
118
+ NVTX_DECLSPEC void NVTX_API nvtxNameClMemObjectA(cl_mem memobj, const char* name)
119
+ {
120
+ #ifndef NVTX_DISABLE
121
+ nvtxNameClMemObjectA_impl_fntype local = (nvtxNameClMemObjectA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectA_impl_fnptr;
122
+ if(local!=0)
123
+ (*local)(memobj, name);
124
+ #endif /*NVTX_DISABLE*/
125
+ }
126
+
127
+ NVTX_DECLSPEC void NVTX_API nvtxNameClMemObjectW(cl_mem memobj, const wchar_t* name)
128
+ {
129
+ #ifndef NVTX_DISABLE
130
+ nvtxNameClMemObjectW_impl_fntype local = (nvtxNameClMemObjectW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectW_impl_fnptr;
131
+ if(local!=0)
132
+ (*local)(memobj, name);
133
+ #endif /*NVTX_DISABLE*/
134
+ }
135
+
136
+ NVTX_DECLSPEC void NVTX_API nvtxNameClSamplerA(cl_sampler sampler, const char* name)
137
+ {
138
+ #ifndef NVTX_DISABLE
139
+ nvtxNameClSamplerA_impl_fntype local = (nvtxNameClSamplerA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerA_impl_fnptr;
140
+ if(local!=0)
141
+ (*local)(sampler, name);
142
+ #endif /*NVTX_DISABLE*/
143
+ }
144
+
145
+ NVTX_DECLSPEC void NVTX_API nvtxNameClSamplerW(cl_sampler sampler, const wchar_t* name)
146
+ {
147
+ #ifndef NVTX_DISABLE
148
+ nvtxNameClSamplerW_impl_fntype local = (nvtxNameClSamplerW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerW_impl_fnptr;
149
+ if(local!=0)
150
+ (*local)(sampler, name);
151
+ #endif /*NVTX_DISABLE*/
152
+ }
153
+
154
+ NVTX_DECLSPEC void NVTX_API nvtxNameClProgramA(cl_program program, const char* name)
155
+ {
156
+ #ifndef NVTX_DISABLE
157
+ nvtxNameClProgramA_impl_fntype local = (nvtxNameClProgramA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramA_impl_fnptr;
158
+ if(local!=0)
159
+ (*local)(program, name);
160
+ #endif /*NVTX_DISABLE*/
161
+ }
162
+
163
+ NVTX_DECLSPEC void NVTX_API nvtxNameClProgramW(cl_program program, const wchar_t* name)
164
+ {
165
+ #ifndef NVTX_DISABLE
166
+ nvtxNameClProgramW_impl_fntype local = (nvtxNameClProgramW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramW_impl_fnptr;
167
+ if(local!=0)
168
+ (*local)(program, name);
169
+ #endif /*NVTX_DISABLE*/
170
+ }
171
+
172
+ NVTX_DECLSPEC void NVTX_API nvtxNameClEventA(cl_event evnt, const char* name)
173
+ {
174
+ #ifndef NVTX_DISABLE
175
+ nvtxNameClEventA_impl_fntype local = (nvtxNameClEventA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventA_impl_fnptr;
176
+ if(local!=0)
177
+ (*local)(evnt, name);
178
+ #endif /*NVTX_DISABLE*/
179
+ }
180
+
181
+ NVTX_DECLSPEC void NVTX_API nvtxNameClEventW(cl_event evnt, const wchar_t* name)
182
+ {
183
+ #ifndef NVTX_DISABLE
184
+ nvtxNameClEventW_impl_fntype local = (nvtxNameClEventW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventW_impl_fnptr;
185
+ if(local!=0)
186
+ (*local)(evnt, name);
187
+ #endif /*NVTX_DISABLE*/
188
+ }
189
+
190
+ #ifdef __cplusplus
191
+ } /* extern "C" */
192
+ #endif /* __cplusplus */
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvtx3/nvtxDetail/nvtxImplSync_v3.h ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* This file was procedurally generated! Do not modify this file by hand. */
2
+
3
+ /*
4
+ * Copyright 2009-2016 NVIDIA Corporation. All rights reserved.
5
+ *
6
+ * NOTICE TO USER:
7
+ *
8
+ * This source code is subject to NVIDIA ownership rights under U.S. and
9
+ * international Copyright laws.
10
+ *
11
+ * This software and the information contained herein is PROPRIETARY and
12
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and conditions
13
+ * of a form of NVIDIA software license agreement.
14
+ *
15
+ * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
16
+ * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
17
+ * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
18
+ * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
19
+ * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
20
+ * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
21
+ * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
22
+ * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
23
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
24
+ * OR PERFORMANCE OF THIS SOURCE CODE.
25
+ *
26
+ * U.S. Government End Users. This source code is a "commercial item" as
27
+ * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
28
+ * "commercial computer software" and "commercial computer software
29
+ * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
30
+ * and is provided to the U.S. Government only as a commercial end item.
31
+ * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
32
+ * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
33
+ * source code with only those rights set forth herein.
34
+ *
35
+ * Any use of this source code in individual and commercial software must
36
+ * include, in the user documentation and internal comments to the code,
37
+ * the above Disclaimer and U.S. Government End Users Notice.
38
+ */
39
+
40
+ #ifndef NVTX_IMPL_GUARD_SYNC
41
+ #error Never include this file directly -- it is automatically included by nvToolsExtCuda.h (except when NVTX_NO_IMPL is defined).
42
+ #endif
43
+
44
+
45
+ #ifdef __cplusplus
46
+ extern "C" {
47
+ #endif /* __cplusplus */
48
+
49
+ typedef nvtxSyncUser_t (NVTX_API * nvtxDomainSyncUserCreate_impl_fntype)(nvtxDomainHandle_t domain, const nvtxSyncUserAttributes_t* attribs);
50
+ typedef void (NVTX_API * nvtxDomainSyncUserDestroy_impl_fntype)(nvtxSyncUser_t handle);
51
+ typedef void (NVTX_API * nvtxDomainSyncUserAcquireStart_impl_fntype)(nvtxSyncUser_t handle);
52
+ typedef void (NVTX_API * nvtxDomainSyncUserAcquireFailed_impl_fntype)(nvtxSyncUser_t handle);
53
+ typedef void (NVTX_API * nvtxDomainSyncUserAcquireSuccess_impl_fntype)(nvtxSyncUser_t handle);
54
+ typedef void (NVTX_API * nvtxDomainSyncUserReleasing_impl_fntype)(nvtxSyncUser_t handle);
55
+
56
+ NVTX_DECLSPEC nvtxSyncUser_t NVTX_API nvtxDomainSyncUserCreate(nvtxDomainHandle_t domain, const nvtxSyncUserAttributes_t* attribs)
57
+ {
58
+ #ifndef NVTX_DISABLE
59
+ nvtxDomainSyncUserCreate_impl_fntype local = (nvtxDomainSyncUserCreate_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserCreate_impl_fnptr;
60
+ if(local!=0)
61
+ return (*local)(domain, attribs);
62
+ else
63
+ #endif /*NVTX_DISABLE*/
64
+ return (nvtxSyncUser_t)0;
65
+ }
66
+
67
+ NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserDestroy(nvtxSyncUser_t handle)
68
+ {
69
+ #ifndef NVTX_DISABLE
70
+ nvtxDomainSyncUserDestroy_impl_fntype local = (nvtxDomainSyncUserDestroy_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserDestroy_impl_fnptr;
71
+ if(local!=0)
72
+ (*local)(handle);
73
+ #endif /*NVTX_DISABLE*/
74
+ }
75
+
76
+ NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserAcquireStart(nvtxSyncUser_t handle)
77
+ {
78
+ #ifndef NVTX_DISABLE
79
+ nvtxDomainSyncUserAcquireStart_impl_fntype local = (nvtxDomainSyncUserAcquireStart_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireStart_impl_fnptr;
80
+ if(local!=0)
81
+ (*local)(handle);
82
+ #endif /*NVTX_DISABLE*/
83
+ }
84
+
85
+ NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserAcquireFailed(nvtxSyncUser_t handle)
86
+ {
87
+ #ifndef NVTX_DISABLE
88
+ nvtxDomainSyncUserAcquireFailed_impl_fntype local = (nvtxDomainSyncUserAcquireFailed_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireFailed_impl_fnptr;
89
+ if(local!=0)
90
+ (*local)(handle);
91
+ #endif /*NVTX_DISABLE*/
92
+ }
93
+
94
+ NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserAcquireSuccess(nvtxSyncUser_t handle)
95
+ {
96
+ #ifndef NVTX_DISABLE
97
+ nvtxDomainSyncUserAcquireSuccess_impl_fntype local = (nvtxDomainSyncUserAcquireSuccess_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireSuccess_impl_fnptr;
98
+ if(local!=0)
99
+ (*local)(handle);
100
+ #endif /*NVTX_DISABLE*/
101
+ }
102
+
103
+ NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserReleasing(nvtxSyncUser_t handle)
104
+ {
105
+ #ifndef NVTX_DISABLE
106
+ nvtxDomainSyncUserReleasing_impl_fntype local = (nvtxDomainSyncUserReleasing_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserReleasing_impl_fnptr;
107
+ if(local!=0)
108
+ (*local)(handle);
109
+ #endif /*NVTX_DISABLE*/
110
+ }
111
+
112
+ #ifdef __cplusplus
113
+ } /* extern "C" */
114
+ #endif /* __cplusplus */
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvtx3/nvtxDetail/nvtxInitDefs.h ADDED
@@ -0,0 +1,565 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #ifndef NVTX_IMPL_GUARD
2
+ #error Never include this file directly -- it is automatically included by nvToolsExt.h (except when NVTX_NO_IMPL is defined).
3
+ #endif
4
+
5
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxMarkEx_impl_init)(const nvtxEventAttributes_t* eventAttrib){
6
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
7
+ nvtxMarkEx(eventAttrib);
8
+ }
9
+
10
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxMarkA_impl_init)(const char* message){
11
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
12
+ nvtxMarkA(message);
13
+ }
14
+
15
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxMarkW_impl_init)(const wchar_t* message){
16
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
17
+ nvtxMarkW(message);
18
+ }
19
+
20
+ NVTX_LINKONCE_DEFINE_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartEx_impl_init)(const nvtxEventAttributes_t* eventAttrib){
21
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
22
+ return nvtxRangeStartEx(eventAttrib);
23
+ }
24
+
25
+ NVTX_LINKONCE_DEFINE_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartA_impl_init)(const char* message){
26
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
27
+ return nvtxRangeStartA(message);
28
+ }
29
+
30
+ NVTX_LINKONCE_DEFINE_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartW_impl_init)(const wchar_t* message){
31
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
32
+ return nvtxRangeStartW(message);
33
+ }
34
+
35
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeEnd_impl_init)(nvtxRangeId_t id){
36
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
37
+ nvtxRangeEnd(id);
38
+ }
39
+
40
+ NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePushEx_impl_init)(const nvtxEventAttributes_t* eventAttrib){
41
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
42
+ return nvtxRangePushEx(eventAttrib);
43
+ }
44
+
45
+ NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePushA_impl_init)(const char* message){
46
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
47
+ return nvtxRangePushA(message);
48
+ }
49
+
50
+ NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePushW_impl_init)(const wchar_t* message){
51
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
52
+ return nvtxRangePushW(message);
53
+ }
54
+
55
+ NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePop_impl_init)(void){
56
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
57
+ return nvtxRangePop();
58
+ }
59
+
60
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryA_impl_init)(uint32_t category, const char* name){
61
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
62
+ nvtxNameCategoryA(category, name);
63
+ }
64
+
65
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryW_impl_init)(uint32_t category, const wchar_t* name){
66
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
67
+ nvtxNameCategoryW(category, name);
68
+ }
69
+
70
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadA_impl_init)(uint32_t threadId, const char* name){
71
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
72
+ nvtxNameOsThreadA(threadId, name);
73
+ }
74
+
75
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadW_impl_init)(uint32_t threadId, const wchar_t* name){
76
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
77
+ nvtxNameOsThreadW(threadId, name);
78
+ }
79
+
80
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainMarkEx_impl_init)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib){
81
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
82
+ nvtxDomainMarkEx(domain, eventAttrib);
83
+ }
84
+
85
+ NVTX_LINKONCE_DEFINE_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeStartEx_impl_init)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib){
86
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
87
+ return nvtxDomainRangeStartEx(domain, eventAttrib);
88
+ }
89
+
90
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeEnd_impl_init)(nvtxDomainHandle_t domain, nvtxRangeId_t id){
91
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
92
+ nvtxDomainRangeEnd(domain, id);
93
+ }
94
+
95
+ NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePushEx_impl_init)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib){
96
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
97
+ return nvtxDomainRangePushEx(domain, eventAttrib);
98
+ }
99
+
100
+ NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePop_impl_init)(nvtxDomainHandle_t domain){
101
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
102
+ return nvtxDomainRangePop(domain);
103
+ }
104
+
105
+ NVTX_LINKONCE_DEFINE_FUNCTION nvtxResourceHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceCreate_impl_init)(nvtxDomainHandle_t domain, nvtxResourceAttributes_t* attribs){
106
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
107
+ return nvtxDomainResourceCreate(domain, attribs);
108
+ }
109
+
110
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceDestroy_impl_init)(nvtxResourceHandle_t resource){
111
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
112
+ nvtxDomainResourceDestroy(resource);
113
+ }
114
+
115
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryA_impl_init)(nvtxDomainHandle_t domain, uint32_t category, const char* name){
116
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
117
+ nvtxDomainNameCategoryA(domain, category, name);
118
+ }
119
+
120
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryW_impl_init)(nvtxDomainHandle_t domain, uint32_t category, const wchar_t* name){
121
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
122
+ nvtxDomainNameCategoryW(domain, category, name);
123
+ }
124
+
125
+ NVTX_LINKONCE_DEFINE_FUNCTION nvtxStringHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringA_impl_init)(nvtxDomainHandle_t domain, const char* string){
126
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
127
+ return nvtxDomainRegisterStringA(domain, string);
128
+ }
129
+
130
+ NVTX_LINKONCE_DEFINE_FUNCTION nvtxStringHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringW_impl_init)(nvtxDomainHandle_t domain, const wchar_t* string){
131
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
132
+ return nvtxDomainRegisterStringW(domain, string);
133
+ }
134
+
135
+ NVTX_LINKONCE_DEFINE_FUNCTION nvtxDomainHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateA_impl_init)(const char* message){
136
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
137
+ return nvtxDomainCreateA(message);
138
+ }
139
+
140
+ NVTX_LINKONCE_DEFINE_FUNCTION nvtxDomainHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateW_impl_init)(const wchar_t* message){
141
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
142
+ return nvtxDomainCreateW(message);
143
+ }
144
+
145
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainDestroy_impl_init)(nvtxDomainHandle_t domain){
146
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
147
+ nvtxDomainDestroy(domain);
148
+ }
149
+
150
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxInitialize_impl_init)(const void* reserved){
151
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
152
+ nvtxInitialize(reserved);
153
+ }
154
+
155
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceA_impl_init)(nvtx_CUdevice device, const char* name){
156
+ nvtxNameCuDeviceA_fakeimpl_fntype local;
157
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
158
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceA_impl_fnptr;
159
+ if (local)
160
+ local(device, name);
161
+ }
162
+
163
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceW_impl_init)(nvtx_CUdevice device, const wchar_t* name){
164
+ nvtxNameCuDeviceW_fakeimpl_fntype local;
165
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
166
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceW_impl_fnptr;
167
+ if (local)
168
+ local(device, name);
169
+ }
170
+
171
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextA_impl_init)(nvtx_CUcontext context, const char* name){
172
+ nvtxNameCuContextA_fakeimpl_fntype local;
173
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
174
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextA_impl_fnptr;
175
+ if (local)
176
+ local(context, name);
177
+ }
178
+
179
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextW_impl_init)(nvtx_CUcontext context, const wchar_t* name){
180
+ nvtxNameCuContextW_fakeimpl_fntype local;
181
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
182
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextW_impl_fnptr;
183
+ if (local)
184
+ local(context, name);
185
+ }
186
+
187
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamA_impl_init)(nvtx_CUstream stream, const char* name){
188
+ nvtxNameCuStreamA_fakeimpl_fntype local;
189
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
190
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamA_impl_fnptr;
191
+ if (local)
192
+ local(stream, name);
193
+ }
194
+
195
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamW_impl_init)(nvtx_CUstream stream, const wchar_t* name){
196
+ nvtxNameCuStreamW_fakeimpl_fntype local;
197
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
198
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamW_impl_fnptr;
199
+ if (local)
200
+ local(stream, name);
201
+ }
202
+
203
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventA_impl_init)(nvtx_CUevent event, const char* name){
204
+ nvtxNameCuEventA_fakeimpl_fntype local;
205
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
206
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventA_impl_fnptr;
207
+ if (local)
208
+ local(event, name);
209
+ }
210
+
211
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventW_impl_init)(nvtx_CUevent event, const wchar_t* name){
212
+ nvtxNameCuEventW_fakeimpl_fntype local;
213
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
214
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventW_impl_fnptr;
215
+ if (local)
216
+ local(event, name);
217
+ }
218
+
219
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceA_impl_init)(int device, const char* name){
220
+ nvtxNameCudaDeviceA_impl_fntype local;
221
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
222
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceA_impl_fnptr;
223
+ if (local)
224
+ local(device, name);
225
+ }
226
+
227
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceW_impl_init)(int device, const wchar_t* name){
228
+ nvtxNameCudaDeviceW_impl_fntype local;
229
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
230
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceW_impl_fnptr;
231
+ if (local)
232
+ local(device, name);
233
+ }
234
+
235
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamA_impl_init)(nvtx_cudaStream_t stream, const char* name){
236
+ nvtxNameCudaStreamA_fakeimpl_fntype local;
237
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
238
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamA_impl_fnptr;
239
+ if (local)
240
+ local(stream, name);
241
+ }
242
+
243
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamW_impl_init)(nvtx_cudaStream_t stream, const wchar_t* name){
244
+ nvtxNameCudaStreamW_fakeimpl_fntype local;
245
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
246
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamW_impl_fnptr;
247
+ if (local)
248
+ local(stream, name);
249
+ }
250
+
251
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventA_impl_init)(nvtx_cudaEvent_t event, const char* name){
252
+ nvtxNameCudaEventA_fakeimpl_fntype local;
253
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
254
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventA_impl_fnptr;
255
+ if (local)
256
+ local(event, name);
257
+ }
258
+
259
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventW_impl_init)(nvtx_cudaEvent_t event, const wchar_t* name){
260
+ nvtxNameCudaEventW_fakeimpl_fntype local;
261
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
262
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventW_impl_fnptr;
263
+ if (local)
264
+ local(event, name);
265
+ }
266
+
267
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceA_impl_init)(nvtx_cl_device_id device, const char* name){
268
+ nvtxNameClDeviceA_fakeimpl_fntype local;
269
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
270
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceA_impl_fnptr;
271
+ if (local)
272
+ local(device, name);
273
+ }
274
+
275
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceW_impl_init)(nvtx_cl_device_id device, const wchar_t* name){
276
+ nvtxNameClDeviceW_fakeimpl_fntype local;
277
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
278
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceW_impl_fnptr;
279
+ if (local)
280
+ local(device, name);
281
+ }
282
+
283
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextA_impl_init)(nvtx_cl_context context, const char* name){
284
+ nvtxNameClContextA_fakeimpl_fntype local;
285
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
286
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextA_impl_fnptr;
287
+ if (local)
288
+ local(context, name);
289
+ }
290
+
291
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextW_impl_init)(nvtx_cl_context context, const wchar_t* name){
292
+ nvtxNameClContextW_fakeimpl_fntype local;
293
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
294
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextW_impl_fnptr;
295
+ if (local)
296
+ local(context, name);
297
+ }
298
+
299
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueA_impl_init)(nvtx_cl_command_queue command_queue, const char* name){
300
+ nvtxNameClCommandQueueA_fakeimpl_fntype local;
301
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
302
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueA_impl_fnptr;
303
+ if (local)
304
+ local(command_queue, name);
305
+ }
306
+
307
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueW_impl_init)(nvtx_cl_command_queue command_queue, const wchar_t* name){
308
+ nvtxNameClCommandQueueW_fakeimpl_fntype local;
309
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
310
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueW_impl_fnptr;
311
+ if (local)
312
+ local(command_queue, name);
313
+ }
314
+
315
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectA_impl_init)(nvtx_cl_mem memobj, const char* name){
316
+ nvtxNameClMemObjectA_fakeimpl_fntype local;
317
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
318
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectA_impl_fnptr;
319
+ if (local)
320
+ local(memobj, name);
321
+ }
322
+
323
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectW_impl_init)(nvtx_cl_mem memobj, const wchar_t* name){
324
+ nvtxNameClMemObjectW_fakeimpl_fntype local;
325
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
326
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectW_impl_fnptr;
327
+ if (local)
328
+ local(memobj, name);
329
+ }
330
+
331
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerA_impl_init)(nvtx_cl_sampler sampler, const char* name){
332
+ nvtxNameClSamplerA_fakeimpl_fntype local;
333
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
334
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerA_impl_fnptr;
335
+ if (local)
336
+ local(sampler, name);
337
+ }
338
+
339
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerW_impl_init)(nvtx_cl_sampler sampler, const wchar_t* name){
340
+ nvtxNameClSamplerW_fakeimpl_fntype local;
341
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
342
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerW_impl_fnptr;
343
+ if (local)
344
+ local(sampler, name);
345
+ }
346
+
347
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramA_impl_init)(nvtx_cl_program program, const char* name){
348
+ nvtxNameClProgramA_fakeimpl_fntype local;
349
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
350
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramA_impl_fnptr;
351
+ if (local)
352
+ local(program, name);
353
+ }
354
+
355
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramW_impl_init)(nvtx_cl_program program, const wchar_t* name){
356
+ nvtxNameClProgramW_fakeimpl_fntype local;
357
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
358
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramW_impl_fnptr;
359
+ if (local)
360
+ local(program, name);
361
+ }
362
+
363
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventA_impl_init)(nvtx_cl_event evnt, const char* name){
364
+ nvtxNameClEventA_fakeimpl_fntype local;
365
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
366
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventA_impl_fnptr;
367
+ if (local)
368
+ local(evnt, name);
369
+ }
370
+
371
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventW_impl_init)(nvtx_cl_event evnt, const wchar_t* name){
372
+ nvtxNameClEventW_fakeimpl_fntype local;
373
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
374
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventW_impl_fnptr;
375
+ if (local)
376
+ local(evnt, name);
377
+ }
378
+
379
+ NVTX_LINKONCE_DEFINE_FUNCTION nvtxSyncUser_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserCreate_impl_init)(nvtxDomainHandle_t domain, const nvtxSyncUserAttributes_t* attribs){
380
+ nvtxDomainSyncUserCreate_impl_fntype local;
381
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
382
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserCreate_impl_fnptr;
383
+ if (local) {
384
+ return local(domain, attribs);
385
+ }
386
+ return (nvtxSyncUser_t)0;
387
+ }
388
+
389
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserDestroy_impl_init)(nvtxSyncUser_t handle){
390
+ nvtxDomainSyncUserDestroy_impl_fntype local;
391
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
392
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserDestroy_impl_fnptr;
393
+ if (local)
394
+ local(handle);
395
+ }
396
+
397
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireStart_impl_init)(nvtxSyncUser_t handle){
398
+ nvtxDomainSyncUserAcquireStart_impl_fntype local;
399
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
400
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireStart_impl_fnptr;
401
+ if (local)
402
+ local(handle);
403
+ }
404
+
405
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireFailed_impl_init)(nvtxSyncUser_t handle){
406
+ nvtxDomainSyncUserAcquireFailed_impl_fntype local;
407
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
408
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireFailed_impl_fnptr;
409
+ if (local)
410
+ local(handle);
411
+ }
412
+
413
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireSuccess_impl_init)(nvtxSyncUser_t handle){
414
+ nvtxDomainSyncUserAcquireSuccess_impl_fntype local;
415
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
416
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireSuccess_impl_fnptr;
417
+ if (local)
418
+ local(handle);
419
+ }
420
+
421
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserReleasing_impl_init)(nvtxSyncUser_t handle){
422
+ nvtxDomainSyncUserReleasing_impl_fntype local;
423
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
424
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserReleasing_impl_fnptr;
425
+ if (local)
426
+ local(handle);
427
+ }
428
+
429
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_VERSIONED_IDENTIFIER(nvtxSetInitFunctionsToNoops)(int forceAllToNoops);
430
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_VERSIONED_IDENTIFIER(nvtxSetInitFunctionsToNoops)(int forceAllToNoops)
431
+ {
432
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkEx_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxMarkEx_impl_init) || forceAllToNoops)
433
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkEx_impl_fnptr = NULL;
434
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxMarkA_impl_init) || forceAllToNoops)
435
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkA_impl_fnptr = NULL;
436
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxMarkW_impl_init) || forceAllToNoops)
437
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkW_impl_fnptr = NULL;
438
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartEx_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartEx_impl_init) || forceAllToNoops)
439
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartEx_impl_fnptr = NULL;
440
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartA_impl_init) || forceAllToNoops)
441
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartA_impl_fnptr = NULL;
442
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartW_impl_init) || forceAllToNoops)
443
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartW_impl_fnptr = NULL;
444
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeEnd_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangeEnd_impl_init) || forceAllToNoops)
445
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeEnd_impl_fnptr = NULL;
446
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushEx_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangePushEx_impl_init) || forceAllToNoops)
447
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushEx_impl_fnptr = NULL;
448
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangePushA_impl_init) || forceAllToNoops)
449
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushA_impl_fnptr = NULL;
450
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangePushW_impl_init) || forceAllToNoops)
451
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushW_impl_fnptr = NULL;
452
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePop_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangePop_impl_init) || forceAllToNoops)
453
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePop_impl_fnptr = NULL;
454
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryA_impl_init) || forceAllToNoops)
455
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryA_impl_fnptr = NULL;
456
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryW_impl_init) || forceAllToNoops)
457
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryW_impl_fnptr = NULL;
458
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadA_impl_init) || forceAllToNoops)
459
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadA_impl_fnptr = NULL;
460
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadW_impl_init) || forceAllToNoops)
461
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadW_impl_fnptr = NULL;
462
+
463
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceA_impl_init) || forceAllToNoops)
464
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceA_impl_fnptr = NULL;
465
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceW_impl_init) || forceAllToNoops)
466
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceW_impl_fnptr = NULL;
467
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextA_impl_init) || forceAllToNoops)
468
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextA_impl_fnptr = NULL;
469
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextW_impl_init) || forceAllToNoops)
470
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextW_impl_fnptr = NULL;
471
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamA_impl_init) || forceAllToNoops)
472
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamA_impl_fnptr = NULL;
473
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamW_impl_init) || forceAllToNoops)
474
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamW_impl_fnptr = NULL;
475
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventA_impl_init) || forceAllToNoops)
476
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventA_impl_fnptr = NULL;
477
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventW_impl_init) || forceAllToNoops)
478
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventW_impl_fnptr = NULL;
479
+
480
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceA_impl_init) || forceAllToNoops)
481
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceA_impl_fnptr = NULL;
482
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceW_impl_init) || forceAllToNoops)
483
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceW_impl_fnptr = NULL;
484
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextA_impl_init) || forceAllToNoops)
485
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextA_impl_fnptr = NULL;
486
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextW_impl_init) || forceAllToNoops)
487
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextW_impl_fnptr = NULL;
488
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueA_impl_init) || forceAllToNoops)
489
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueA_impl_fnptr = NULL;
490
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueW_impl_init) || forceAllToNoops)
491
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueW_impl_fnptr = NULL;
492
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectA_impl_init) || forceAllToNoops)
493
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectA_impl_fnptr = NULL;
494
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectW_impl_init) || forceAllToNoops)
495
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectW_impl_fnptr = NULL;
496
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerA_impl_init) || forceAllToNoops)
497
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerA_impl_fnptr = NULL;
498
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerW_impl_init) || forceAllToNoops)
499
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerW_impl_fnptr = NULL;
500
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramA_impl_init) || forceAllToNoops)
501
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramA_impl_fnptr = NULL;
502
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramW_impl_init) || forceAllToNoops)
503
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramW_impl_fnptr = NULL;
504
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventA_impl_init) || forceAllToNoops)
505
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventA_impl_fnptr = NULL;
506
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventW_impl_init) || forceAllToNoops)
507
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventW_impl_fnptr = NULL;
508
+
509
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceA_impl_init) || forceAllToNoops)
510
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceA_impl_fnptr = NULL;
511
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceW_impl_init) || forceAllToNoops)
512
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceW_impl_fnptr = NULL;
513
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamA_impl_init) || forceAllToNoops)
514
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamA_impl_fnptr = NULL;
515
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamW_impl_init) || forceAllToNoops)
516
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamW_impl_fnptr = NULL;
517
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventA_impl_init) || forceAllToNoops)
518
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventA_impl_fnptr = NULL;
519
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventW_impl_init) || forceAllToNoops)
520
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventW_impl_fnptr = NULL;
521
+
522
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainMarkEx_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainMarkEx_impl_init) || forceAllToNoops)
523
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainMarkEx_impl_fnptr = NULL;
524
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeStartEx_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeStartEx_impl_init) || forceAllToNoops)
525
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeStartEx_impl_fnptr = NULL;
526
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeEnd_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeEnd_impl_init) || forceAllToNoops)
527
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeEnd_impl_fnptr = NULL;
528
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePushEx_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePushEx_impl_init) || forceAllToNoops)
529
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePushEx_impl_fnptr = NULL;
530
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePop_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePop_impl_init) || forceAllToNoops)
531
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePop_impl_fnptr = NULL;
532
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceCreate_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceCreate_impl_init) || forceAllToNoops)
533
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceCreate_impl_fnptr = NULL;
534
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceDestroy_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceDestroy_impl_init) || forceAllToNoops)
535
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceDestroy_impl_fnptr = NULL;
536
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryA_impl_init) || forceAllToNoops)
537
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryA_impl_fnptr = NULL;
538
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryW_impl_init) || forceAllToNoops)
539
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryW_impl_fnptr = NULL;
540
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringA_impl_init) || forceAllToNoops)
541
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringA_impl_fnptr = NULL;
542
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringW_impl_init) || forceAllToNoops)
543
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringW_impl_fnptr = NULL;
544
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateA_impl_init) || forceAllToNoops)
545
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateA_impl_fnptr = NULL;
546
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateW_impl_init) || forceAllToNoops)
547
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateW_impl_fnptr = NULL;
548
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainDestroy_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainDestroy_impl_init) || forceAllToNoops)
549
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainDestroy_impl_fnptr = NULL;
550
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxInitialize_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxInitialize_impl_init) || forceAllToNoops)
551
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxInitialize_impl_fnptr = NULL;
552
+
553
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserCreate_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserCreate_impl_init) || forceAllToNoops)
554
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserCreate_impl_fnptr = NULL;
555
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserDestroy_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserDestroy_impl_init) || forceAllToNoops)
556
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserDestroy_impl_fnptr = NULL;
557
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireStart_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireStart_impl_init) || forceAllToNoops)
558
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireStart_impl_fnptr = NULL;
559
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireFailed_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireFailed_impl_init) || forceAllToNoops)
560
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireFailed_impl_fnptr = NULL;
561
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireSuccess_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireSuccess_impl_init) || forceAllToNoops)
562
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireSuccess_impl_fnptr = NULL;
563
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserReleasing_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserReleasing_impl_init) || forceAllToNoops)
564
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserReleasing_impl_fnptr = NULL;
565
+ }
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvtx3/nvtxDetail/nvtxTypes.h ADDED
@@ -0,0 +1,333 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2009-2016 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO USER:
5
+ *
6
+ * This source code is subject to NVIDIA ownership rights under U.S. and
7
+ * international Copyright laws.
8
+ *
9
+ * This software and the information contained herein is PROPRIETARY and
10
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and conditions
11
+ * of a form of NVIDIA software license agreement.
12
+ *
13
+ * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
14
+ * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
15
+ * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
16
+ * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
17
+ * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
18
+ * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
19
+ * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
20
+ * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
21
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
22
+ * OR PERFORMANCE OF THIS SOURCE CODE.
23
+ *
24
+ * U.S. Government End Users. This source code is a "commercial item" as
25
+ * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
26
+ * "commercial computer software" and "commercial computer software
27
+ * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
28
+ * and is provided to the U.S. Government only as a commercial end item.
29
+ * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
30
+ * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
31
+ * source code with only those rights set forth herein.
32
+ *
33
+ * Any use of this source code in individual and commercial software must
34
+ * include, in the user documentation and internal comments to the code,
35
+ * the above Disclaimer and U.S. Government End Users Notice.
36
+ */
37
+
38
+ /* This header defines types which are used by the internal implementation
39
+ * of NVTX and callback subscribers. API clients do not use these types,
40
+ * so they are defined here instead of in nvToolsExt.h to clarify they are
41
+ * not part of the NVTX client API. */
42
+
43
+ #ifndef NVTX_IMPL_GUARD
44
+ #error Never include this file directly -- it is automatically included by nvToolsExt.h.
45
+ #endif
46
+
47
+ /* ------ Dependency-free types binary-compatible with real types ------- */
48
+
49
+ /* In order to avoid having the NVTX core API headers depend on non-NVTX
50
+ * headers like cuda.h, NVTX defines binary-compatible types to use for
51
+ * safely making the initialization versions of all NVTX functions without
52
+ * needing to have definitions for the real types. */
53
+
54
+ typedef int nvtx_CUdevice;
55
+ typedef void* nvtx_CUcontext;
56
+ typedef void* nvtx_CUstream;
57
+ typedef void* nvtx_CUevent;
58
+
59
+ typedef void* nvtx_cudaStream_t;
60
+ typedef void* nvtx_cudaEvent_t;
61
+
62
+ typedef void* nvtx_cl_platform_id;
63
+ typedef void* nvtx_cl_device_id;
64
+ typedef void* nvtx_cl_context;
65
+ typedef void* nvtx_cl_command_queue;
66
+ typedef void* nvtx_cl_mem;
67
+ typedef void* nvtx_cl_program;
68
+ typedef void* nvtx_cl_kernel;
69
+ typedef void* nvtx_cl_event;
70
+ typedef void* nvtx_cl_sampler;
71
+
72
+ typedef struct nvtxSyncUser* nvtxSyncUser_t;
73
+ struct nvtxSyncUserAttributes_v0;
74
+ typedef struct nvtxSyncUserAttributes_v0 nvtxSyncUserAttributes_t;
75
+
76
+ /* --------- Types for function pointers (with fake API types) ---------- */
77
+
78
+ typedef void (NVTX_API * nvtxMarkEx_impl_fntype)(const nvtxEventAttributes_t* eventAttrib);
79
+ typedef void (NVTX_API * nvtxMarkA_impl_fntype)(const char* message);
80
+ typedef void (NVTX_API * nvtxMarkW_impl_fntype)(const wchar_t* message);
81
+ typedef nvtxRangeId_t (NVTX_API * nvtxRangeStartEx_impl_fntype)(const nvtxEventAttributes_t* eventAttrib);
82
+ typedef nvtxRangeId_t (NVTX_API * nvtxRangeStartA_impl_fntype)(const char* message);
83
+ typedef nvtxRangeId_t (NVTX_API * nvtxRangeStartW_impl_fntype)(const wchar_t* message);
84
+ typedef void (NVTX_API * nvtxRangeEnd_impl_fntype)(nvtxRangeId_t id);
85
+ typedef int (NVTX_API * nvtxRangePushEx_impl_fntype)(const nvtxEventAttributes_t* eventAttrib);
86
+ typedef int (NVTX_API * nvtxRangePushA_impl_fntype)(const char* message);
87
+ typedef int (NVTX_API * nvtxRangePushW_impl_fntype)(const wchar_t* message);
88
+ typedef int (NVTX_API * nvtxRangePop_impl_fntype)(void);
89
+ typedef void (NVTX_API * nvtxNameCategoryA_impl_fntype)(uint32_t category, const char* name);
90
+ typedef void (NVTX_API * nvtxNameCategoryW_impl_fntype)(uint32_t category, const wchar_t* name);
91
+ typedef void (NVTX_API * nvtxNameOsThreadA_impl_fntype)(uint32_t threadId, const char* name);
92
+ typedef void (NVTX_API * nvtxNameOsThreadW_impl_fntype)(uint32_t threadId, const wchar_t* name);
93
+
94
+ /* Real impl types are defined in nvtxImplCuda_v3.h, where CUDA headers are included */
95
+ typedef void (NVTX_API * nvtxNameCuDeviceA_fakeimpl_fntype)(nvtx_CUdevice device, const char* name);
96
+ typedef void (NVTX_API * nvtxNameCuDeviceW_fakeimpl_fntype)(nvtx_CUdevice device, const wchar_t* name);
97
+ typedef void (NVTX_API * nvtxNameCuContextA_fakeimpl_fntype)(nvtx_CUcontext context, const char* name);
98
+ typedef void (NVTX_API * nvtxNameCuContextW_fakeimpl_fntype)(nvtx_CUcontext context, const wchar_t* name);
99
+ typedef void (NVTX_API * nvtxNameCuStreamA_fakeimpl_fntype)(nvtx_CUstream stream, const char* name);
100
+ typedef void (NVTX_API * nvtxNameCuStreamW_fakeimpl_fntype)(nvtx_CUstream stream, const wchar_t* name);
101
+ typedef void (NVTX_API * nvtxNameCuEventA_fakeimpl_fntype)(nvtx_CUevent event, const char* name);
102
+ typedef void (NVTX_API * nvtxNameCuEventW_fakeimpl_fntype)(nvtx_CUevent event, const wchar_t* name);
103
+
104
+ /* Real impl types are defined in nvtxImplOpenCL_v3.h, where OPENCL headers are included */
105
+ typedef void (NVTX_API * nvtxNameClDeviceA_fakeimpl_fntype)(nvtx_cl_device_id device, const char* name);
106
+ typedef void (NVTX_API * nvtxNameClDeviceW_fakeimpl_fntype)(nvtx_cl_device_id device, const wchar_t* name);
107
+ typedef void (NVTX_API * nvtxNameClContextA_fakeimpl_fntype)(nvtx_cl_context context, const char* name);
108
+ typedef void (NVTX_API * nvtxNameClContextW_fakeimpl_fntype)(nvtx_cl_context context, const wchar_t* name);
109
+ typedef void (NVTX_API * nvtxNameClCommandQueueA_fakeimpl_fntype)(nvtx_cl_command_queue command_queue, const char* name);
110
+ typedef void (NVTX_API * nvtxNameClCommandQueueW_fakeimpl_fntype)(nvtx_cl_command_queue command_queue, const wchar_t* name);
111
+ typedef void (NVTX_API * nvtxNameClMemObjectA_fakeimpl_fntype)(nvtx_cl_mem memobj, const char* name);
112
+ typedef void (NVTX_API * nvtxNameClMemObjectW_fakeimpl_fntype)(nvtx_cl_mem memobj, const wchar_t* name);
113
+ typedef void (NVTX_API * nvtxNameClSamplerA_fakeimpl_fntype)(nvtx_cl_sampler sampler, const char* name);
114
+ typedef void (NVTX_API * nvtxNameClSamplerW_fakeimpl_fntype)(nvtx_cl_sampler sampler, const wchar_t* name);
115
+ typedef void (NVTX_API * nvtxNameClProgramA_fakeimpl_fntype)(nvtx_cl_program program, const char* name);
116
+ typedef void (NVTX_API * nvtxNameClProgramW_fakeimpl_fntype)(nvtx_cl_program program, const wchar_t* name);
117
+ typedef void (NVTX_API * nvtxNameClEventA_fakeimpl_fntype)(nvtx_cl_event evnt, const char* name);
118
+ typedef void (NVTX_API * nvtxNameClEventW_fakeimpl_fntype)(nvtx_cl_event evnt, const wchar_t* name);
119
+
120
+ /* Real impl types are defined in nvtxImplCudaRt_v3.h, where CUDART headers are included */
121
+ typedef void (NVTX_API * nvtxNameCudaDeviceA_impl_fntype)(int device, const char* name);
122
+ typedef void (NVTX_API * nvtxNameCudaDeviceW_impl_fntype)(int device, const wchar_t* name);
123
+ typedef void (NVTX_API * nvtxNameCudaStreamA_fakeimpl_fntype)(nvtx_cudaStream_t stream, const char* name);
124
+ typedef void (NVTX_API * nvtxNameCudaStreamW_fakeimpl_fntype)(nvtx_cudaStream_t stream, const wchar_t* name);
125
+ typedef void (NVTX_API * nvtxNameCudaEventA_fakeimpl_fntype)(nvtx_cudaEvent_t event, const char* name);
126
+ typedef void (NVTX_API * nvtxNameCudaEventW_fakeimpl_fntype)(nvtx_cudaEvent_t event, const wchar_t* name);
127
+
128
+ typedef void (NVTX_API * nvtxDomainMarkEx_impl_fntype)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
129
+ typedef nvtxRangeId_t (NVTX_API * nvtxDomainRangeStartEx_impl_fntype)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
130
+ typedef void (NVTX_API * nvtxDomainRangeEnd_impl_fntype)(nvtxDomainHandle_t domain, nvtxRangeId_t id);
131
+ typedef int (NVTX_API * nvtxDomainRangePushEx_impl_fntype)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
132
+ typedef int (NVTX_API * nvtxDomainRangePop_impl_fntype)(nvtxDomainHandle_t domain);
133
+ typedef nvtxResourceHandle_t (NVTX_API * nvtxDomainResourceCreate_impl_fntype)(nvtxDomainHandle_t domain, nvtxResourceAttributes_t* attribs);
134
+ typedef void (NVTX_API * nvtxDomainResourceDestroy_impl_fntype)(nvtxResourceHandle_t resource);
135
+ typedef void (NVTX_API * nvtxDomainNameCategoryA_impl_fntype)(nvtxDomainHandle_t domain, uint32_t category, const char* name);
136
+ typedef void (NVTX_API * nvtxDomainNameCategoryW_impl_fntype)(nvtxDomainHandle_t domain, uint32_t category, const wchar_t* name);
137
+ typedef nvtxStringHandle_t (NVTX_API * nvtxDomainRegisterStringA_impl_fntype)(nvtxDomainHandle_t domain, const char* string);
138
+ typedef nvtxStringHandle_t (NVTX_API * nvtxDomainRegisterStringW_impl_fntype)(nvtxDomainHandle_t domain, const wchar_t* string);
139
+ typedef nvtxDomainHandle_t (NVTX_API * nvtxDomainCreateA_impl_fntype)(const char* message);
140
+ typedef nvtxDomainHandle_t (NVTX_API * nvtxDomainCreateW_impl_fntype)(const wchar_t* message);
141
+ typedef void (NVTX_API * nvtxDomainDestroy_impl_fntype)(nvtxDomainHandle_t domain);
142
+ typedef void (NVTX_API * nvtxInitialize_impl_fntype)(const void* reserved);
143
+
144
+ typedef nvtxSyncUser_t (NVTX_API * nvtxDomainSyncUserCreate_impl_fntype)(nvtxDomainHandle_t domain, const nvtxSyncUserAttributes_t* attribs);
145
+ typedef void (NVTX_API * nvtxDomainSyncUserDestroy_impl_fntype)(nvtxSyncUser_t handle);
146
+ typedef void (NVTX_API * nvtxDomainSyncUserAcquireStart_impl_fntype)(nvtxSyncUser_t handle);
147
+ typedef void (NVTX_API * nvtxDomainSyncUserAcquireFailed_impl_fntype)(nvtxSyncUser_t handle);
148
+ typedef void (NVTX_API * nvtxDomainSyncUserAcquireSuccess_impl_fntype)(nvtxSyncUser_t handle);
149
+ typedef void (NVTX_API * nvtxDomainSyncUserReleasing_impl_fntype)(nvtxSyncUser_t handle);
150
+
151
+ /* ---------------- Types for callback subscription --------------------- */
152
+
153
+ typedef const void *(NVTX_API * NvtxGetExportTableFunc_t)(uint32_t exportTableId);
154
+ typedef int (NVTX_API * NvtxInitializeInjectionNvtxFunc_t)(NvtxGetExportTableFunc_t exportTable);
155
+
156
+ typedef enum NvtxCallbackModule
157
+ {
158
+ NVTX_CB_MODULE_INVALID = 0,
159
+ NVTX_CB_MODULE_CORE = 1,
160
+ NVTX_CB_MODULE_CUDA = 2,
161
+ NVTX_CB_MODULE_OPENCL = 3,
162
+ NVTX_CB_MODULE_CUDART = 4,
163
+ NVTX_CB_MODULE_CORE2 = 5,
164
+ NVTX_CB_MODULE_SYNC = 6,
165
+ /* --- New constants must only be added directly above this line --- */
166
+ NVTX_CB_MODULE_SIZE,
167
+ NVTX_CB_MODULE_FORCE_INT = 0x7fffffff
168
+ } NvtxCallbackModule;
169
+
170
+ typedef enum NvtxCallbackIdCore
171
+ {
172
+ NVTX_CBID_CORE_INVALID = 0,
173
+ NVTX_CBID_CORE_MarkEx = 1,
174
+ NVTX_CBID_CORE_MarkA = 2,
175
+ NVTX_CBID_CORE_MarkW = 3,
176
+ NVTX_CBID_CORE_RangeStartEx = 4,
177
+ NVTX_CBID_CORE_RangeStartA = 5,
178
+ NVTX_CBID_CORE_RangeStartW = 6,
179
+ NVTX_CBID_CORE_RangeEnd = 7,
180
+ NVTX_CBID_CORE_RangePushEx = 8,
181
+ NVTX_CBID_CORE_RangePushA = 9,
182
+ NVTX_CBID_CORE_RangePushW = 10,
183
+ NVTX_CBID_CORE_RangePop = 11,
184
+ NVTX_CBID_CORE_NameCategoryA = 12,
185
+ NVTX_CBID_CORE_NameCategoryW = 13,
186
+ NVTX_CBID_CORE_NameOsThreadA = 14,
187
+ NVTX_CBID_CORE_NameOsThreadW = 15,
188
+ /* --- New constants must only be added directly above this line --- */
189
+ NVTX_CBID_CORE_SIZE,
190
+ NVTX_CBID_CORE_FORCE_INT = 0x7fffffff
191
+ } NvtxCallbackIdCore;
192
+
193
+ typedef enum NvtxCallbackIdCore2
194
+ {
195
+ NVTX_CBID_CORE2_INVALID = 0,
196
+ NVTX_CBID_CORE2_DomainMarkEx = 1,
197
+ NVTX_CBID_CORE2_DomainRangeStartEx = 2,
198
+ NVTX_CBID_CORE2_DomainRangeEnd = 3,
199
+ NVTX_CBID_CORE2_DomainRangePushEx = 4,
200
+ NVTX_CBID_CORE2_DomainRangePop = 5,
201
+ NVTX_CBID_CORE2_DomainResourceCreate = 6,
202
+ NVTX_CBID_CORE2_DomainResourceDestroy = 7,
203
+ NVTX_CBID_CORE2_DomainNameCategoryA = 8,
204
+ NVTX_CBID_CORE2_DomainNameCategoryW = 9,
205
+ NVTX_CBID_CORE2_DomainRegisterStringA = 10,
206
+ NVTX_CBID_CORE2_DomainRegisterStringW = 11,
207
+ NVTX_CBID_CORE2_DomainCreateA = 12,
208
+ NVTX_CBID_CORE2_DomainCreateW = 13,
209
+ NVTX_CBID_CORE2_DomainDestroy = 14,
210
+ NVTX_CBID_CORE2_Initialize = 15,
211
+ /* --- New constants must only be added directly above this line --- */
212
+ NVTX_CBID_CORE2_SIZE,
213
+ NVTX_CBID_CORE2_FORCE_INT = 0x7fffffff
214
+ } NvtxCallbackIdCore2;
215
+
216
+ typedef enum NvtxCallbackIdCuda
217
+ {
218
+ NVTX_CBID_CUDA_INVALID = 0,
219
+ NVTX_CBID_CUDA_NameCuDeviceA = 1,
220
+ NVTX_CBID_CUDA_NameCuDeviceW = 2,
221
+ NVTX_CBID_CUDA_NameCuContextA = 3,
222
+ NVTX_CBID_CUDA_NameCuContextW = 4,
223
+ NVTX_CBID_CUDA_NameCuStreamA = 5,
224
+ NVTX_CBID_CUDA_NameCuStreamW = 6,
225
+ NVTX_CBID_CUDA_NameCuEventA = 7,
226
+ NVTX_CBID_CUDA_NameCuEventW = 8,
227
+ /* --- New constants must only be added directly above this line --- */
228
+ NVTX_CBID_CUDA_SIZE,
229
+ NVTX_CBID_CUDA_FORCE_INT = 0x7fffffff
230
+ } NvtxCallbackIdCuda;
231
+
232
+ typedef enum NvtxCallbackIdCudaRt
233
+ {
234
+ NVTX_CBID_CUDART_INVALID = 0,
235
+ NVTX_CBID_CUDART_NameCudaDeviceA = 1,
236
+ NVTX_CBID_CUDART_NameCudaDeviceW = 2,
237
+ NVTX_CBID_CUDART_NameCudaStreamA = 3,
238
+ NVTX_CBID_CUDART_NameCudaStreamW = 4,
239
+ NVTX_CBID_CUDART_NameCudaEventA = 5,
240
+ NVTX_CBID_CUDART_NameCudaEventW = 6,
241
+ /* --- New constants must only be added directly above this line --- */
242
+ NVTX_CBID_CUDART_SIZE,
243
+ NVTX_CBID_CUDART_FORCE_INT = 0x7fffffff
244
+ } NvtxCallbackIdCudaRt;
245
+
246
+ typedef enum NvtxCallbackIdOpenCL
247
+ {
248
+ NVTX_CBID_OPENCL_INVALID = 0,
249
+ NVTX_CBID_OPENCL_NameClDeviceA = 1,
250
+ NVTX_CBID_OPENCL_NameClDeviceW = 2,
251
+ NVTX_CBID_OPENCL_NameClContextA = 3,
252
+ NVTX_CBID_OPENCL_NameClContextW = 4,
253
+ NVTX_CBID_OPENCL_NameClCommandQueueA = 5,
254
+ NVTX_CBID_OPENCL_NameClCommandQueueW = 6,
255
+ NVTX_CBID_OPENCL_NameClMemObjectA = 7,
256
+ NVTX_CBID_OPENCL_NameClMemObjectW = 8,
257
+ NVTX_CBID_OPENCL_NameClSamplerA = 9,
258
+ NVTX_CBID_OPENCL_NameClSamplerW = 10,
259
+ NVTX_CBID_OPENCL_NameClProgramA = 11,
260
+ NVTX_CBID_OPENCL_NameClProgramW = 12,
261
+ NVTX_CBID_OPENCL_NameClEventA = 13,
262
+ NVTX_CBID_OPENCL_NameClEventW = 14,
263
+ /* --- New constants must only be added directly above this line --- */
264
+ NVTX_CBID_OPENCL_SIZE,
265
+ NVTX_CBID_OPENCL_FORCE_INT = 0x7fffffff
266
+ } NvtxCallbackIdOpenCL;
267
+
268
+ typedef enum NvtxCallbackIdSync
269
+ {
270
+ NVTX_CBID_SYNC_INVALID = 0,
271
+ NVTX_CBID_SYNC_DomainSyncUserCreate = 1,
272
+ NVTX_CBID_SYNC_DomainSyncUserDestroy = 2,
273
+ NVTX_CBID_SYNC_DomainSyncUserAcquireStart = 3,
274
+ NVTX_CBID_SYNC_DomainSyncUserAcquireFailed = 4,
275
+ NVTX_CBID_SYNC_DomainSyncUserAcquireSuccess = 5,
276
+ NVTX_CBID_SYNC_DomainSyncUserReleasing = 6,
277
+ /* --- New constants must only be added directly above this line --- */
278
+ NVTX_CBID_SYNC_SIZE,
279
+ NVTX_CBID_SYNC_FORCE_INT = 0x7fffffff
280
+ } NvtxCallbackIdSync;
281
+
282
+ /* IDs for NVTX Export Tables */
283
+ typedef enum NvtxExportTableID
284
+ {
285
+ NVTX_ETID_INVALID = 0,
286
+ NVTX_ETID_CALLBACKS = 1,
287
+ NVTX_ETID_RESERVED0 = 2,
288
+ NVTX_ETID_VERSIONINFO = 3,
289
+ /* --- New constants must only be added directly above this line --- */
290
+ NVTX_ETID_SIZE,
291
+ NVTX_ETID_FORCE_INT = 0x7fffffff
292
+ } NvtxExportTableID;
293
+
294
+ typedef void (* NvtxFunctionPointer)(void); /* generic uncallable function pointer, must be casted to appropriate function type */
295
+ typedef NvtxFunctionPointer** NvtxFunctionTable; /* double pointer because array(1) of pointers(2) to function pointers */
296
+
297
+ typedef struct NvtxExportTableCallbacks
298
+ {
299
+ size_t struct_size;
300
+
301
+ /* returns an array of pointer to function pointers*/
302
+ int (NVTX_API *GetModuleFunctionTable)(
303
+ NvtxCallbackModule module,
304
+ NvtxFunctionTable* out_table,
305
+ unsigned int* out_size);
306
+ } NvtxExportTableCallbacks;
307
+
308
+ typedef struct NvtxExportTableVersionInfo
309
+ {
310
+ /* sizeof(NvtxExportTableVersionInfo) */
311
+ size_t struct_size;
312
+
313
+ /* The API version comes from the NVTX library linked to the app. The
314
+ * injection library is can use this info to make some assumptions */
315
+ uint32_t version;
316
+
317
+ /* Reserved for alignment, do not use */
318
+ uint32_t reserved0;
319
+
320
+ /* This must be set by tools when attaching to provide applications
321
+ * the ability to, in emergency situations, detect problematic tools
322
+ * versions and modify the NVTX source to prevent attaching anything
323
+ * that causes trouble in the app. Currently, this value is ignored. */
324
+ void (NVTX_API *SetInjectionNvtxVersion)(
325
+ uint32_t version);
326
+ } NvtxExportTableVersionInfo;
327
+
328
+
329
+
330
+
331
+
332
+
333
+
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/lib/__init__.py ADDED
File without changes
tuning-competition-baseline/.venv/lib/python3.11/site-packages/packaging-24.2.dist-info/INSTALLER ADDED
@@ -0,0 +1 @@
 
 
1
+ pip
tuning-competition-baseline/.venv/lib/python3.11/site-packages/packaging-24.2.dist-info/LICENSE ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ This software is made available under the terms of *either* of the licenses
2
+ found in LICENSE.APACHE or LICENSE.BSD. Contributions to this software is made
3
+ under the terms of *both* these licenses.
tuning-competition-baseline/.venv/lib/python3.11/site-packages/packaging-24.2.dist-info/RECORD ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ packaging-24.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
2
+ packaging-24.2.dist-info/LICENSE,sha256=ytHvW9NA1z4HS6YU0m996spceUDD2MNIUuZcSQlobEg,197
3
+ packaging-24.2.dist-info/LICENSE.APACHE,sha256=DVQuDIgE45qn836wDaWnYhSdxoLXgpRRKH4RuTjpRZQ,10174
4
+ packaging-24.2.dist-info/LICENSE.BSD,sha256=tw5-m3QvHMb5SLNMFqo5_-zpQZY2S8iP8NIYDwAo-sU,1344
5
+ packaging-24.2.dist-info/METADATA,sha256=ohH86s6k5mIfQxY2TS0LcSfADeOFa4BiCC-bxZV-pNs,3204
6
+ packaging-24.2.dist-info/RECORD,,
7
+ packaging-24.2.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ packaging-24.2.dist-info/WHEEL,sha256=CpUCUxeHQbRN5UGRQHYRJorO5Af-Qy_fHMctcQ8DSGI,82
9
+ packaging/__init__.py,sha256=dk4Ta_vmdVJxYHDcfyhvQNw8V3PgSBomKNXqg-D2JDY,494
10
+ packaging/__pycache__/__init__.cpython-311.pyc,,
11
+ packaging/__pycache__/_elffile.cpython-311.pyc,,
12
+ packaging/__pycache__/_manylinux.cpython-311.pyc,,
13
+ packaging/__pycache__/_musllinux.cpython-311.pyc,,
14
+ packaging/__pycache__/_parser.cpython-311.pyc,,
15
+ packaging/__pycache__/_structures.cpython-311.pyc,,
16
+ packaging/__pycache__/_tokenizer.cpython-311.pyc,,
17
+ packaging/__pycache__/markers.cpython-311.pyc,,
18
+ packaging/__pycache__/metadata.cpython-311.pyc,,
19
+ packaging/__pycache__/requirements.cpython-311.pyc,,
20
+ packaging/__pycache__/specifiers.cpython-311.pyc,,
21
+ packaging/__pycache__/tags.cpython-311.pyc,,
22
+ packaging/__pycache__/utils.cpython-311.pyc,,
23
+ packaging/__pycache__/version.cpython-311.pyc,,
24
+ packaging/_elffile.py,sha256=cflAQAkE25tzhYmq_aCi72QfbT_tn891tPzfpbeHOwE,3306
25
+ packaging/_manylinux.py,sha256=vl5OCoz4kx80H5rwXKeXWjl9WNISGmr4ZgTpTP9lU9c,9612
26
+ packaging/_musllinux.py,sha256=p9ZqNYiOItGee8KcZFeHF_YcdhVwGHdK6r-8lgixvGQ,2694
27
+ packaging/_parser.py,sha256=s_TvTvDNK0NrM2QB3VKThdWFM4Nc0P6JnkObkl3MjpM,10236
28
+ packaging/_structures.py,sha256=q3eVNmbWJGG_S0Dit_S3Ao8qQqz_5PYTXFAKBZe5yr4,1431
29
+ packaging/_tokenizer.py,sha256=J6v5H7Jzvb-g81xp_2QACKwO7LxHQA6ikryMU7zXwN8,5273
30
+ packaging/licenses/__init__.py,sha256=1x5M1nEYjcgwEbLt0dXwz2ukjr18DiCzC0sraQqJ-Ww,5715
31
+ packaging/licenses/__pycache__/__init__.cpython-311.pyc,,
32
+ packaging/licenses/__pycache__/_spdx.cpython-311.pyc,,
33
+ packaging/licenses/_spdx.py,sha256=oAm1ztPFwlsmCKe7lAAsv_OIOfS1cWDu9bNBkeu-2ns,48398
34
+ packaging/markers.py,sha256=c89TNzB7ZdGYhkovm6PYmqGyHxXlYVaLW591PHUNKD8,10561
35
+ packaging/metadata.py,sha256=YJibM7GYe4re8-0a3OlXmGS-XDgTEoO4tlBt2q25Bng,34762
36
+ packaging/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
37
+ packaging/requirements.py,sha256=gYyRSAdbrIyKDY66ugIDUQjRMvxkH2ALioTmX3tnL6o,2947
38
+ packaging/specifiers.py,sha256=GG1wPNMcL0fMJO68vF53wKMdwnfehDcaI-r9NpTfilA,40074
39
+ packaging/tags.py,sha256=CFqrJzAzc2XNGexerH__T-Y5Iwq7WbsYXsiLERLWxY0,21014
40
+ packaging/utils.py,sha256=0F3Hh9OFuRgrhTgGZUl5K22Fv1YP2tZl1z_2gO6kJiA,5050
41
+ packaging/version.py,sha256=olfyuk_DPbflNkJ4wBWetXQ17c74x3DB501degUv7DY,16676