koichi12 commited on
Commit
2e7ec00
·
verified ·
1 Parent(s): 13ae937

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. tuning-competition-baseline/.venv/lib/python3.11/site-packages/filelock/__pycache__/_api.cpython-311.pyc +0 -0
  2. tuning-competition-baseline/.venv/lib/python3.11/site-packages/filelock/__pycache__/_error.cpython-311.pyc +0 -0
  3. tuning-competition-baseline/.venv/lib/python3.11/site-packages/filelock/__pycache__/version.cpython-311.pyc +0 -0
  4. tuning-competition-baseline/.venv/lib/python3.11/site-packages/filelock/_api.py +323 -0
  5. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/config.cpython-311.pyc +0 -0
  6. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/conftest.cpython-311.pyc +0 -0
  7. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/dircache.cpython-311.pyc +0 -0
  8. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/exceptions.cpython-311.pyc +0 -0
  9. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/generic.cpython-311.pyc +0 -0
  10. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/parquet.cpython-311.pyc +0 -0
  11. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/spec.cpython-311.pyc +0 -0
  12. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/tests/abstract/__init__.py +287 -0
  13. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/tests/abstract/__pycache__/get.cpython-311.pyc +0 -0
  14. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/tests/abstract/common.py +175 -0
  15. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/tests/abstract/copy.py +557 -0
  16. tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/calculus/__pycache__/approximation.cpython-311.pyc +0 -0
  17. tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/calculus/__pycache__/differentiation.cpython-311.pyc +0 -0
  18. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/__pycache__/__init__.cpython-311.pyc +0 -0
  19. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/__init__.py +0 -0
  20. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti.h +123 -0
  21. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_callbacks.h +760 -0
  22. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_checkpoint.h +127 -0
  23. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_nvtx_cbid.h +111 -0
  24. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_profiler_target.h +588 -0
  25. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/generated_nvtx_meta.h +247 -0
  26. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/nvperf_common.h +273 -0
  27. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/nvperf_cuda_host.h +197 -0
  28. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/nvperf_host.h +1471 -0
  29. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_runtime/__init__.py +0 -0
  30. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cusolver/__pycache__/__init__.cpython-311.pyc +0 -0
  31. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cusolver/include/__init__.py +0 -0
  32. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cusolver/include/__pycache__/__init__.cpython-311.pyc +0 -0
  33. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cusolver/include/cusolverMg.h +318 -0
  34. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cusolver/include/cusolverRf.h +339 -0
  35. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cusolver/include/cusolverSp_LOWLEVEL_PREVIEW.h +1107 -0
  36. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cusolver/lib/__init__.py +0 -0
  37. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nccl/__pycache__/__init__.cpython-311.pyc +0 -0
  38. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nccl/include/nccl_net.h +456 -0
  39. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvtx3/nvToolsExtCudaRt.h +146 -0
  40. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvtx3/nvtxDetail/nvtxImplCore.h +299 -0
  41. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvtx3/nvtxDetail/nvtxImplCudaRt_v3.h +112 -0
  42. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvtx3/nvtxDetail/nvtxImplCuda_v3.h +133 -0
  43. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvtx3/nvtxDetail/nvtxImplOpenCL_v3.h +192 -0
  44. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvtx3/nvtxDetail/nvtxImplSync_v3.h +114 -0
  45. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvtx3/nvtxDetail/nvtxInitDefs.h +565 -0
  46. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvtx3/nvtxDetail/nvtxTypes.h +333 -0
  47. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/lib/__init__.py +0 -0
  48. tuning-competition-baseline/.venv/lib/python3.11/site-packages/packaging-24.2.dist-info/INSTALLER +1 -0
  49. tuning-competition-baseline/.venv/lib/python3.11/site-packages/packaging-24.2.dist-info/LICENSE +3 -0
  50. tuning-competition-baseline/.venv/lib/python3.11/site-packages/packaging-24.2.dist-info/RECORD +41 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/filelock/__pycache__/_api.cpython-311.pyc ADDED
Binary file (14.6 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/filelock/__pycache__/_error.cpython-311.pyc ADDED
Binary file (1.97 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/filelock/__pycache__/version.cpython-311.pyc ADDED
Binary file (667 Bytes). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/filelock/_api.py ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import contextlib
4
+ import logging
5
+ import os
6
+ import time
7
+ import warnings
8
+ from abc import ABC, abstractmethod
9
+ from dataclasses import dataclass
10
+ from threading import local
11
+ from typing import TYPE_CHECKING, Any, ClassVar
12
+ from weakref import WeakValueDictionary
13
+
14
+ from ._error import Timeout
15
+
16
+ if TYPE_CHECKING:
17
+ import sys
18
+ from types import TracebackType
19
+
20
+ if sys.version_info >= (3, 11): # pragma: no cover (py311+)
21
+ from typing import Self
22
+ else: # pragma: no cover (<py311)
23
+ from typing_extensions import Self
24
+
25
+
26
+ _LOGGER = logging.getLogger("filelock")
27
+
28
+
29
# Proxy object handed out by :meth:`BaseFileLock.acquire`. If ``acquire`` returned
# the lock itself, a ``with lock.acquire():`` statement would call ``__enter__`` a
# second time and acquire the lock again without an automatic matching release —
# issue #37 (memory leak). This thin wrapper owns exactly one release instead.
class AcquireReturnProxy:
    """Context-aware wrapper that releases the wrapped lock file on exit."""

    def __init__(self, lock: BaseFileLock) -> None:
        # The lock to release once this proxy's context exits.
        self.lock = lock

    def __enter__(self) -> BaseFileLock:
        # Hand back the underlying lock (not the proxy) so callers can use its API.
        return self.lock

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_value: BaseException | None,
        traceback: TracebackType | None,
    ) -> None:
        # Release unconditionally — the acquisition happened in acquire(), not here.
        self.lock.release()
48
+
49
+
50
@dataclass
class FileLockContext:
    """A dataclass which holds the mutable state for a ``BaseFileLock`` object."""

    # The context is held in a separate class to allow optional use of thread local
    # storage via the ThreadLocalFileContext subclass. Field order matters: it is
    # the positional-constructor order of the dataclass.

    #: The path to the lock file.
    lock_file: str

    #: The default timeout value, in seconds; a negative value disables the timeout.
    timeout: float

    #: The permission mode (e.g. 0o644) used when creating the lock file.
    mode: int

    #: The file descriptor for the *lock_file* as returned by os.open(); not None exactly while the lock is held.
    lock_file_fd: int | None = None

    #: The lock counter is used for implementing the nested (reentrant) locking mechanism.
    lock_counter: int = 0  # Incremented on each acquire; the lock is only truly released when this drops back to 0.
71
+
72
+
73
class ThreadLocalFileContext(FileLockContext, local):
    """A thread-local version of the ``FileLockContext`` class.

    Inheriting from ``threading.local`` gives each thread its own independent
    copy of the context fields, so a lock built on this context is reentrant
    only within a single thread.
    """
75
+
76
+
77
class BaseFileLock(ABC, contextlib.ContextDecorator):
    """Abstract base class for a file lock object.

    Subclasses implement the platform-specific :meth:`_acquire` / :meth:`_release`
    pair; this class supplies reentrancy counting, timeout/polling, the context
    manager protocol and (via ``ContextDecorator``) use as a function decorator.
    """

    # Singleton registry keyed by str(lock_file). A WeakValueDictionary is used so
    # an entry disappears automatically once no caller holds a reference.
    _instances: ClassVar[WeakValueDictionary[str, BaseFileLock]] = WeakValueDictionary()

    def __new__(  # noqa: PLR0913
        cls,
        lock_file: str | os.PathLike[str],
        timeout: float = -1,  # noqa: ARG003
        mode: int = 0o644,  # noqa: ARG003
        thread_local: bool = True,  # noqa: ARG003, FBT001, FBT002
        *,
        is_singleton: bool = False,
        **kwargs: dict[str, Any],  # capture remaining kwargs for subclasses # noqa: ARG003
    ) -> Self:
        """Create a new lock object or if specified return the singleton instance for the lock file."""
        if not is_singleton:
            return super().__new__(cls)

        # Singleton path: reuse the registered instance for this path, or create
        # and register a new one. NOTE(review): __init__ still runs on the reused
        # instance afterwards — Python always calls __init__ on the returned object.
        instance = cls._instances.get(str(lock_file))
        if not instance:
            instance = super().__new__(cls)
            cls._instances[str(lock_file)] = instance

        return instance  # type: ignore[return-value] # https://github.com/python/mypy/issues/15322

    def __init__(  # noqa: PLR0913
        self,
        lock_file: str | os.PathLike[str],
        timeout: float = -1,
        mode: int = 0o644,
        thread_local: bool = True,  # noqa: FBT001, FBT002
        *,
        is_singleton: bool = False,
    ) -> None:
        """
        Create a new lock object.

        :param lock_file: path to the file
        :param timeout: default timeout when acquiring the lock, in seconds. It will be used as fallback value in \
            the acquire method, if no timeout value (``None``) is given. If you want to disable the timeout, set it \
            to a negative value. A timeout of 0 means, that there is exactly one attempt to acquire the file lock.
        :param mode: file permissions for the lockfile
        :param thread_local: Whether this object's internal context should be thread local or not. If this is set to \
            ``False`` then the lock will be reentrant across threads.
        :param is_singleton: If this is set to ``True`` then only one instance of this class will be created \
            per lock file. This is useful if you want to use the lock object for reentrant locking without needing \
            to pass the same object around.
        """
        self._is_thread_local = thread_local
        self._is_singleton = is_singleton

        # Create the context. Note that external code should not work with the context directly and should instead use
        # properties of this class.
        kwargs: dict[str, Any] = {
            "lock_file": os.fspath(lock_file),
            "timeout": timeout,
            "mode": mode,
        }
        self._context: FileLockContext = (ThreadLocalFileContext if thread_local else FileLockContext)(**kwargs)

    # NOTE(review): unlike ``is_singleton`` below, this is a plain method rather
    # than a property — kept as-is since callers invoke it with parentheses.
    def is_thread_local(self) -> bool:
        """:return: a flag indicating if this lock is thread local or not"""
        return self._is_thread_local

    @property
    def is_singleton(self) -> bool:
        """:return: a flag indicating if this lock is singleton or not"""
        return self._is_singleton

    @property
    def lock_file(self) -> str:
        """:return: path to the lock file"""
        return self._context.lock_file

    @property
    def timeout(self) -> float:
        """
        :return: the default timeout value, in seconds

        .. versionadded:: 2.0.0
        """
        return self._context.timeout

    @timeout.setter
    def timeout(self, value: float | str) -> None:
        """
        Change the default timeout value.

        :param value: the new value, in seconds
        """
        self._context.timeout = float(value)

    @abstractmethod
    def _acquire(self) -> None:
        """If the file lock could be acquired, self._context.lock_file_fd holds the file descriptor of the lock file."""
        raise NotImplementedError

    @abstractmethod
    def _release(self) -> None:
        """Releases the lock and sets self._context.lock_file_fd to None."""
        raise NotImplementedError

    @property
    def is_locked(self) -> bool:
        """
        :return: A boolean indicating if the lock file is holding the lock currently.

        .. versionchanged:: 2.0.0

            This was previously a method and is now a property.
        """
        # Holding the lock is equated with having an open file descriptor for it.
        return self._context.lock_file_fd is not None

    @property
    def lock_counter(self) -> int:
        """:return: The number of times this lock has been acquired (but not yet released)."""
        return self._context.lock_counter

    def acquire(
        self,
        timeout: float | None = None,
        poll_interval: float = 0.05,
        *,
        poll_intervall: float | None = None,
        blocking: bool = True,
    ) -> AcquireReturnProxy:
        """
        Try to acquire the file lock.

        :param timeout: maximum wait time for acquiring the lock, ``None`` means use the default :attr:`~timeout` is and
            if ``timeout < 0``, there is no timeout and this method will block until the lock could be acquired
        :param poll_interval: interval of trying to acquire the lock file
        :param poll_intervall: deprecated, kept for backwards compatibility, use ``poll_interval`` instead
        :param blocking: defaults to True. If False, function will return immediately if it cannot obtain a lock on the
            first attempt. Otherwise, this method will block until the timeout expires or the lock is acquired.
        :raises Timeout: if fails to acquire lock within the timeout period
        :return: a context object that will unlock the file when the context is exited

        .. code-block:: python

            # You can use this method in the context manager (recommended)
            with lock.acquire():
                pass

            # Or use an equivalent try-finally construct:
            lock.acquire()
            try:
                pass
            finally:
                lock.release()

        .. versionchanged:: 2.0.0

            This method returns now a *proxy* object instead of *self*,
            so that it can be used in a with statement without side effects.

        """
        # Use the default timeout, if no timeout is provided.
        if timeout is None:
            timeout = self._context.timeout

        # Legacy misspelled keyword wins over the positional default when given.
        if poll_intervall is not None:
            msg = "use poll_interval instead of poll_intervall"
            warnings.warn(msg, DeprecationWarning, stacklevel=2)
            poll_interval = poll_intervall

        # Increment the number right at the beginning. We can still undo it, if something fails.
        self._context.lock_counter += 1

        lock_id = id(self)
        lock_filename = self.lock_file
        start_time = time.perf_counter()
        try:
            # Poll until the OS-level lock is obtained, blocking=False fails fast,
            # or the elapsed time exceeds a non-negative timeout.
            while True:
                if not self.is_locked:
                    _LOGGER.debug("Attempting to acquire lock %s on %s", lock_id, lock_filename)
                    self._acquire()
                if self.is_locked:
                    _LOGGER.debug("Lock %s acquired on %s", lock_id, lock_filename)
                    break
                if blocking is False:
                    _LOGGER.debug("Failed to immediately acquire lock %s on %s", lock_id, lock_filename)
                    raise Timeout(lock_filename)  # noqa: TRY301
                if 0 <= timeout < time.perf_counter() - start_time:
                    _LOGGER.debug("Timeout on acquiring lock %s on %s", lock_id, lock_filename)
                    raise Timeout(lock_filename)  # noqa: TRY301
                msg = "Lock %s not acquired on %s, waiting %s seconds ..."
                _LOGGER.debug(msg, lock_id, lock_filename, poll_interval)
                time.sleep(poll_interval)
        except BaseException:  # Something did go wrong, so decrement the counter.
            self._context.lock_counter = max(0, self._context.lock_counter - 1)
            raise
        return AcquireReturnProxy(lock=self)

    def release(self, force: bool = False) -> None:  # noqa: FBT001, FBT002
        """
        Releases the file lock. Please note, that the lock is only completely released, if the lock counter is 0. Also
        note, that the lock file itself is not automatically deleted.

        :param force: If true, the lock counter is ignored and the lock is released in every case.
        """
        if self.is_locked:
            self._context.lock_counter -= 1

            if self._context.lock_counter == 0 or force:
                lock_id, lock_filename = id(self), self.lock_file

                _LOGGER.debug("Attempting to release lock %s on %s", lock_id, lock_filename)
                self._release()
                # Reset to 0 so a forced release cannot leave a stale positive count.
                self._context.lock_counter = 0
                _LOGGER.debug("Lock %s released on %s", lock_id, lock_filename)

    def __enter__(self) -> Self:
        """
        Acquire the lock.

        :return: the lock object
        """
        self.acquire()
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_value: BaseException | None,
        traceback: TracebackType | None,
    ) -> None:
        """
        Release the lock.

        :param exc_type: the exception type if raised
        :param exc_value: the exception value if raised
        :param traceback: the exception traceback if raised
        """
        self.release()

    def __del__(self) -> None:
        """Called when the lock object is deleted; force-releases as a safety net.

        NOTE(review): relies on finalizer timing, which is interpreter-dependent —
        callers should still release explicitly or use the context manager.
        """
        self.release(force=True)
318
+
319
+
320
# Explicit public API of this module: the abstract lock base class and the
# proxy object returned by its acquire() method.
__all__ = [
    "BaseFileLock",
    "AcquireReturnProxy",
]
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/config.cpython-311.pyc ADDED
Binary file (6.7 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/conftest.cpython-311.pyc ADDED
Binary file (3.42 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/dircache.cpython-311.pyc ADDED
Binary file (4.75 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/exceptions.cpython-311.pyc ADDED
Binary file (983 Bytes). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/generic.cpython-311.pyc ADDED
Binary file (21.7 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/parquet.cpython-311.pyc ADDED
Binary file (17.7 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/__pycache__/spec.cpython-311.pyc ADDED
Binary file (88.9 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/tests/abstract/__init__.py ADDED
@@ -0,0 +1,287 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from hashlib import md5
3
+
4
+ import pytest
5
+
6
+ from fsspec.implementations.local import LocalFileSystem
7
+ from fsspec.tests.abstract.copy import AbstractCopyTests # noqa
8
+ from fsspec.tests.abstract.get import AbstractGetTests # noqa
9
+ from fsspec.tests.abstract.put import AbstractPutTests # noqa
10
+
11
+
12
class BaseAbstractFixtures:
    """
    Abstract base class containing fixtures that are used by but never need to
    be overridden in derived filesystem-specific classes to run the abstract
    tests on such filesystems.

    Each ``fs_*`` fixture builds a scenario on the filesystem under test and
    each ``local_*`` fixture builds the same scenario on the local filesystem;
    the shared ``_*`` helpers below do the actual directory construction.
    """

    @pytest.fixture
    def fs_bulk_operations_scenario_0(self, fs, fs_join, fs_path):
        """
        Scenario on remote filesystem that is used for many cp/get/put tests.

        Cleans up at the end of each test in which it is used.
        """
        source = self._bulk_operations_scenario_0(fs, fs_join, fs_path)
        yield source
        # Teardown: code after the yield runs when the consuming test finishes.
        fs.rm(source, recursive=True)

    @pytest.fixture
    def fs_glob_edge_cases_files(self, fs, fs_join, fs_path):
        """
        Scenario on remote filesystem that is used for glob edge cases cp/get/put tests.

        Cleans up at the end of each test in which it is used.
        """
        source = self._glob_edge_cases_files(fs, fs_join, fs_path)
        yield source
        fs.rm(source, recursive=True)

    @pytest.fixture
    def fs_dir_and_file_with_same_name_prefix(self, fs, fs_join, fs_path):
        """
        Scenario on remote filesystem that is used to check cp/get/put on directory
        and file with the same name prefixes.

        Cleans up at the end of each test in which it is used.
        """
        source = self._dir_and_file_with_same_name_prefix(fs, fs_join, fs_path)
        yield source
        fs.rm(source, recursive=True)

    @pytest.fixture
    def fs_10_files_with_hashed_names(self, fs, fs_join, fs_path):
        """
        Scenario on remote filesystem that is used to check cp/get/put files order
        when source and destination are lists.

        Cleans up at the end of each test in which it is used.
        """
        source = self._10_files_with_hashed_names(fs, fs_join, fs_path)
        yield source
        fs.rm(source, recursive=True)

    @pytest.fixture
    def fs_target(self, fs, fs_join, fs_path):
        """
        Return name of remote directory that does not yet exist to copy into.

        Cleans up at the end of each test in which it is used.
        """
        target = fs_join(fs_path, "target")
        yield target
        # The target may or may not have been created by the test, so check first.
        if fs.exists(target):
            fs.rm(target, recursive=True)

    @pytest.fixture
    def local_bulk_operations_scenario_0(self, local_fs, local_join, local_path):
        """
        Scenario on local filesystem that is used for many cp/get/put tests.

        Cleans up at the end of each test in which it is used.
        """
        source = self._bulk_operations_scenario_0(local_fs, local_join, local_path)
        yield source
        local_fs.rm(source, recursive=True)

    @pytest.fixture
    def local_glob_edge_cases_files(self, local_fs, local_join, local_path):
        """
        Scenario on local filesystem that is used for glob edge cases cp/get/put tests.

        Cleans up at the end of each test in which it is used.
        """
        source = self._glob_edge_cases_files(local_fs, local_join, local_path)
        yield source
        local_fs.rm(source, recursive=True)

    @pytest.fixture
    def local_dir_and_file_with_same_name_prefix(
        self, local_fs, local_join, local_path
    ):
        """
        Scenario on local filesystem that is used to check cp/get/put on directory
        and file with the same name prefixes.

        Cleans up at the end of each test in which it is used.
        """
        source = self._dir_and_file_with_same_name_prefix(
            local_fs, local_join, local_path
        )
        yield source
        local_fs.rm(source, recursive=True)

    @pytest.fixture
    def local_10_files_with_hashed_names(self, local_fs, local_join, local_path):
        """
        Scenario on local filesystem that is used to check cp/get/put files order
        when source and destination are lists.

        Cleans up at the end of each test in which it is used.
        """
        source = self._10_files_with_hashed_names(local_fs, local_join, local_path)
        yield source
        local_fs.rm(source, recursive=True)

    @pytest.fixture
    def local_target(self, local_fs, local_join, local_path):
        """
        Return name of local directory that does not yet exist to copy into.

        Cleans up at the end of each test in which it is used.
        """
        target = local_join(local_path, "target")
        yield target
        if local_fs.exists(target):
            local_fs.rm(target, recursive=True)

    def _glob_edge_cases_files(self, some_fs, some_join, some_path):
        """
        Scenario that is used for glob edge cases cp/get/put tests.
        Creates the following directory and file structure:

        📁 source
        ├── 📄 file1
        ├── 📄 file2
        ├── 📁 subdir0
        │   ├── 📄 subfile1
        │   ├── 📄 subfile2
        │   └── 📁 nesteddir
        │       └── 📄 nestedfile
        └── 📁 subdir1
            ├── 📄 subfile1
            ├── 📄 subfile2
            └── 📁 nesteddir
                └── 📄 nestedfile
        """
        source = some_join(some_path, "source")
        some_fs.touch(some_join(source, "file1"))
        some_fs.touch(some_join(source, "file2"))

        # Two structurally identical subdirectories so patterns like subdir[0-1]
        # and **/subdir0 can distinguish between them.
        for subdir_idx in range(2):
            subdir = some_join(source, f"subdir{subdir_idx}")
            nesteddir = some_join(subdir, "nesteddir")
            some_fs.makedirs(nesteddir)
            some_fs.touch(some_join(subdir, "subfile1"))
            some_fs.touch(some_join(subdir, "subfile2"))
            some_fs.touch(some_join(nesteddir, "nestedfile"))

        return source

    def _bulk_operations_scenario_0(self, some_fs, some_join, some_path):
        """
        Scenario that is used for many cp/get/put tests. Creates the following
        directory and file structure:

        📁 source
        ├── 📄 file1
        ├── 📄 file2
        └── 📁 subdir
            ├── 📄 subfile1
            ├── 📄 subfile2
            └── 📁 nesteddir
                └── 📄 nestedfile
        """
        source = some_join(some_path, "source")
        subdir = some_join(source, "subdir")
        nesteddir = some_join(subdir, "nesteddir")
        some_fs.makedirs(nesteddir)
        some_fs.touch(some_join(source, "file1"))
        some_fs.touch(some_join(source, "file2"))
        some_fs.touch(some_join(subdir, "subfile1"))
        some_fs.touch(some_join(subdir, "subfile2"))
        some_fs.touch(some_join(nesteddir, "nestedfile"))
        return source

    def _dir_and_file_with_same_name_prefix(self, some_fs, some_join, some_path):
        """
        Scenario that is used to check cp/get/put on directory and file with
        the same name prefixes. Creates the following directory and file structure:

        📁 source
        ├── 📄 subdir.txt
        └── 📁 subdir
            └── 📄 subfile.txt
        """
        source = some_join(some_path, "source")
        subdir = some_join(source, "subdir")
        file = some_join(source, "subdir.txt")
        subfile = some_join(subdir, "subfile.txt")
        some_fs.makedirs(subdir)
        some_fs.touch(file)
        some_fs.touch(subfile)
        return source

    def _10_files_with_hashed_names(self, some_fs, some_join, some_path):
        """
        Scenario that is used to check cp/get/put files order when source and
        destination are lists. Creates the following directory and file structure:

        📁 source
        └── 📄 {hashed([0-9])}.txt
        """
        source = some_join(some_path, "source")
        # Hashed names make the listing order differ from numeric order, which is
        # what the list-ordering tests need.
        for i in range(10):
            hashed_i = md5(str(i).encode("utf-8")).hexdigest()
            path = some_join(source, f"{hashed_i}.txt")
            some_fs.pipe(path=path, value=f"{i}".encode("utf-8"))
        return source
+
231
+
232
class AbstractFixtures(BaseAbstractFixtures):
    """
    Abstract base class containing fixtures that may be overridden in derived
    filesystem-specific classes to run the abstract tests on such filesystems.

    For any particular filesystem some of these fixtures must be overridden,
    such as ``fs`` and ``fs_path``, and others may be overridden if the
    default functions here are not appropriate, such as ``fs_join``.
    """

    @pytest.fixture
    def fs(self):
        """The filesystem instance under test; must be supplied by subclasses."""
        raise NotImplementedError("This function must be overridden in derived classes")

    @pytest.fixture
    def fs_join(self):
        """
        Return a function that joins its arguments together into a path.

        Most fsspec implementations join paths in a platform-dependent way,
        but some will override this to always use a forward slash.
        """
        return os.path.join

    @pytest.fixture
    def fs_path(self):
        """Root path on the filesystem under test; must be supplied by subclasses."""
        raise NotImplementedError("This function must be overridden in derived classes")

    @pytest.fixture(scope="class")
    def local_fs(self):
        # Maybe need an option for auto_mkdir=False? This is only relevant
        # for certain implementations.
        return LocalFileSystem(auto_mkdir=True)

    @pytest.fixture
    def local_join(self):
        """
        Return a function that joins its arguments together into a path, on
        the local filesystem.
        """
        return os.path.join

    @pytest.fixture
    def local_path(self, tmpdir):
        # Per-test temporary directory provided by pytest's built-in tmpdir fixture.
        return tmpdir

    @pytest.fixture
    def supports_empty_directories(self):
        """
        Return whether this implementation supports empty directories.
        """
        return True

    @pytest.fixture
    def fs_sanitize_path(self):
        # Identity by default; implementations may override to normalize paths
        # before comparison.
        return lambda x: x
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/tests/abstract/__pycache__/get.cpython-311.pyc ADDED
Binary file (26.3 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/tests/abstract/common.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Keyword arguments for ``pytest.mark.parametrize`` shared by the glob edge-case
# cp/get/put tests. Each argvalue is (glob pattern, recursive flag, maxdepth,
# expected relative result paths); the expected paths presumably correspond to the
# file tree built by the ``_glob_edge_cases_files`` scenario fixture — verify
# against the consuming test classes.
GLOB_EDGE_CASES_TESTS = {
    "argnames": ("path", "recursive", "maxdepth", "expected"),
    "argvalues": [
        ("fil?1", False, None, ["file1"]),
        ("fil?1", True, None, ["file1"]),
        ("file[1-2]", False, None, ["file1", "file2"]),
        ("file[1-2]", True, None, ["file1", "file2"]),
        ("*", False, None, ["file1", "file2"]),
        (
            "*",
            True,
            None,
            [
                "file1",
                "file2",
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir0/nesteddir/nestedfile",
                "subdir1/subfile1",
                "subdir1/subfile2",
                "subdir1/nesteddir/nestedfile",
            ],
        ),
        ("*", True, 1, ["file1", "file2"]),
        (
            "*",
            True,
            2,
            [
                "file1",
                "file2",
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir1/subfile1",
                "subdir1/subfile2",
            ],
        ),
        ("*1", False, None, ["file1"]),
        (
            "*1",
            True,
            None,
            [
                "file1",
                "subdir1/subfile1",
                "subdir1/subfile2",
                "subdir1/nesteddir/nestedfile",
            ],
        ),
        ("*1", True, 2, ["file1", "subdir1/subfile1", "subdir1/subfile2"]),
        (
            "**",
            False,
            None,
            [
                "file1",
                "file2",
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir0/nesteddir/nestedfile",
                "subdir1/subfile1",
                "subdir1/subfile2",
                "subdir1/nesteddir/nestedfile",
            ],
        ),
        (
            "**",
            True,
            None,
            [
                "file1",
                "file2",
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir0/nesteddir/nestedfile",
                "subdir1/subfile1",
                "subdir1/subfile2",
                "subdir1/nesteddir/nestedfile",
            ],
        ),
        ("**", True, 1, ["file1", "file2"]),
        (
            "**",
            True,
            2,
            [
                "file1",
                "file2",
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir0/nesteddir/nestedfile",
                "subdir1/subfile1",
                "subdir1/subfile2",
                "subdir1/nesteddir/nestedfile",
            ],
        ),
        (
            "**",
            False,
            2,
            [
                "file1",
                "file2",
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir1/subfile1",
                "subdir1/subfile2",
            ],
        ),
        ("**/*1", False, None, ["file1", "subdir0/subfile1", "subdir1/subfile1"]),
        (
            "**/*1",
            True,
            None,
            [
                "file1",
                "subdir0/subfile1",
                "subdir1/subfile1",
                "subdir1/subfile2",
                "subdir1/nesteddir/nestedfile",
            ],
        ),
        ("**/*1", True, 1, ["file1"]),
        (
            "**/*1",
            True,
            2,
            ["file1", "subdir0/subfile1", "subdir1/subfile1", "subdir1/subfile2"],
        ),
        ("**/*1", False, 2, ["file1", "subdir0/subfile1", "subdir1/subfile1"]),
        ("**/subdir0", False, None, []),
        ("**/subdir0", True, None, ["subfile1", "subfile2", "nesteddir/nestedfile"]),
        ("**/subdir0/nested*", False, 2, []),
        ("**/subdir0/nested*", True, 2, ["nestedfile"]),
        ("subdir[1-2]", False, None, []),
        ("subdir[1-2]", True, None, ["subfile1", "subfile2", "nesteddir/nestedfile"]),
        ("subdir[1-2]", True, 2, ["subfile1", "subfile2"]),
        ("subdir[0-1]", False, None, []),
        (
            "subdir[0-1]",
            True,
            None,
            [
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir0/nesteddir/nestedfile",
                "subdir1/subfile1",
                "subdir1/subfile2",
                "subdir1/nesteddir/nestedfile",
            ],
        ),
        (
            "subdir[0-1]/*fil[e]*",
            False,
            None,
            [
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir1/subfile1",
                "subdir1/subfile2",
            ],
        ),
        (
            "subdir[0-1]/*fil[e]*",
            True,
            None,
            [
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir1/subfile1",
                "subdir1/subfile2",
            ],
        ),
    ],
}
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec/tests/abstract/copy.py ADDED
@@ -0,0 +1,557 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from hashlib import md5
2
+ from itertools import product
3
+
4
+ import pytest
5
+
6
+ from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS
7
+
8
+
9
+ class AbstractCopyTests:
10
+ def test_copy_file_to_existing_directory(
11
+ self,
12
+ fs,
13
+ fs_join,
14
+ fs_bulk_operations_scenario_0,
15
+ fs_target,
16
+ supports_empty_directories,
17
+ ):
18
+ # Copy scenario 1a
19
+ source = fs_bulk_operations_scenario_0
20
+
21
+ target = fs_target
22
+ fs.mkdir(target)
23
+ if not supports_empty_directories:
24
+ # Force target directory to exist by adding a dummy file
25
+ fs.touch(fs_join(target, "dummy"))
26
+ assert fs.isdir(target)
27
+
28
+ target_file2 = fs_join(target, "file2")
29
+ target_subfile1 = fs_join(target, "subfile1")
30
+
31
+ # Copy from source directory
32
+ fs.cp(fs_join(source, "file2"), target)
33
+ assert fs.isfile(target_file2)
34
+
35
+ # Copy from sub directory
36
+ fs.cp(fs_join(source, "subdir", "subfile1"), target)
37
+ assert fs.isfile(target_subfile1)
38
+
39
+ # Remove copied files
40
+ fs.rm([target_file2, target_subfile1])
41
+ assert not fs.exists(target_file2)
42
+ assert not fs.exists(target_subfile1)
43
+
44
+ # Repeat with trailing slash on target
45
+ fs.cp(fs_join(source, "file2"), target + "/")
46
+ assert fs.isdir(target)
47
+ assert fs.isfile(target_file2)
48
+
49
+ fs.cp(fs_join(source, "subdir", "subfile1"), target + "/")
50
+ assert fs.isfile(target_subfile1)
51
+
52
+ def test_copy_file_to_new_directory(
53
+ self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
54
+ ):
55
+ # Copy scenario 1b
56
+ source = fs_bulk_operations_scenario_0
57
+
58
+ target = fs_target
59
+ fs.mkdir(target)
60
+
61
+ fs.cp(
62
+ fs_join(source, "subdir", "subfile1"), fs_join(target, "newdir/")
63
+ ) # Note trailing slash
64
+ assert fs.isdir(target)
65
+ assert fs.isdir(fs_join(target, "newdir"))
66
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
67
+
68
+ def test_copy_file_to_file_in_existing_directory(
69
+ self,
70
+ fs,
71
+ fs_join,
72
+ fs_bulk_operations_scenario_0,
73
+ fs_target,
74
+ supports_empty_directories,
75
+ ):
76
+ # Copy scenario 1c
77
+ source = fs_bulk_operations_scenario_0
78
+
79
+ target = fs_target
80
+ fs.mkdir(target)
81
+ if not supports_empty_directories:
82
+ # Force target directory to exist by adding a dummy file
83
+ fs.touch(fs_join(target, "dummy"))
84
+ assert fs.isdir(target)
85
+
86
+ fs.cp(fs_join(source, "subdir", "subfile1"), fs_join(target, "newfile"))
87
+ assert fs.isfile(fs_join(target, "newfile"))
88
+
89
+ def test_copy_file_to_file_in_new_directory(
90
+ self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
91
+ ):
92
+ # Copy scenario 1d
93
+ source = fs_bulk_operations_scenario_0
94
+
95
+ target = fs_target
96
+ fs.mkdir(target)
97
+
98
+ fs.cp(
99
+ fs_join(source, "subdir", "subfile1"), fs_join(target, "newdir", "newfile")
100
+ )
101
+ assert fs.isdir(fs_join(target, "newdir"))
102
+ assert fs.isfile(fs_join(target, "newdir", "newfile"))
103
+
104
+ def test_copy_directory_to_existing_directory(
105
+ self,
106
+ fs,
107
+ fs_join,
108
+ fs_bulk_operations_scenario_0,
109
+ fs_target,
110
+ supports_empty_directories,
111
+ ):
112
+ # Copy scenario 1e
113
+ source = fs_bulk_operations_scenario_0
114
+
115
+ target = fs_target
116
+ fs.mkdir(target)
117
+ if not supports_empty_directories:
118
+ # Force target directory to exist by adding a dummy file
119
+ dummy = fs_join(target, "dummy")
120
+ fs.touch(dummy)
121
+ assert fs.isdir(target)
122
+
123
+ for source_slash, target_slash in zip([False, True], [False, True]):
124
+ s = fs_join(source, "subdir")
125
+ if source_slash:
126
+ s += "/"
127
+ t = target + "/" if target_slash else target
128
+
129
+ # Without recursive does nothing
130
+ fs.cp(s, t)
131
+ assert fs.ls(target, detail=False) == (
132
+ [] if supports_empty_directories else [dummy]
133
+ )
134
+
135
+ # With recursive
136
+ fs.cp(s, t, recursive=True)
137
+ if source_slash:
138
+ assert fs.isfile(fs_join(target, "subfile1"))
139
+ assert fs.isfile(fs_join(target, "subfile2"))
140
+ assert fs.isdir(fs_join(target, "nesteddir"))
141
+ assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
142
+ assert not fs.exists(fs_join(target, "subdir"))
143
+
144
+ fs.rm(
145
+ [
146
+ fs_join(target, "subfile1"),
147
+ fs_join(target, "subfile2"),
148
+ fs_join(target, "nesteddir"),
149
+ ],
150
+ recursive=True,
151
+ )
152
+ else:
153
+ assert fs.isdir(fs_join(target, "subdir"))
154
+ assert fs.isfile(fs_join(target, "subdir", "subfile1"))
155
+ assert fs.isfile(fs_join(target, "subdir", "subfile2"))
156
+ assert fs.isdir(fs_join(target, "subdir", "nesteddir"))
157
+ assert fs.isfile(fs_join(target, "subdir", "nesteddir", "nestedfile"))
158
+
159
+ fs.rm(fs_join(target, "subdir"), recursive=True)
160
+ assert fs.ls(target, detail=False) == (
161
+ [] if supports_empty_directories else [dummy]
162
+ )
163
+
164
+ # Limit recursive by maxdepth
165
+ fs.cp(s, t, recursive=True, maxdepth=1)
166
+ if source_slash:
167
+ assert fs.isfile(fs_join(target, "subfile1"))
168
+ assert fs.isfile(fs_join(target, "subfile2"))
169
+ assert not fs.exists(fs_join(target, "nesteddir"))
170
+ assert not fs.exists(fs_join(target, "subdir"))
171
+
172
+ fs.rm(
173
+ [
174
+ fs_join(target, "subfile1"),
175
+ fs_join(target, "subfile2"),
176
+ ],
177
+ recursive=True,
178
+ )
179
+ else:
180
+ assert fs.isdir(fs_join(target, "subdir"))
181
+ assert fs.isfile(fs_join(target, "subdir", "subfile1"))
182
+ assert fs.isfile(fs_join(target, "subdir", "subfile2"))
183
+ assert not fs.exists(fs_join(target, "subdir", "nesteddir"))
184
+
185
+ fs.rm(fs_join(target, "subdir"), recursive=True)
186
+ assert fs.ls(target, detail=False) == (
187
+ [] if supports_empty_directories else [dummy]
188
+ )
189
+
190
+ def test_copy_directory_to_new_directory(
191
+ self,
192
+ fs,
193
+ fs_join,
194
+ fs_bulk_operations_scenario_0,
195
+ fs_target,
196
+ supports_empty_directories,
197
+ ):
198
+ # Copy scenario 1f
199
+ source = fs_bulk_operations_scenario_0
200
+
201
+ target = fs_target
202
+ fs.mkdir(target)
203
+
204
+ for source_slash, target_slash in zip([False, True], [False, True]):
205
+ s = fs_join(source, "subdir")
206
+ if source_slash:
207
+ s += "/"
208
+ t = fs_join(target, "newdir")
209
+ if target_slash:
210
+ t += "/"
211
+
212
+ # Without recursive does nothing
213
+ fs.cp(s, t)
214
+ if supports_empty_directories:
215
+ assert fs.ls(target) == []
216
+ else:
217
+ with pytest.raises(FileNotFoundError):
218
+ fs.ls(target)
219
+
220
+ # With recursive
221
+ fs.cp(s, t, recursive=True)
222
+ assert fs.isdir(fs_join(target, "newdir"))
223
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
224
+ assert fs.isfile(fs_join(target, "newdir", "subfile2"))
225
+ assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
226
+ assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
227
+ assert not fs.exists(fs_join(target, "subdir"))
228
+
229
+ fs.rm(fs_join(target, "newdir"), recursive=True)
230
+ assert not fs.exists(fs_join(target, "newdir"))
231
+
232
+ # Limit recursive by maxdepth
233
+ fs.cp(s, t, recursive=True, maxdepth=1)
234
+ assert fs.isdir(fs_join(target, "newdir"))
235
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
236
+ assert fs.isfile(fs_join(target, "newdir", "subfile2"))
237
+ assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
238
+ assert not fs.exists(fs_join(target, "subdir"))
239
+
240
+ fs.rm(fs_join(target, "newdir"), recursive=True)
241
+ assert not fs.exists(fs_join(target, "newdir"))
242
+
243
+ def test_copy_glob_to_existing_directory(
244
+ self,
245
+ fs,
246
+ fs_join,
247
+ fs_bulk_operations_scenario_0,
248
+ fs_target,
249
+ supports_empty_directories,
250
+ ):
251
+ # Copy scenario 1g
252
+ source = fs_bulk_operations_scenario_0
253
+
254
+ target = fs_target
255
+ fs.mkdir(target)
256
+ if not supports_empty_directories:
257
+ # Force target directory to exist by adding a dummy file
258
+ dummy = fs_join(target, "dummy")
259
+ fs.touch(dummy)
260
+ assert fs.isdir(target)
261
+
262
+ for target_slash in [False, True]:
263
+ t = target + "/" if target_slash else target
264
+
265
+ # Without recursive
266
+ fs.cp(fs_join(source, "subdir", "*"), t)
267
+ assert fs.isfile(fs_join(target, "subfile1"))
268
+ assert fs.isfile(fs_join(target, "subfile2"))
269
+ assert not fs.isdir(fs_join(target, "nesteddir"))
270
+ assert not fs.exists(fs_join(target, "nesteddir", "nestedfile"))
271
+ assert not fs.exists(fs_join(target, "subdir"))
272
+
273
+ fs.rm(
274
+ [
275
+ fs_join(target, "subfile1"),
276
+ fs_join(target, "subfile2"),
277
+ ],
278
+ recursive=True,
279
+ )
280
+ assert fs.ls(target, detail=False) == (
281
+ [] if supports_empty_directories else [dummy]
282
+ )
283
+
284
+ # With recursive
285
+ for glob, recursive in zip(["*", "**"], [True, False]):
286
+ fs.cp(fs_join(source, "subdir", glob), t, recursive=recursive)
287
+ assert fs.isfile(fs_join(target, "subfile1"))
288
+ assert fs.isfile(fs_join(target, "subfile2"))
289
+ assert fs.isdir(fs_join(target, "nesteddir"))
290
+ assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
291
+ assert not fs.exists(fs_join(target, "subdir"))
292
+
293
+ fs.rm(
294
+ [
295
+ fs_join(target, "subfile1"),
296
+ fs_join(target, "subfile2"),
297
+ fs_join(target, "nesteddir"),
298
+ ],
299
+ recursive=True,
300
+ )
301
+ assert fs.ls(target, detail=False) == (
302
+ [] if supports_empty_directories else [dummy]
303
+ )
304
+
305
+ # Limit recursive by maxdepth
306
+ fs.cp(
307
+ fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1
308
+ )
309
+ assert fs.isfile(fs_join(target, "subfile1"))
310
+ assert fs.isfile(fs_join(target, "subfile2"))
311
+ assert not fs.exists(fs_join(target, "nesteddir"))
312
+ assert not fs.exists(fs_join(target, "subdir"))
313
+
314
+ fs.rm(
315
+ [
316
+ fs_join(target, "subfile1"),
317
+ fs_join(target, "subfile2"),
318
+ ],
319
+ recursive=True,
320
+ )
321
+ assert fs.ls(target, detail=False) == (
322
+ [] if supports_empty_directories else [dummy]
323
+ )
324
+
325
+ def test_copy_glob_to_new_directory(
326
+ self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
327
+ ):
328
+ # Copy scenario 1h
329
+ source = fs_bulk_operations_scenario_0
330
+
331
+ target = fs_target
332
+ fs.mkdir(target)
333
+
334
+ for target_slash in [False, True]:
335
+ t = fs_join(target, "newdir")
336
+ if target_slash:
337
+ t += "/"
338
+
339
+ # Without recursive
340
+ fs.cp(fs_join(source, "subdir", "*"), t)
341
+ assert fs.isdir(fs_join(target, "newdir"))
342
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
343
+ assert fs.isfile(fs_join(target, "newdir", "subfile2"))
344
+ assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
345
+ assert not fs.exists(fs_join(target, "newdir", "nesteddir", "nestedfile"))
346
+ assert not fs.exists(fs_join(target, "subdir"))
347
+ assert not fs.exists(fs_join(target, "newdir", "subdir"))
348
+
349
+ fs.rm(fs_join(target, "newdir"), recursive=True)
350
+ assert not fs.exists(fs_join(target, "newdir"))
351
+
352
+ # With recursive
353
+ for glob, recursive in zip(["*", "**"], [True, False]):
354
+ fs.cp(fs_join(source, "subdir", glob), t, recursive=recursive)
355
+ assert fs.isdir(fs_join(target, "newdir"))
356
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
357
+ assert fs.isfile(fs_join(target, "newdir", "subfile2"))
358
+ assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
359
+ assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
360
+ assert not fs.exists(fs_join(target, "subdir"))
361
+ assert not fs.exists(fs_join(target, "newdir", "subdir"))
362
+
363
+ fs.rm(fs_join(target, "newdir"), recursive=True)
364
+ assert not fs.exists(fs_join(target, "newdir"))
365
+
366
+ # Limit recursive by maxdepth
367
+ fs.cp(
368
+ fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1
369
+ )
370
+ assert fs.isdir(fs_join(target, "newdir"))
371
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
372
+ assert fs.isfile(fs_join(target, "newdir", "subfile2"))
373
+ assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
374
+ assert not fs.exists(fs_join(target, "subdir"))
375
+ assert not fs.exists(fs_join(target, "newdir", "subdir"))
376
+
377
+ fs.rm(fs_join(target, "newdir"), recursive=True)
378
+ assert not fs.exists(fs_join(target, "newdir"))
379
+
380
+ @pytest.mark.parametrize(
381
+ GLOB_EDGE_CASES_TESTS["argnames"],
382
+ GLOB_EDGE_CASES_TESTS["argvalues"],
383
+ )
384
+ def test_copy_glob_edge_cases(
385
+ self,
386
+ path,
387
+ recursive,
388
+ maxdepth,
389
+ expected,
390
+ fs,
391
+ fs_join,
392
+ fs_glob_edge_cases_files,
393
+ fs_target,
394
+ fs_sanitize_path,
395
+ ):
396
+ # Copy scenario 1g
397
+ source = fs_glob_edge_cases_files
398
+
399
+ target = fs_target
400
+
401
+ for new_dir, target_slash in product([True, False], [True, False]):
402
+ fs.mkdir(target)
403
+
404
+ t = fs_join(target, "newdir") if new_dir else target
405
+ t = t + "/" if target_slash else t
406
+
407
+ fs.copy(fs_join(source, path), t, recursive=recursive, maxdepth=maxdepth)
408
+
409
+ output = fs.find(target)
410
+ if new_dir:
411
+ prefixed_expected = [
412
+ fs_sanitize_path(fs_join(target, "newdir", p)) for p in expected
413
+ ]
414
+ else:
415
+ prefixed_expected = [
416
+ fs_sanitize_path(fs_join(target, p)) for p in expected
417
+ ]
418
+ assert sorted(output) == sorted(prefixed_expected)
419
+
420
+ try:
421
+ fs.rm(target, recursive=True)
422
+ except FileNotFoundError:
423
+ pass
424
+
425
+ def test_copy_list_of_files_to_existing_directory(
426
+ self,
427
+ fs,
428
+ fs_join,
429
+ fs_bulk_operations_scenario_0,
430
+ fs_target,
431
+ supports_empty_directories,
432
+ ):
433
+ # Copy scenario 2a
434
+ source = fs_bulk_operations_scenario_0
435
+
436
+ target = fs_target
437
+ fs.mkdir(target)
438
+ if not supports_empty_directories:
439
+ # Force target directory to exist by adding a dummy file
440
+ dummy = fs_join(target, "dummy")
441
+ fs.touch(dummy)
442
+ assert fs.isdir(target)
443
+
444
+ source_files = [
445
+ fs_join(source, "file1"),
446
+ fs_join(source, "file2"),
447
+ fs_join(source, "subdir", "subfile1"),
448
+ ]
449
+
450
+ for target_slash in [False, True]:
451
+ t = target + "/" if target_slash else target
452
+
453
+ fs.cp(source_files, t)
454
+ assert fs.isfile(fs_join(target, "file1"))
455
+ assert fs.isfile(fs_join(target, "file2"))
456
+ assert fs.isfile(fs_join(target, "subfile1"))
457
+
458
+ fs.rm(
459
+ [
460
+ fs_join(target, "file1"),
461
+ fs_join(target, "file2"),
462
+ fs_join(target, "subfile1"),
463
+ ],
464
+ recursive=True,
465
+ )
466
+ assert fs.ls(target, detail=False) == (
467
+ [] if supports_empty_directories else [dummy]
468
+ )
469
+
470
+ def test_copy_list_of_files_to_new_directory(
471
+ self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
472
+ ):
473
+ # Copy scenario 2b
474
+ source = fs_bulk_operations_scenario_0
475
+
476
+ target = fs_target
477
+ fs.mkdir(target)
478
+
479
+ source_files = [
480
+ fs_join(source, "file1"),
481
+ fs_join(source, "file2"),
482
+ fs_join(source, "subdir", "subfile1"),
483
+ ]
484
+
485
+ fs.cp(source_files, fs_join(target, "newdir") + "/") # Note trailing slash
486
+ assert fs.isdir(fs_join(target, "newdir"))
487
+ assert fs.isfile(fs_join(target, "newdir", "file1"))
488
+ assert fs.isfile(fs_join(target, "newdir", "file2"))
489
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
490
+
491
+ def test_copy_two_files_new_directory(
492
+ self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
493
+ ):
494
+ # This is a duplicate of test_copy_list_of_files_to_new_directory and
495
+ # can eventually be removed.
496
+ source = fs_bulk_operations_scenario_0
497
+
498
+ target = fs_target
499
+ assert not fs.exists(target)
500
+ fs.cp([fs_join(source, "file1"), fs_join(source, "file2")], target)
501
+
502
+ assert fs.isdir(target)
503
+ assert fs.isfile(fs_join(target, "file1"))
504
+ assert fs.isfile(fs_join(target, "file2"))
505
+
506
+ def test_copy_directory_without_files_with_same_name_prefix(
507
+ self,
508
+ fs,
509
+ fs_join,
510
+ fs_target,
511
+ fs_dir_and_file_with_same_name_prefix,
512
+ supports_empty_directories,
513
+ ):
514
+ # Create the test dirs
515
+ source = fs_dir_and_file_with_same_name_prefix
516
+ target = fs_target
517
+
518
+ # Test without glob
519
+ fs.cp(fs_join(source, "subdir"), target, recursive=True)
520
+
521
+ assert fs.isfile(fs_join(target, "subfile.txt"))
522
+ assert not fs.isfile(fs_join(target, "subdir.txt"))
523
+
524
+ fs.rm([fs_join(target, "subfile.txt")])
525
+ if supports_empty_directories:
526
+ assert fs.ls(target) == []
527
+ else:
528
+ assert not fs.exists(target)
529
+
530
+ # Test with glob
531
+ fs.cp(fs_join(source, "subdir*"), target, recursive=True)
532
+
533
+ assert fs.isdir(fs_join(target, "subdir"))
534
+ assert fs.isfile(fs_join(target, "subdir", "subfile.txt"))
535
+ assert fs.isfile(fs_join(target, "subdir.txt"))
536
+
537
+ def test_copy_with_source_and_destination_as_list(
538
+ self, fs, fs_target, fs_join, fs_10_files_with_hashed_names
539
+ ):
540
+ # Create the test dir
541
+ source = fs_10_files_with_hashed_names
542
+ target = fs_target
543
+
544
+ # Create list of files for source and destination
545
+ source_files = []
546
+ destination_files = []
547
+ for i in range(10):
548
+ hashed_i = md5(str(i).encode("utf-8")).hexdigest()
549
+ source_files.append(fs_join(source, f"{hashed_i}.txt"))
550
+ destination_files.append(fs_join(target, f"{hashed_i}.txt"))
551
+
552
+ # Copy and assert order was kept
553
+ fs.copy(path1=source_files, path2=destination_files)
554
+
555
+ for i in range(10):
556
+ file_content = fs.cat(destination_files[i]).decode("utf-8")
557
+ assert file_content == str(i)
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/calculus/__pycache__/approximation.cpython-311.pyc ADDED
Binary file (12.7 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/calculus/__pycache__/differentiation.cpython-311.pyc ADDED
Binary file (28.4 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (207 Bytes). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/__init__.py ADDED
File without changes
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti.h ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2010-2017 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #if !defined(_CUPTI_H_)
51
+ #define _CUPTI_H_
52
+
53
+ #ifdef _WIN32
54
+ #ifndef WIN32_LEAN_AND_MEAN
55
+ #define WIN32_LEAN_AND_MEAN
56
+ #endif
57
+ #ifdef NOMINMAX
58
+ #include <windows.h>
59
+ #else
60
+ #define NOMINMAX
61
+ #include <windows.h>
62
+ #undef NOMINMAX
63
+ #endif
64
+ #endif
65
+
66
+ #include <cuda.h>
67
+ #include <cupti_result.h>
68
+ #include <cupti_version.h>
69
+
70
+ /* Activity, callback, event and metric APIs */
71
+ #include <cupti_activity.h>
72
+ #include <cupti_callbacks.h>
73
+ #include <cupti_events.h>
74
+ #include <cupti_metrics.h>
75
+
76
+ /* Runtime, driver, and nvtx function identifiers */
77
+ #include <cupti_driver_cbid.h>
78
+ #include <cupti_runtime_cbid.h>
79
+ #include <cupti_nvtx_cbid.h>
80
+
81
+ /* To support function parameter structures for obsoleted API. See
82
+ cuda.h for the actual definition of these structures. */
83
+ typedef unsigned int CUdeviceptr_v1;
84
+ typedef struct CUDA_MEMCPY2D_v1_st { int dummy; } CUDA_MEMCPY2D_v1;
85
+ typedef struct CUDA_MEMCPY3D_v1_st { int dummy; } CUDA_MEMCPY3D_v1;
86
+ typedef struct CUDA_ARRAY_DESCRIPTOR_v1_st { int dummy; } CUDA_ARRAY_DESCRIPTOR_v1;
87
+ typedef struct CUDA_ARRAY3D_DESCRIPTOR_v1_st { int dummy; } CUDA_ARRAY3D_DESCRIPTOR_v1;
88
+
89
+ /* Function parameter structures */
90
+ #include <generated_cuda_runtime_api_meta.h>
91
+ #include <generated_cuda_meta.h>
92
+
93
+ /* The following parameter structures cannot be included unless a
94
+ header that defines GL_VERSION is included before including them.
95
+ If these are needed then make sure such a header is included
96
+ already. */
97
+ #ifdef GL_VERSION
98
+ #include <generated_cuda_gl_interop_meta.h>
99
+ #include <generated_cudaGL_meta.h>
100
+ #endif
101
+
102
+ //#include <generated_nvtx_meta.h>
103
+
104
+ /* The following parameter structures cannot be included by default as
105
+ they are not guaranteed to be available on all systems. Uncomment
106
+ the includes that are available, or use the include explicitly. */
107
+ #if defined(__linux__)
108
+ //#include <generated_cuda_vdpau_interop_meta.h>
109
+ //#include <generated_cudaVDPAU_meta.h>
110
+ #endif
111
+
112
+ #ifdef _WIN32
113
+ //#include <generated_cuda_d3d9_interop_meta.h>
114
+ //#include <generated_cuda_d3d10_interop_meta.h>
115
+ //#include <generated_cuda_d3d11_interop_meta.h>
116
+ //#include <generated_cudaD3D9_meta.h>
117
+ //#include <generated_cudaD3D10_meta.h>
118
+ //#include <generated_cudaD3D11_meta.h>
119
+ #endif
120
+
121
+ #endif /*_CUPTI_H_*/
122
+
123
+
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_callbacks.h ADDED
@@ -0,0 +1,760 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2010-2020 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #if !defined(__CUPTI_CALLBACKS_H__)
51
+ #define __CUPTI_CALLBACKS_H__
52
+
53
+ #include <cuda.h>
54
+ #include <builtin_types.h>
55
+ #include <string.h>
56
+ #include <cuda_stdint.h>
57
+ #include <cupti_result.h>
58
+
59
+ #ifndef CUPTIAPI
60
+ #ifdef _WIN32
61
+ #define CUPTIAPI __stdcall
62
+ #else
63
+ #define CUPTIAPI
64
+ #endif
65
+ #endif
66
+
67
+ #if defined(__cplusplus)
68
+ extern "C" {
69
+ #endif
70
+
71
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
72
+ #pragma GCC visibility push(default)
73
+ #endif
74
+
75
+ /**
76
+ * \defgroup CUPTI_CALLBACK_API CUPTI Callback API
77
+ * Functions, types, and enums that implement the CUPTI Callback API.
78
+ * @{
79
+ */
80
+
81
+ /**
82
+ * \brief Specifies the point in an API call that a callback is issued.
83
+ *
84
+ * Specifies the point in an API call that a callback is issued. This
85
+ * value is communicated to the callback function via \ref
86
+ * CUpti_CallbackData::callbackSite.
87
+ */
88
+ typedef enum {
89
+ /**
90
+ * The callback is at the entry of the API call.
91
+ */
92
+ CUPTI_API_ENTER = 0,
93
+ /**
94
+ * The callback is at the exit of the API call.
95
+ */
96
+ CUPTI_API_EXIT = 1,
97
+ CUPTI_API_CBSITE_FORCE_INT = 0x7fffffff
98
+ } CUpti_ApiCallbackSite;
99
+
100
+ /**
101
+ * \brief Callback domains.
102
+ *
103
+ * Callback domains. Each domain represents callback points for a
104
+ * group of related API functions or CUDA driver activity.
105
+ */
106
+ typedef enum {
107
+ /**
108
+ * Invalid domain.
109
+ */
110
+ CUPTI_CB_DOMAIN_INVALID = 0,
111
+ /**
112
+ * Domain containing callback points for all driver API functions.
113
+ */
114
+ CUPTI_CB_DOMAIN_DRIVER_API = 1,
115
+ /**
116
+ * Domain containing callback points for all runtime API
117
+ * functions.
118
+ */
119
+ CUPTI_CB_DOMAIN_RUNTIME_API = 2,
120
+ /**
121
+ * Domain containing callback points for CUDA resource tracking.
122
+ */
123
+ CUPTI_CB_DOMAIN_RESOURCE = 3,
124
+ /**
125
+ * Domain containing callback points for CUDA synchronization.
126
+ */
127
+ CUPTI_CB_DOMAIN_SYNCHRONIZE = 4,
128
+ /**
129
+ * Domain containing callback points for NVTX API functions.
130
+ */
131
+ CUPTI_CB_DOMAIN_NVTX = 5,
132
+ CUPTI_CB_DOMAIN_SIZE = 6,
133
+ CUPTI_CB_DOMAIN_FORCE_INT = 0x7fffffff
134
+ } CUpti_CallbackDomain;
135
+
136
+ /**
137
+ * \brief Callback IDs for resource domain.
138
+ *
139
+ * Callback IDs for resource domain, CUPTI_CB_DOMAIN_RESOURCE. This
140
+ * value is communicated to the callback function via the \p cbid
141
+ * parameter.
142
+ */
143
+ typedef enum {
144
+ /**
145
+ * Invalid resource callback ID.
146
+ */
147
+ CUPTI_CBID_RESOURCE_INVALID = 0,
148
+ /**
149
+ * A new context has been created.
150
+ */
151
+ CUPTI_CBID_RESOURCE_CONTEXT_CREATED = 1,
152
+ /**
153
+ * A context is about to be destroyed.
154
+ */
155
+ CUPTI_CBID_RESOURCE_CONTEXT_DESTROY_STARTING = 2,
156
+ /**
157
+ * A new stream has been created.
158
+ */
159
+ CUPTI_CBID_RESOURCE_STREAM_CREATED = 3,
160
+ /**
161
+ * A stream is about to be destroyed.
162
+ */
163
+ CUPTI_CBID_RESOURCE_STREAM_DESTROY_STARTING = 4,
164
+ /**
165
+ * The driver has finished initializing.
166
+ */
167
+ CUPTI_CBID_RESOURCE_CU_INIT_FINISHED = 5,
168
+ /**
169
+ * A module has been loaded.
170
+ */
171
+ CUPTI_CBID_RESOURCE_MODULE_LOADED = 6,
172
+ /**
173
+ * A module is about to be unloaded.
174
+ */
175
+ CUPTI_CBID_RESOURCE_MODULE_UNLOAD_STARTING = 7,
176
+ /**
177
+ * The current module which is being profiled.
178
+ */
179
+ CUPTI_CBID_RESOURCE_MODULE_PROFILED = 8,
180
+ /**
181
+ * CUDA graph has been created.
182
+ */
183
+ CUPTI_CBID_RESOURCE_GRAPH_CREATED = 9,
184
+ /**
185
+ * CUDA graph is about to be destroyed.
186
+ */
187
+ CUPTI_CBID_RESOURCE_GRAPH_DESTROY_STARTING = 10,
188
+ /**
189
+ * CUDA graph is cloned.
190
+ */
191
+ CUPTI_CBID_RESOURCE_GRAPH_CLONED = 11,
192
+ /**
193
+ * CUDA graph node is about to be created
194
+ */
195
+ CUPTI_CBID_RESOURCE_GRAPHNODE_CREATE_STARTING = 12,
196
+ /**
197
+ * CUDA graph node is created.
198
+ */
199
+ CUPTI_CBID_RESOURCE_GRAPHNODE_CREATED = 13,
200
+ /**
201
+ * CUDA graph node is about to be destroyed.
202
+ */
203
+ CUPTI_CBID_RESOURCE_GRAPHNODE_DESTROY_STARTING = 14,
204
+ /**
205
+ * Dependency on a CUDA graph node is created.
206
+ */
207
+ CUPTI_CBID_RESOURCE_GRAPHNODE_DEPENDENCY_CREATED = 15,
208
+ /**
209
+ * Dependency on a CUDA graph node is destroyed.
210
+ */
211
+ CUPTI_CBID_RESOURCE_GRAPHNODE_DEPENDENCY_DESTROY_STARTING = 16,
212
+ /**
213
+ * An executable CUDA graph is about to be created.
214
+ */
215
+ CUPTI_CBID_RESOURCE_GRAPHEXEC_CREATE_STARTING = 17,
216
+ /**
217
+ * An executable CUDA graph is created.
218
+ */
219
+ CUPTI_CBID_RESOURCE_GRAPHEXEC_CREATED = 18,
220
+ /**
221
+ * An executable CUDA graph is about to be destroyed.
222
+ */
223
+ CUPTI_CBID_RESOURCE_GRAPHEXEC_DESTROY_STARTING = 19,
224
+ /**
225
+ * CUDA graph node is cloned.
226
+ */
227
+ CUPTI_CBID_RESOURCE_GRAPHNODE_CLONED = 20,
228
+
229
+ CUPTI_CBID_RESOURCE_SIZE,
230
+ CUPTI_CBID_RESOURCE_FORCE_INT = 0x7fffffff
231
+ } CUpti_CallbackIdResource;
232
+
233
+ /**
234
+ * \brief Callback IDs for synchronization domain.
235
+ *
236
+ * Callback IDs for synchronization domain,
237
+ * CUPTI_CB_DOMAIN_SYNCHRONIZE. This value is communicated to the
238
+ * callback function via the \p cbid parameter.
239
+ */
240
+ typedef enum {
241
+ /**
242
+ * Invalid synchronize callback ID.
243
+ */
244
+ CUPTI_CBID_SYNCHRONIZE_INVALID = 0,
245
+ /**
246
+ * Stream synchronization has completed for the stream.
247
+ */
248
+ CUPTI_CBID_SYNCHRONIZE_STREAM_SYNCHRONIZED = 1,
249
+ /**
250
+ * Context synchronization has completed for the context.
251
+ */
252
+ CUPTI_CBID_SYNCHRONIZE_CONTEXT_SYNCHRONIZED = 2,
253
+ CUPTI_CBID_SYNCHRONIZE_SIZE,
254
+ CUPTI_CBID_SYNCHRONIZE_FORCE_INT = 0x7fffffff
255
+ } CUpti_CallbackIdSync;
256
+
257
+ /**
258
+ * \brief Data passed into a runtime or driver API callback function.
259
+ *
260
+ * Data passed into a runtime or driver API callback function as the
261
+ * \p cbdata argument to \ref CUpti_CallbackFunc. The \p cbdata will
262
+ * be this type for \p domain equal to CUPTI_CB_DOMAIN_DRIVER_API or
263
+ * CUPTI_CB_DOMAIN_RUNTIME_API. The callback data is valid only within
264
+ * the invocation of the callback function that is passed the data. If
265
+ * you need to retain some data for use outside of the callback, you
266
+ * must make a copy of that data. For example, if you make a shallow
267
+ * copy of CUpti_CallbackData within a callback, you cannot
268
+ * dereference \p functionParams outside of that callback to access
269
+ * the function parameters. \p functionName is an exception: the
270
+ * string pointed to by \p functionName is a global constant and so
271
+ * may be accessed outside of the callback.
272
+ */
273
+ typedef struct {
274
+ /**
275
+ * Point in the runtime or driver function from where the callback
276
+ * was issued.
277
+ */
278
+ CUpti_ApiCallbackSite callbackSite;
279
+
280
+ /**
281
+ * Name of the runtime or driver API function which issued the
282
+ * callback. This string is a global constant and so may be
283
+ * accessed outside of the callback.
284
+ */
285
+ const char *functionName;
286
+
287
+ /**
288
+ * Pointer to the arguments passed to the runtime or driver API
289
+ * call. See generated_cuda_runtime_api_meta.h and
290
+ * generated_cuda_meta.h for structure definitions for the
291
+ * parameters for each runtime and driver API function.
292
+ */
293
+ const void *functionParams;
294
+
295
+ /**
296
+ * Pointer to the return value of the runtime or driver API
297
+ * call. This field is only valid within the exit::CUPTI_API_EXIT
298
+ * callback. For a runtime API \p functionReturnValue points to a
299
+ * \p cudaError_t. For a driver API \p functionReturnValue points
300
+ * to a \p CUresult.
301
+ */
302
+ void *functionReturnValue;
303
+
304
+ /**
305
+ * Name of the symbol operated on by the runtime or driver API
306
+ * function which issued the callback. This entry is valid only for
307
+ * driver and runtime launch callbacks, where it returns the name of
308
+ * the kernel.
309
+ */
310
+ const char *symbolName;
311
+
312
+ /**
313
+ * Driver context current to the thread, or null if no context is
314
+ * current. This value can change from the entry to exit callback
315
+ * of a runtime API function if the runtime initializes a context.
316
+ */
317
+ CUcontext context;
318
+
319
+ /**
320
+ * Unique ID for the CUDA context associated with the thread. The
321
+ * UIDs are assigned sequentially as contexts are created and are
322
+ * unique within a process.
323
+ */
324
+ uint32_t contextUid;
325
+
326
+ /**
327
+ * Pointer to data shared between the entry and exit callbacks of
328
+ * a given runtime or drive API function invocation. This field
329
+ * can be used to pass 64-bit values from the entry callback to
330
+ * the corresponding exit callback.
331
+ */
332
+ uint64_t *correlationData;
333
+
334
+ /**
335
+ * The activity record correlation ID for this callback. For a
336
+ * driver domain callback (i.e. \p domain
337
+ * CUPTI_CB_DOMAIN_DRIVER_API) this ID will equal the correlation ID
338
+ * in the CUpti_ActivityAPI record corresponding to the CUDA driver
339
+ * function call. For a runtime domain callback (i.e. \p domain
340
+ * CUPTI_CB_DOMAIN_RUNTIME_API) this ID will equal the correlation
341
+ * ID in the CUpti_ActivityAPI record corresponding to the CUDA
342
+ * runtime function call. Within the callback, this ID can be
343
+ * recorded to correlate user data with the activity record. This
344
+ * field is new in 4.1.
345
+ */
346
+ uint32_t correlationId;
347
+
348
+ } CUpti_CallbackData;
349
+
350
+ /**
351
+ * \brief Data passed into a resource callback function.
352
+ *
353
+ * Data passed into a resource callback function as the \p cbdata
354
+ * argument to \ref CUpti_CallbackFunc. The \p cbdata will be this
355
+ * type for \p domain equal to CUPTI_CB_DOMAIN_RESOURCE. The callback
356
+ * data is valid only within the invocation of the callback function
357
+ * that is passed the data. If you need to retain some data for use
358
+ * outside of the callback, you must make a copy of that data.
359
+ */
360
+ typedef struct {
361
+ /**
362
+ * For CUPTI_CBID_RESOURCE_CONTEXT_CREATED and
363
+ * CUPTI_CBID_RESOURCE_CONTEXT_DESTROY_STARTING, the context being
364
+ * created or destroyed. For CUPTI_CBID_RESOURCE_STREAM_CREATED and
365
+ * CUPTI_CBID_RESOURCE_STREAM_DESTROY_STARTING, the context
366
+ * containing the stream being created or destroyed.
367
+ */
368
+ CUcontext context;
369
+
370
+ union {
371
+ /**
372
+ * For CUPTI_CBID_RESOURCE_STREAM_CREATED and
373
+ * CUPTI_CBID_RESOURCE_STREAM_DESTROY_STARTING, the stream being
374
+ * created or destroyed.
375
+ */
376
+ CUstream stream;
377
+ } resourceHandle;
378
+
379
+ /**
380
+ * Reserved for future use.
381
+ */
382
+ void *resourceDescriptor;
383
+ } CUpti_ResourceData;
384
+
385
+
386
+ /**
387
+ * \brief Module data passed into a resource callback function.
388
+ *
389
+ * CUDA module data passed into a resource callback function as the \p cbdata
390
+ * argument to \ref CUpti_CallbackFunc. The \p cbdata will be this
391
+ * type for \p domain equal to CUPTI_CB_DOMAIN_RESOURCE. The module
392
+ * data is valid only within the invocation of the callback function
393
+ * that is passed the data. If you need to retain some data for use
394
+ * outside of the callback, you must make a copy of that data.
395
+ */
396
+
397
+ typedef struct {
398
+ /**
399
+ * Identifier to associate with the CUDA module.
400
+ */
401
+ uint32_t moduleId;
402
+
403
+ /**
404
+ * The size of the cubin.
405
+ */
406
+ size_t cubinSize;
407
+
408
+ /**
409
+ * Pointer to the associated cubin.
410
+ */
411
+ const char *pCubin;
412
+ } CUpti_ModuleResourceData;
413
+
414
+ /**
415
+ * \brief CUDA graphs data passed into a resource callback function.
416
+ *
417
+ * CUDA graphs data passed into a resource callback function as the \p cbdata
418
+ * argument to \ref CUpti_CallbackFunc. The \p cbdata will be this
419
+ * type for \p domain equal to CUPTI_CB_DOMAIN_RESOURCE. The graph
420
+ * data is valid only within the invocation of the callback function
421
+ * that is passed the data. If you need to retain some data for use
422
+ * outside of the callback, you must make a copy of that data.
423
+ */
424
+
425
+ typedef struct {
426
+ /**
427
+ * CUDA graph
428
+ */
429
+ CUgraph graph;
430
+ /**
431
+ * The original CUDA graph from which \param graph is cloned
432
+ */
433
+ CUgraph originalGraph;
434
+ /**
435
+ * CUDA graph node
436
+ */
437
+ CUgraphNode node;
438
+ /**
439
+ * The original CUDA graph node from which \param node is cloned
440
+ */
441
+ CUgraphNode originalNode;
442
+ /**
443
+ * Type of the \param node
444
+ */
445
+ CUgraphNodeType nodeType;
446
+ /**
447
+ * The dependent graph node
448
+ * The size of the array is \param numDependencies.
449
+ */
450
+ CUgraphNode dependency;
451
+ /**
452
+ * CUDA executable graph
453
+ */
454
+ CUgraphExec graphExec;
455
+ } CUpti_GraphData;
456
+
457
+ /**
458
+ * \brief Data passed into a synchronize callback function.
459
+ *
460
+ * Data passed into a synchronize callback function as the \p cbdata
461
+ * argument to \ref CUpti_CallbackFunc. The \p cbdata will be this
462
+ * type for \p domain equal to CUPTI_CB_DOMAIN_SYNCHRONIZE. The
463
+ * callback data is valid only within the invocation of the callback
464
+ * function that is passed the data. If you need to retain some data
465
+ * for use outside of the callback, you must make a copy of that data.
466
+ */
467
+ typedef struct {
468
+ /**
469
+ * The context of the stream being synchronized.
470
+ */
471
+ CUcontext context;
472
+ /**
473
+ * The stream being synchronized.
474
+ */
475
+ CUstream stream;
476
+ } CUpti_SynchronizeData;
477
+
478
+ /**
479
+ * \brief Data passed into a NVTX callback function.
480
+ *
481
+ * Data passed into a NVTX callback function as the \p cbdata argument
482
+ * to \ref CUpti_CallbackFunc. The \p cbdata will be this type for \p
483
+ * domain equal to CUPTI_CB_DOMAIN_NVTX. Unless otherwise notes, the
484
+ * callback data is valid only within the invocation of the callback
485
+ * function that is passed the data. If you need to retain some data
486
+ * for use outside of the callback, you must make a copy of that data.
487
+ */
488
+ typedef struct {
489
+ /**
490
+ * Name of the NVTX API function which issued the callback. This
491
+ * string is a global constant and so may be accessed outside of the
492
+ * callback.
493
+ */
494
+ const char *functionName;
495
+
496
+ /**
497
+ * Pointer to the arguments passed to the NVTX API call. See
498
+ * generated_nvtx_meta.h for structure definitions for the
499
+ * parameters for each NVTX API function.
500
+ */
501
+ const void *functionParams;
502
+
503
+ /**
504
+ * Pointer to the return value of the NVTX API call. See
505
+ * nvToolsExt.h for each NVTX API function's return value.
506
+ */
507
+ const void *functionReturnValue;
508
+ } CUpti_NvtxData;
509
+
510
+ /**
511
+ * \brief An ID for a driver API, runtime API, resource or
512
+ * synchronization callback.
513
+ *
514
+ * An ID for a driver API, runtime API, resource or synchronization
515
+ * callback. Within a driver API callback this should be interpreted
516
+ * as a CUpti_driver_api_trace_cbid value (these values are defined in
517
+ * cupti_driver_cbid.h). Within a runtime API callback this should be
518
+ * interpreted as a CUpti_runtime_api_trace_cbid value (these values
519
+ * are defined in cupti_runtime_cbid.h). Within a resource API
520
+ * callback this should be interpreted as a \ref
521
+ * CUpti_CallbackIdResource value. Within a synchronize API callback
522
+ * this should be interpreted as a \ref CUpti_CallbackIdSync value.
523
+ */
524
+ typedef uint32_t CUpti_CallbackId;
525
+
526
+ /**
527
+ * \brief Function type for a callback.
528
+ *
529
+ * Function type for a callback. The type of the data passed to the
530
+ * callback in \p cbdata depends on the \p domain. If \p domain is
531
+ * CUPTI_CB_DOMAIN_DRIVER_API or CUPTI_CB_DOMAIN_RUNTIME_API the type
532
+ * of \p cbdata will be CUpti_CallbackData. If \p domain is
533
+ * CUPTI_CB_DOMAIN_RESOURCE the type of \p cbdata will be
534
+ * CUpti_ResourceData. If \p domain is CUPTI_CB_DOMAIN_SYNCHRONIZE the
535
+ * type of \p cbdata will be CUpti_SynchronizeData. If \p domain is
536
+ * CUPTI_CB_DOMAIN_NVTX the type of \p cbdata will be CUpti_NvtxData.
537
+ *
538
+ * \param userdata User data supplied at subscription of the callback
539
+ * \param domain The domain of the callback
540
+ * \param cbid The ID of the callback
541
+ * \param cbdata Data passed to the callback.
542
+ */
543
+ typedef void (CUPTIAPI *CUpti_CallbackFunc)(
544
+ void *userdata,
545
+ CUpti_CallbackDomain domain,
546
+ CUpti_CallbackId cbid,
547
+ const void *cbdata);
548
+
549
+ /**
550
+ * \brief A callback subscriber.
551
+ */
552
+ typedef struct CUpti_Subscriber_st *CUpti_SubscriberHandle;
553
+
554
+ /**
555
+ * \brief Pointer to an array of callback domains.
556
+ */
557
+ typedef CUpti_CallbackDomain *CUpti_DomainTable;
558
+
559
+ /**
560
+ * \brief Get the available callback domains.
561
+ *
562
+ * Returns in \p *domainTable an array of size \p *domainCount of all
563
+ * the available callback domains.
564
+ * \note \b Thread-safety: this function is thread safe.
565
+ *
566
+ * \param domainCount Returns number of callback domains
567
+ * \param domainTable Returns pointer to array of available callback domains
568
+ *
569
+ * \retval CUPTI_SUCCESS on success
570
+ * \retval CUPTI_ERROR_NOT_INITIALIZED if unable to initialize CUPTI
571
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p domainCount or \p domainTable are NULL
572
+ */
573
+ CUptiResult CUPTIAPI cuptiSupportedDomains(size_t *domainCount,
574
+ CUpti_DomainTable *domainTable);
575
+
576
+ /**
577
+ * \brief Initialize a callback subscriber with a callback function
578
+ * and user data.
579
+ *
580
+ * Initializes a callback subscriber with a callback function and
581
+ * (optionally) a pointer to user data. The returned subscriber handle
582
+ * can be used to enable and disable the callback for specific domains
583
+ * and callback IDs.
584
+ * \note Only a single subscriber can be registered at a time. To ensure
585
+ * that no other CUPTI client interrupts the profiling session, it's the
586
+ * responsibility of all the CUPTI clients to call this function before
587
+ * starting the profling session. In case profiling session is already
588
+ * started by another CUPTI client, this function returns the error code
589
+ * CUPTI_ERROR_MULTIPLE_SUBSCRIBERS_NOT_SUPPORTED.
590
+ * Note that this function returns the same error when application is
591
+ * launched using NVIDIA tools like nvprof, Visual Profiler, Nsight Systems,
592
+ * Nsight Compute, cuda-gdb and cuda-memcheck.
593
+ * \note This function does not enable any callbacks.
594
+ * \note \b Thread-safety: this function is thread safe.
595
+ *
596
+ * \param subscriber Returns handle to initialize subscriber
597
+ * \param callback The callback function
598
+ * \param userdata A pointer to user data. This data will be passed to
599
+ * the callback function via the \p userdata paramater.
600
+ *
601
+ * \retval CUPTI_SUCCESS on success
602
+ * \retval CUPTI_ERROR_NOT_INITIALIZED if unable to initialize CUPTI
603
+ * \retval CUPTI_ERROR_MULTIPLE_SUBSCRIBERS_NOT_SUPPORTED if there is already a CUPTI subscriber
604
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p subscriber is NULL
605
+ */
606
+ CUptiResult CUPTIAPI cuptiSubscribe(CUpti_SubscriberHandle *subscriber,
607
+ CUpti_CallbackFunc callback,
608
+ void *userdata);
609
+
610
+ /**
611
+ * \brief Unregister a callback subscriber.
612
+ *
613
+ * Removes a callback subscriber so that no future callbacks will be
614
+ * issued to that subscriber.
615
+ * \note \b Thread-safety: this function is thread safe.
616
+ *
617
+ * \param subscriber Handle to the initialize subscriber
618
+ *
619
+ * \retval CUPTI_SUCCESS on success
620
+ * \retval CUPTI_ERROR_NOT_INITIALIZED if unable to initialized CUPTI
621
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p subscriber is NULL or not initialized
622
+ */
623
+ CUptiResult CUPTIAPI cuptiUnsubscribe(CUpti_SubscriberHandle subscriber);
624
+
625
+ /**
626
+ * \brief Get the current enabled/disabled state of a callback for a specific
627
+ * domain and function ID.
628
+ *
629
+ * Returns non-zero in \p *enable if the callback for a domain and
630
+ * callback ID is enabled, and zero if not enabled.
631
+ *
632
+ * \note \b Thread-safety: a subscriber must serialize access to
633
+ * cuptiGetCallbackState, cuptiEnableCallback, cuptiEnableDomain, and
634
+ * cuptiEnableAllDomains. For example, if cuptiGetCallbackState(sub,
635
+ * d, c) and cuptiEnableCallback(sub, d, c) are called concurrently,
636
+ * the results are undefined.
637
+ *
638
+ * \param enable Returns non-zero if callback enabled, zero if not enabled
639
+ * \param subscriber Handle to the initialize subscriber
640
+ * \param domain The domain of the callback
641
+ * \param cbid The ID of the callback
642
+ *
643
+ * \retval CUPTI_SUCCESS on success
644
+ * \retval CUPTI_ERROR_NOT_INITIALIZED if unable to initialized CUPTI
645
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p enabled is NULL, or if \p
646
+ * subscriber, \p domain or \p cbid is invalid.
647
+ */
648
+ CUptiResult CUPTIAPI cuptiGetCallbackState(uint32_t *enable,
649
+ CUpti_SubscriberHandle subscriber,
650
+ CUpti_CallbackDomain domain,
651
+ CUpti_CallbackId cbid);
652
+
653
+ /**
654
+ * \brief Enable or disabled callbacks for a specific domain and
655
+ * callback ID.
656
+ *
657
+ * Enable or disabled callbacks for a subscriber for a specific domain
658
+ * and callback ID.
659
+ *
660
+ * \note \b Thread-safety: a subscriber must serialize access to
661
+ * cuptiGetCallbackState, cuptiEnableCallback, cuptiEnableDomain, and
662
+ * cuptiEnableAllDomains. For example, if cuptiGetCallbackState(sub,
663
+ * d, c) and cuptiEnableCallback(sub, d, c) are called concurrently,
664
+ * the results are undefined.
665
+ *
666
+ * \param enable New enable state for the callback. Zero disables the
667
+ * callback, non-zero enables the callback.
668
+ * \param subscriber - Handle to callback subscription
669
+ * \param domain The domain of the callback
670
+ * \param cbid The ID of the callback
671
+ *
672
+ * \retval CUPTI_SUCCESS on success
673
+ * \retval CUPTI_ERROR_NOT_INITIALIZED if unable to initialized CUPTI
674
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p subscriber, \p domain or \p
675
+ * cbid is invalid.
676
+ */
677
+ CUptiResult CUPTIAPI cuptiEnableCallback(uint32_t enable,
678
+ CUpti_SubscriberHandle subscriber,
679
+ CUpti_CallbackDomain domain,
680
+ CUpti_CallbackId cbid);
681
+
682
+ /**
683
+ * \brief Enable or disabled all callbacks for a specific domain.
684
+ *
685
+ * Enable or disabled all callbacks for a specific domain.
686
+ *
687
+ * \note \b Thread-safety: a subscriber must serialize access to
688
+ * cuptiGetCallbackState, cuptiEnableCallback, cuptiEnableDomain, and
689
+ * cuptiEnableAllDomains. For example, if cuptiGetCallbackEnabled(sub,
690
+ * d, *) and cuptiEnableDomain(sub, d) are called concurrently, the
691
+ * results are undefined.
692
+ *
693
+ * \param enable New enable state for all callbacks in the
694
+ * domain. Zero disables all callbacks, non-zero enables all
695
+ * callbacks.
696
+ * \param subscriber - Handle to callback subscription
697
+ * \param domain The domain of the callback
698
+ *
699
+ * \retval CUPTI_SUCCESS on success
700
+ * \retval CUPTI_ERROR_NOT_INITIALIZED if unable to initialized CUPTI
701
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p subscriber or \p domain is invalid
702
+ */
703
+ CUptiResult CUPTIAPI cuptiEnableDomain(uint32_t enable,
704
+ CUpti_SubscriberHandle subscriber,
705
+ CUpti_CallbackDomain domain);
706
+
707
+ /**
708
+ * \brief Enable or disable all callbacks in all domains.
709
+ *
710
+ * Enable or disable all callbacks in all domains.
711
+ *
712
+ * \note \b Thread-safety: a subscriber must serialize access to
713
+ * cuptiGetCallbackState, cuptiEnableCallback, cuptiEnableDomain, and
714
+ * cuptiEnableAllDomains. For example, if cuptiGetCallbackState(sub,
715
+ * d, *) and cuptiEnableAllDomains(sub) are called concurrently, the
716
+ * results are undefined.
717
+ *
718
+ * \param enable New enable state for all callbacks in all
719
+ * domain. Zero disables all callbacks, non-zero enables all
720
+ * callbacks.
721
+ * \param subscriber - Handle to callback subscription
722
+ *
723
+ * \retval CUPTI_SUCCESS on success
724
+ * \retval CUPTI_ERROR_NOT_INITIALIZED if unable to initialized CUPTI
725
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p subscriber is invalid
726
+ */
727
+ CUptiResult CUPTIAPI cuptiEnableAllDomains(uint32_t enable,
728
+ CUpti_SubscriberHandle subscriber);
729
+
730
+ /**
731
+ * \brief Get the name of a callback for a specific domain and callback ID.
732
+ *
733
+ * Returns a pointer to the name c_string in \p **name.
734
+ *
735
+ * \note \b Names are available only for the DRIVER and RUNTIME domains.
736
+ *
737
+ * \param domain The domain of the callback
738
+ * \param cbid The ID of the callback
739
+ * \param name Returns pointer to the name string on success, NULL otherwise
740
+ *
741
+ * \retval CUPTI_SUCCESS on success
742
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p name is NULL, or if
743
+ * \p domain or \p cbid is invalid.
744
+ */
745
+ CUptiResult CUPTIAPI cuptiGetCallbackName(CUpti_CallbackDomain domain,
746
+ uint32_t cbid,
747
+ const char **name);
748
+
749
+ /** @} */ /* END CUPTI_CALLBACK_API */
750
+
751
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
752
+ #pragma GCC visibility pop
753
+ #endif
754
+
755
+ #if defined(__cplusplus)
756
+ }
757
+ #endif
758
+
759
+ #endif // file guard
760
+
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_checkpoint.h ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include <cuda.h>
4
+ #include <cupti_result.h>
5
+
6
+ #include <stddef.h>
7
+ #include <stdint.h>
8
+
9
+ namespace NV { namespace Cupti { namespace Checkpoint {
10
+
11
+ #ifdef __cplusplus
12
+ extern "C"
13
+ {
14
+ #endif
15
+
16
+ /**
17
+ * \defgroup CUPTI_CHECKPOINT_API CUPTI Checkpoint API
18
+ * Functions, types, and enums that implement the CUPTI Checkpoint API.
19
+ * @{
20
+ */
21
+
22
+ /**
23
+ * \brief Specifies optimization options for a checkpoint, may be OR'd together to specify multiple options.
24
+ */
25
+ typedef enum
26
+ {
27
+ CUPTI_CHECKPOINT_OPT_NONE = 0, //!< Default behavior
28
+ CUPTI_CHECKPOINT_OPT_TRANSFER = 1, //!< Determine which mem blocks have changed, and only restore those. This optimization is cached, which means cuptiCheckpointRestore must always be called at the same point in the application when this option is enabled, or the result may be incorrect.
29
+ } CUpti_CheckpointOptimizations;
30
+
31
+ /**
32
+ * \brief Configuration and handle for a CUPTI Checkpoint
33
+ *
34
+ * A CUptiCheckpoint object should be initialized with desired options prior to passing into any
35
+ * CUPTI Checkpoint API function. The first call into a Checkpoint API function will initialize internal
36
+ * state based on these options. Subsequent changes to these options will not have any effect.
37
+ *
38
+ * Checkpoint data is saved in device, host, and filesystem space. There are options to reserve memory
39
+ * at each level (device, host, filesystem) which are intended to allow a guarantee that a certain amount
40
+ * of memory will remain free for use after the checkpoint is saved.
41
+ * Note, however, that falling back to slower levels of memory (host, and then filesystem) to save the checkpoint
42
+ * will result in performance degradation.
43
+ * Currently, the filesystem limitation is not implemented. Note that falling back to filesystem storage may
44
+ * significantly impact the performance for saving and restoring a checkpoint.
45
+ */
46
+ typedef struct
47
+ {
48
+ size_t structSize; //!< [in] Must be set to CUpti_Checkpoint_STRUCT_SIZE
49
+
50
+ CUcontext ctx; //!< [in] Set to context to save from, or will use current context if NULL
51
+
52
+ size_t reserveDeviceMB; //!< [in] Restrict checkpoint from using last N MB of device memory (-1 = use no device memory)
53
+ size_t reserveHostMB; //!< [in] Restrict checkpoint from using last N MB of host memory (-1 = use no host memory)
54
+ uint8_t allowOverwrite; //!< [in] Boolean, Allow checkpoint to save over existing checkpoint
55
+ uint8_t optimizations; //!< [in] Mask of CUpti_CheckpointOptimizations flags for this checkpoint
56
+
57
+ void * pPriv; //!< [in] Assign to NULL
58
+ } CUpti_Checkpoint;
59
+
60
+ #define CUpti_Checkpoint_STRUCT_SIZE \
61
+ (offsetof(CUpti_Checkpoint, pPriv) + \
62
+ sizeof(((CUpti_Checkpoint*)(nullptr))->pPriv))
63
+
64
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
65
+ #pragma GCC visibility push(default)
66
+ #endif
67
+
68
+ /**
69
+ * \brief Initialize and save a checkpoint of the device state associated with the handle context
70
+ *
71
+ * Uses the handle options to configure and save a checkpoint of the device state associated with the specified context.
72
+ *
73
+ * \param handle A pointer to a CUpti_Checkpoint object
74
+ *
75
+ * \retval CUPTI_SUCCESS if a checkpoint was successfully initialized and saved
76
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p handle does not appear to refer to a valid CUpti_Checkpoint
77
+ * \retval CUPTI_ERROR_INVALID_CONTEXT
78
+ * \retval CUPTI_ERROR_INVALID_DEVICE if device associated with context is not compatible with checkpoint API
79
+ * \retval CUPTI_ERROR_INVALID_OPERATION if Save is requested over an existing checkpoint, but \p allowOverwrite was not originally specified
80
+ * \retval CUPTI_ERROR_OUT_OF_MEMORY if as configured, not enough backing storage space to save the checkpoint
81
+ */
82
+ CUptiResult cuptiCheckpointSave(CUpti_Checkpoint * const handle);
83
+
84
+ /**
85
+ * \brief Restore a checkpoint to the device associated with its context
86
+ *
87
+ * Restores device, pinned, and allocated memory to the state when the checkpoint was saved
88
+ *
89
+ * \param handle A pointer to a previously saved CUpti_Checkpoint object
90
+ *
91
+ * \retval CUTPI_SUCCESS if the checkpoint was successfully restored
92
+ * \retval CUPTI_ERROR_NOT_INITIALIZED if the checkpoint was not previously initialized
93
+ * \retval CUPTI_ERROR_INVALID_CONTEXT
94
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if the handle appears invalid
95
+ * \retval CUPTI_ERROR_UNKNOWN if the restore or optimization operation fails
96
+ */
97
+ CUptiResult cuptiCheckpointRestore(CUpti_Checkpoint * const handle);
98
+
99
+ /**
100
+ * \brief Free the backing data for a checkpoint
101
+ *
102
+ * Frees all associated device, host memory and filesystem storage used for this context.
103
+ * After freeing a handle, it may be re-used as if it was new - options may be re-configured and will
104
+ * take effect on the next call to \p cuptiCheckpointSave.
105
+ *
106
+ * \param handle A pointer to a previously saved CUpti_Checkpoint object
107
+ *
108
+ * \retval CUPTI_SUCCESS if the handle was successfully freed
109
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if the handle was already freed or appears invalid
110
+ * \retval CUPTI_ERROR_INVALID_CONTEXT if the context is no longer valid
111
+ */
112
+ CUptiResult cuptiCheckpointFree(CUpti_Checkpoint * const handle);
113
+
114
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
115
+ #pragma GCC visibility pop
116
+ #endif
117
+
118
+ /**
119
+ * @}
120
+ */
121
+
122
+ #ifdef __cplusplus
123
+ }
124
+ #endif
125
+
126
+ // Exit namespace NV::Cupti::Checkpoint
127
+ }}}
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_nvtx_cbid.h ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2013-2017 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
51
+ #pragma GCC visibility push(default)
52
+ #endif
53
+
54
+ typedef enum {
55
+ CUPTI_CBID_NVTX_INVALID = 0,
56
+ CUPTI_CBID_NVTX_nvtxMarkA = 1,
57
+ CUPTI_CBID_NVTX_nvtxMarkW = 2,
58
+ CUPTI_CBID_NVTX_nvtxMarkEx = 3,
59
+ CUPTI_CBID_NVTX_nvtxRangeStartA = 4,
60
+ CUPTI_CBID_NVTX_nvtxRangeStartW = 5,
61
+ CUPTI_CBID_NVTX_nvtxRangeStartEx = 6,
62
+ CUPTI_CBID_NVTX_nvtxRangeEnd = 7,
63
+ CUPTI_CBID_NVTX_nvtxRangePushA = 8,
64
+ CUPTI_CBID_NVTX_nvtxRangePushW = 9,
65
+ CUPTI_CBID_NVTX_nvtxRangePushEx = 10,
66
+ CUPTI_CBID_NVTX_nvtxRangePop = 11,
67
+ CUPTI_CBID_NVTX_nvtxNameCategoryA = 12,
68
+ CUPTI_CBID_NVTX_nvtxNameCategoryW = 13,
69
+ CUPTI_CBID_NVTX_nvtxNameOsThreadA = 14,
70
+ CUPTI_CBID_NVTX_nvtxNameOsThreadW = 15,
71
+ CUPTI_CBID_NVTX_nvtxNameCuDeviceA = 16,
72
+ CUPTI_CBID_NVTX_nvtxNameCuDeviceW = 17,
73
+ CUPTI_CBID_NVTX_nvtxNameCuContextA = 18,
74
+ CUPTI_CBID_NVTX_nvtxNameCuContextW = 19,
75
+ CUPTI_CBID_NVTX_nvtxNameCuStreamA = 20,
76
+ CUPTI_CBID_NVTX_nvtxNameCuStreamW = 21,
77
+ CUPTI_CBID_NVTX_nvtxNameCuEventA = 22,
78
+ CUPTI_CBID_NVTX_nvtxNameCuEventW = 23,
79
+ CUPTI_CBID_NVTX_nvtxNameCudaDeviceA = 24,
80
+ CUPTI_CBID_NVTX_nvtxNameCudaDeviceW = 25,
81
+ CUPTI_CBID_NVTX_nvtxNameCudaStreamA = 26,
82
+ CUPTI_CBID_NVTX_nvtxNameCudaStreamW = 27,
83
+ CUPTI_CBID_NVTX_nvtxNameCudaEventA = 28,
84
+ CUPTI_CBID_NVTX_nvtxNameCudaEventW = 29,
85
+ CUPTI_CBID_NVTX_nvtxDomainMarkEx = 30,
86
+ CUPTI_CBID_NVTX_nvtxDomainRangeStartEx = 31,
87
+ CUPTI_CBID_NVTX_nvtxDomainRangeEnd = 32,
88
+ CUPTI_CBID_NVTX_nvtxDomainRangePushEx = 33,
89
+ CUPTI_CBID_NVTX_nvtxDomainRangePop = 34,
90
+ CUPTI_CBID_NVTX_nvtxDomainResourceCreate = 35,
91
+ CUPTI_CBID_NVTX_nvtxDomainResourceDestroy = 36,
92
+ CUPTI_CBID_NVTX_nvtxDomainNameCategoryA = 37,
93
+ CUPTI_CBID_NVTX_nvtxDomainNameCategoryW = 38,
94
+ CUPTI_CBID_NVTX_nvtxDomainRegisterStringA = 39,
95
+ CUPTI_CBID_NVTX_nvtxDomainRegisterStringW = 40,
96
+ CUPTI_CBID_NVTX_nvtxDomainCreateA = 41,
97
+ CUPTI_CBID_NVTX_nvtxDomainCreateW = 42,
98
+ CUPTI_CBID_NVTX_nvtxDomainDestroy = 43,
99
+ CUPTI_CBID_NVTX_nvtxDomainSyncUserCreate = 44,
100
+ CUPTI_CBID_NVTX_nvtxDomainSyncUserDestroy = 45,
101
+ CUPTI_CBID_NVTX_nvtxDomainSyncUserAcquireStart = 46,
102
+ CUPTI_CBID_NVTX_nvtxDomainSyncUserAcquireFailed = 47,
103
+ CUPTI_CBID_NVTX_nvtxDomainSyncUserAcquireSuccess = 48,
104
+ CUPTI_CBID_NVTX_nvtxDomainSyncUserReleasing = 49,
105
+ CUPTI_CBID_NVTX_SIZE,
106
+ CUPTI_CBID_NVTX_FORCE_INT = 0x7fffffff
107
+ } CUpti_nvtx_api_trace_cbid;
108
+
109
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
110
+ #pragma GCC visibility pop
111
+ #endif
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_profiler_target.h ADDED
@@ -0,0 +1,588 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2011-2020 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #if !defined(_CUPTI_PROFILER_TARGET_H_)
51
+ #define _CUPTI_PROFILER_TARGET_H_
52
+
53
+ #include <cuda.h>
54
+ #include <cupti_result.h>
55
+ #include <stddef.h>
56
+ #include <stdint.h>
57
+
58
+ #ifdef __cplusplus
59
+ extern "C" {
60
+ #endif
61
+
62
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
63
+ #pragma GCC visibility push(default)
64
+ #endif
65
+
66
+ /**
67
+ * \defgroup CUPTI_PROFILER_API CUPTI Profiling API
68
+ * Functions, types, and enums that implement the CUPTI Profiling API.
69
+ * @{
70
+ */
71
+ #ifndef CUPTI_PROFILER_STRUCT_SIZE
72
+ #define CUPTI_PROFILER_STRUCT_SIZE(type_, lastfield_) (offsetof(type_, lastfield_) + sizeof(((type_*)0)->lastfield_))
73
+ #endif
74
+
75
+ /**
76
+ * \brief Profiler range attribute
77
+ *
78
+ * A metric enabled in the session's configuration is collected separately per unique range-stack in the pass.
79
+ * This is an attribute to collect metrics around each kernel in a profiling session or in an user defined range.
80
+ */
81
+ typedef enum
82
+ {
83
+ /**
84
+ * Invalid value
85
+ */
86
+ CUPTI_Range_INVALID,
87
+ /**
88
+ * Ranges are auto defined around each kernel in a profiling session
89
+ */
90
+ CUPTI_AutoRange,
91
+ /**
92
+ * A range in which metric data to be collected is defined by the user
93
+ */
94
+ CUPTI_UserRange,
95
+ /**
96
+ * Range count
97
+ */
98
+ CUPTI_Range_COUNT,
99
+ } CUpti_ProfilerRange;
100
+
101
+ /**
102
+ * \brief Profiler replay attribute
103
+ *
104
+ * For metrics which require multipass collection, a replay of the GPU kernel(s) is required.
105
+ * This is an attribute which specify how the replay of the kernel(s) to be measured is done.
106
+ */
107
+ typedef enum
108
+ {
109
+ /**
110
+ * Invalid Value
111
+ */
112
+ CUPTI_Replay_INVALID,
113
+ /**
114
+ * Replay is done by CUPTI user around the process
115
+ */
116
+ CUPTI_ApplicationReplay,
117
+ /**
118
+ * Replay is done around kernel implicitly by CUPTI
119
+ */
120
+ CUPTI_KernelReplay,
121
+ /**
122
+ * Replay is done by CUPTI user within a process
123
+ */
124
+ CUPTI_UserReplay,
125
+ /**
126
+ * Replay count
127
+ */
128
+ CUPTI_Replay_COUNT,
129
+ } CUpti_ProfilerReplayMode;
130
+
131
+ /**
132
+ * \brief Default parameter for cuptiProfilerInitialize
133
+ */
134
+ typedef struct CUpti_Profiler_Initialize_Params
135
+ {
136
+ size_t structSize; //!< [in] CUpti_Profiler_Initialize_Params_STRUCT_SIZE
137
+ void* pPriv; //!< [in] assign to NULL
138
+
139
+ } CUpti_Profiler_Initialize_Params;
140
+ #define CUpti_Profiler_Initialize_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Initialize_Params, pPriv)
141
+
142
+ /**
143
+ * \brief Default parameter for cuptiProfilerDeInitialize
144
+ */
145
+ typedef struct CUpti_Profiler_DeInitialize_Params
146
+ {
147
+ size_t structSize; //!< [in] CUpti_Profiler_DeInitialize_Params_STRUCT_SIZE
148
+ void* pPriv; //!< [in] assign to NULL
149
+
150
+ } CUpti_Profiler_DeInitialize_Params;
151
+ #define CUpti_Profiler_DeInitialize_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_DeInitialize_Params, pPriv)
152
+
153
+ /**
154
+ * \brief Initializes the profiler interface
155
+ *
156
+ * Loads the required libraries in the process address space.
157
+ * Sets up the hooks with the CUDA driver.
158
+ */
159
+ CUptiResult CUPTIAPI cuptiProfilerInitialize(CUpti_Profiler_Initialize_Params *pParams);
160
+
161
+ /**
162
+ * \brief DeInitializes the profiler interface
163
+ */
164
+ CUptiResult CUPTIAPI cuptiProfilerDeInitialize(CUpti_Profiler_DeInitialize_Params *pParams);
165
+
166
+ /**
167
+ * \brief Input parameter to define the counterDataImage
168
+ */
169
+ typedef struct CUpti_Profiler_CounterDataImageOptions
170
+ {
171
+ size_t structSize; //!< [in] CUpti_Profiler_CounterDataImageOptions_Params_STRUCT_SIZE
172
+ void* pPriv; //!< [in] assign to NULL
173
+
174
+ const uint8_t* pCounterDataPrefix; /**< [in] Address of CounterDataPrefix generated from NVPW_CounterDataBuilder_GetCounterDataPrefix().
175
+ Must be align(8).*/
176
+ size_t counterDataPrefixSize; //!< [in] Size of CounterDataPrefix generated from NVPW_CounterDataBuilder_GetCounterDataPrefix().
177
+ uint32_t maxNumRanges; //!< [in] Maximum number of ranges that can be profiled
178
+ uint32_t maxNumRangeTreeNodes; //!< [in] Maximum number of RangeTree nodes; must be >= maxNumRanges
179
+ uint32_t maxRangeNameLength; //!< [in] Maximum string length of each RangeName, including the trailing NULL character
180
+ } CUpti_Profiler_CounterDataImageOptions;
181
+ #define CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_CounterDataImageOptions, maxRangeNameLength)
182
+
183
+ /**
184
+ * \brief Params for cuptiProfilerCounterDataImageCalculateSize
185
+ */
186
+ typedef struct CUpti_Profiler_CounterDataImage_CalculateSize_Params
187
+ {
188
+ size_t structSize; //!< [in] CUpti_Profiler_CounterDataImage_CalculateSize_Params_STRUCT_SIZE
189
+ void* pPriv; //!< [in] assign to NULL
190
+
191
+ size_t sizeofCounterDataImageOptions; //!< [in] CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE
192
+ const CUpti_Profiler_CounterDataImageOptions* pOptions; //!< [in] Pointer to Counter Data Image Options
193
+ size_t counterDataImageSize; //!< [out]
194
+ } CUpti_Profiler_CounterDataImage_CalculateSize_Params;
195
+ #define CUpti_Profiler_CounterDataImage_CalculateSize_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_CounterDataImage_CalculateSize_Params, counterDataImageSize)
196
+
197
+ /**
198
+ * \brief Params for cuptiProfilerCounterDataImageInitialize
199
+ */
200
+ typedef struct CUpti_Profiler_CounterDataImage_Initialize_Params
201
+ {
202
+ size_t structSize; //!< [in] CUpti_Profiler_CounterDataImage_Initialize_Params_STRUCT_SIZE
203
+ void* pPriv; //!< [in] assign to NULL
204
+
205
+ size_t sizeofCounterDataImageOptions; //!< [in] CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE
206
+ const CUpti_Profiler_CounterDataImageOptions* pOptions; //!< [in] Pointer to Counter Data Image Options
207
+ size_t counterDataImageSize; //!< [in] Size calculated from cuptiProfilerCounterDataImageCalculateSize
208
+ uint8_t* pCounterDataImage; //!< [in] The buffer to be initialized.
209
+ } CUpti_Profiler_CounterDataImage_Initialize_Params;
210
+ #define CUpti_Profiler_CounterDataImage_Initialize_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_CounterDataImage_Initialize_Params, pCounterDataImage)
211
+
212
+ /**
213
+ * \brief A CounterData image allocates space for values for each counter for each range.
214
+ *
215
+ * User borne the resposibility of managing the counterDataImage allocations.
216
+ * CounterDataPrefix contains meta data about the metrics that will be stored in counterDataImage.
217
+ * Use these APIs to calculate the allocation size and initialize counterData image.
218
+ */
219
+ CUptiResult cuptiProfilerCounterDataImageCalculateSize(CUpti_Profiler_CounterDataImage_CalculateSize_Params* pParams);
220
+ CUptiResult cuptiProfilerCounterDataImageInitialize(CUpti_Profiler_CounterDataImage_Initialize_Params* pParams);
221
+
222
+ /**
223
+ * \brief Params for cuptiProfilerCounterDataImageCalculateScratchBufferSize
224
+ */
225
+ typedef struct CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params
226
+ {
227
+ size_t structSize; //!< [in] CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params_STRUCT_SIZE
228
+ void* pPriv; //!< [in] assign to NULL
229
+
230
+ size_t counterDataImageSize; //!< [in] size calculated from cuptiProfilerCounterDataImageCalculateSize
231
+ uint8_t* pCounterDataImage; //!< [in]
232
+ size_t counterDataScratchBufferSize; //!< [out]
233
+ } CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params;
234
+ #define CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params, counterDataScratchBufferSize)
235
+
236
+ /**
237
+ * \brief Params for cuptiProfilerCounterDataImageInitializeScratchBuffer
238
+ */
239
+ typedef struct CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params
240
+ {
241
+ size_t structSize; //!< [in] CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params_STRUCT_SIZE
242
+ void* pPriv; //!< [in] assign to NULL
243
+
244
+ size_t counterDataImageSize; //!< [in] size calculated from cuptiProfilerCounterDataImageCalculateSize
245
+ uint8_t* pCounterDataImage; //!< [in]
246
+ size_t counterDataScratchBufferSize; //!< [in] size calculated using cuptiProfilerCounterDataImageCalculateScratchBufferSize
247
+ uint8_t* pCounterDataScratchBuffer; //!< [in] the scratch buffer to be initialized.
248
+ } CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params;
249
+ #define CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params, pCounterDataScratchBuffer)
250
+
251
+ /**
252
+ * \brief A temporary storage for CounterData image needed for internal operations
253
+ *
254
+ * Use these APIs to calculate the allocation size and initialize counterData image scratch buffer.
255
+ */
256
+ CUptiResult cuptiProfilerCounterDataImageCalculateScratchBufferSize(CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params* pParams);
257
+ CUptiResult cuptiProfilerCounterDataImageInitializeScratchBuffer(CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params* pParams);
258
+
259
+ /**
260
+ * \brief Params for cuptiProfilerBeginSession
261
+ */
262
+ typedef struct CUpti_Profiler_BeginSession_Params
263
+ {
264
+ size_t structSize; //!< [in] CUpti_Profiler_BeginSession_Params_STRUCT_SIZE
265
+ void* pPriv; //!< [in] assign to NULL
266
+
267
+ CUcontext ctx; //!< [in] if NULL, the current CUcontext is used
268
+ size_t counterDataImageSize; //!< [in] size calculated from cuptiProfilerCounterDataImageCalculateSize
269
+ uint8_t* pCounterDataImage; //!< [in] address of CounterDataImage
270
+ size_t counterDataScratchBufferSize; //!< [in] size calculated from cuptiProfilerCounterDataImageInitializeScratchBuffer
271
+ uint8_t* pCounterDataScratchBuffer; //!< [in] address of CounterDataImage scratch buffer
272
+ uint8_t bDumpCounterDataInFile; //!< [in] [optional]
273
+ const char* pCounterDataFilePath; //!< [in] [optional]
274
+ CUpti_ProfilerRange range; //!< [in] CUpti_ProfilerRange
275
+ CUpti_ProfilerReplayMode replayMode; //!< [in] CUpti_ProfilerReplayMode
276
+ /* Replay options, required when replay is done by cupti user */
277
+ size_t maxRangesPerPass; //!< [in] Maximum number of ranges that can be recorded in a single pass.
278
+ size_t maxLaunchesPerPass; //!< [in] Maximum number of kernel launches that can be recorded in a single pass; must be >= maxRangesPerPass.
279
+
280
+ } CUpti_Profiler_BeginSession_Params;
281
+ #define CUpti_Profiler_BeginSession_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_BeginSession_Params, maxLaunchesPerPass)
282
+ /**
283
+ * \brief Params for cuptiProfilerEndSession
284
+ */
285
+ typedef struct CUpti_Profiler_EndSession_Params
286
+ {
287
+ size_t structSize; //!< [in] CUpti_Profiler_EndSession_Params_STRUCT_SIZE
288
+ void* pPriv; //!< [in] assign to NULL
289
+
290
+ CUcontext ctx; //!< [in] if NULL, the current CUcontext is used
291
+ } CUpti_Profiler_EndSession_Params;
292
+ #define CUpti_Profiler_EndSession_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_EndSession_Params, ctx)
293
+
294
+ /**
295
+ * \brief Begin profiling session sets up the profiling on the device
296
+ *
297
+ * Although, it doesn't start the profiling but GPU resources needed for profiling are allocated.
298
+ * Outside of a session, the GPU will return to its normal operating state.
299
+ */
300
+ CUptiResult CUPTIAPI cuptiProfilerBeginSession(CUpti_Profiler_BeginSession_Params* pParams);
301
+ /**
302
+ * \brief Ends profiling session
303
+ *
304
+ * Frees up the GPU resources acquired for profiling.
305
+ * Outside of a session, the GPU will return to it's normal operating state.
306
+ */
307
+ CUptiResult CUPTIAPI cuptiProfilerEndSession(CUpti_Profiler_EndSession_Params* pParams);
308
+
309
+ /**
310
+ * \brief Params for cuptiProfilerSetConfig
311
+ */
312
+ typedef struct CUpti_Profiler_SetConfig_Params
313
+ {
314
+ size_t structSize; //!< [in] CUpti_Profiler_SetConfig_Params_STRUCT_SIZE
315
+ void* pPriv; //!< [in] assign to NULL
316
+
317
+ CUcontext ctx; //!< [in] if NULL, the current CUcontext is used
318
+ const uint8_t* pConfig; //!< [in] Config created by NVPW_RawMetricsConfig_GetConfigImage(). Must be align(8).
319
+ size_t configSize; //!< [in] size of config
320
+ uint16_t minNestingLevel; //!< [in] the lowest nesting level to be profiled; must be >= 1
321
+ uint16_t numNestingLevels; //!< [in] the number of nesting levels to profile; must be >= 1
322
+ size_t passIndex; //!< [in] Set this to zero for in-app replay; set this to the output of EndPass() for application replay
323
+ uint16_t targetNestingLevel; //!< [in] Set this to minNestingLevel for in-app replay; set this to the output of EndPass() for application
324
+ } CUpti_Profiler_SetConfig_Params;
325
+
326
+ #define CUpti_Profiler_SetConfig_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_SetConfig_Params, targetNestingLevel)
327
+
328
+ /**
329
+ * \brief Params for cuptiProfilerUnsetConfig
330
+ */
331
+ typedef struct CUpti_Profiler_UnsetConfig_Params
332
+ {
333
+ size_t structSize; //!< [in] CUpti_Profiler_UnsetConfig_Params_STRUCT_SIZE
334
+ void* pPriv; //!< [in] assign to NULL
335
+
336
+ CUcontext ctx; //!< [in] if NULL, the current CUcontext is used
337
+ } CUpti_Profiler_UnsetConfig_Params;
338
+ #define CUpti_Profiler_UnsetConfig_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_UnsetConfig_Params, ctx)
339
+
340
+ /**
341
+ * \brief Set metrics configuration to be profiled
342
+ *
343
+ * Use these APIs to set the config to profile in a session. It can be used for advanced cases such as where multiple
344
+ * configurations are collected into a single CounterData Image on the need basis, without restarting the session.
345
+ */
346
+ CUptiResult CUPTIAPI cuptiProfilerSetConfig(CUpti_Profiler_SetConfig_Params* pParams);
347
+ /**
348
+ * \brief Unset metrics configuration profiled
349
+ *
350
+ */
351
+ CUptiResult CUPTIAPI cuptiProfilerUnsetConfig(CUpti_Profiler_UnsetConfig_Params* pParams);
352
+
353
+ /**
354
+ * \brief Params for cuptiProfilerBeginPass
355
+ */
356
+ typedef struct CUpti_Profiler_BeginPass_Params
357
+ {
358
+ size_t structSize; //!< [in] CUpti_Profiler_BeginPass_Params_STRUCT_SIZE
359
+ void* pPriv; //!< [in] assign to NULL
360
+
361
+ CUcontext ctx; //!< [in] if NULL, the current CUcontext is used
362
+ } CUpti_Profiler_BeginPass_Params;
363
+ #define CUpti_Profiler_BeginPass_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_BeginPass_Params, ctx)
364
+
365
+ /**
366
+ * \brief Params for cuptiProfilerEndPass
367
+ */
368
+ typedef struct CUpti_Profiler_EndPass_Params
369
+ {
370
+ size_t structSize; //!< [in] CUpti_Profiler_EndPass_Params_STRUCT_SIZE
371
+ void* pPriv; //!< [in] assign to NULL
372
+
373
+ CUcontext ctx; //!< [in] if NULL, the current CUcontext is used
374
+ uint16_t targetNestingLevel; //! [out] The targetNestingLevel that will be collected by the *next* BeginPass.
375
+ size_t passIndex; //!< [out] The passIndex that will be collected by the *next* BeginPass
376
+ uint8_t allPassesSubmitted; //!< [out] becomes true when the last pass has been queued to the GPU
377
+ } CUpti_Profiler_EndPass_Params;
378
+ #define CUpti_Profiler_EndPass_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_EndPass_Params, allPassesSubmitted)
379
+
380
+ /**
381
+ * \brief Replay API: used for multipass collection.
382
+
383
+ * These APIs are used if user chooses to replay by itself \ref CUPTI_UserReplay or \ref CUPTI_ApplicationReplay
384
+ * for multipass collection of the metrics configurations.
385
+ * It's a no-op in case of \ref CUPTI_KernelReplay.
386
+ */
387
+ CUptiResult cuptiProfilerBeginPass(CUpti_Profiler_BeginPass_Params* pParams);
388
+
389
+ /**
390
+ * \brief Replay API: used for multipass collection.
391
+
392
+ * These APIs are used if user chooses to replay by itself \ref CUPTI_UserReplay or \ref CUPTI_ApplicationReplay
393
+ * for multipass collection of the metrics configurations.
394
+ * Its a no-op in case of \ref CUPTI_KernelReplay.
395
+ * Returns information for next pass.
396
+ */
397
+ CUptiResult cuptiProfilerEndPass(CUpti_Profiler_EndPass_Params* pParams);
398
+
399
+ /**
400
+ * \brief Params for cuptiProfilerEnableProfiling
401
+ */
402
+ typedef struct CUpti_Profiler_EnableProfiling_Params
403
+ {
404
+ size_t structSize; //!< [in] CUpti_Profiler_EnableProfiling_Params_STRUCT_SIZE
405
+ void* pPriv; //!< [in] assign to NULL
406
+
407
+ CUcontext ctx; //!< [in] if NULL, the current CUcontext is used
408
+ } CUpti_Profiler_EnableProfiling_Params;
409
+ #define CUpti_Profiler_EnableProfiling_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_EnableProfiling_Params, ctx)
410
+
411
+ /**
412
+ * \brief Params for cuptiProfilerDisableProfiling
413
+ */
414
+ typedef struct CUpti_Profiler_DisableProfiling_Params
415
+ {
416
+ size_t structSize; //!< [in] CUpti_Profiler_DisableProfiling_Params_STRUCT_SIZE
417
+ void* pPriv; //!< [in] assign to NULL
418
+
419
+ CUcontext ctx; //!< [in] if NULL, the current CUcontext is used
420
+ } CUpti_Profiler_DisableProfiling_Params;
421
+ #define CUpti_Profiler_DisableProfiling_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_DisableProfiling_Params, ctx)
422
+
423
+ /**
424
+ * \brief Enables Profiling
425
+ *
426
+ * In \ref CUPTI_AutoRange, these APIs are used to enable/disable profiling for the kernels to be executed in
427
+ * a profiling session.
428
+ */
429
+ CUptiResult CUPTIAPI cuptiProfilerEnableProfiling(CUpti_Profiler_EnableProfiling_Params* pParams);
430
+
431
+ /**
432
+ * \brief Disable Profiling
433
+ *
434
+ * In \ref CUPTI_AutoRange, these APIs are used to enable/disable profiling for the kernels to be executed in
435
+ * a profiling session.
436
+ */
437
+ CUptiResult CUPTIAPI cuptiProfilerDisableProfiling(CUpti_Profiler_DisableProfiling_Params* pParams);
438
+
439
+ /**
440
+ * \brief Params for cuptiProfilerIsPassCollected
441
+ */
442
+ typedef struct CUpti_Profiler_IsPassCollected_Params
443
+ {
444
+ size_t structSize; //!< [in] CUpti_Profiler_IsPassCollected_Params_STRUCT_SIZE
445
+ void* pPriv; //!< [in] assign to NULL
446
+
447
+ CUcontext ctx; //!< [in] if NULL, the current CUcontext is used
448
+ size_t numRangesDropped; //!< [out] number of ranges whose data was dropped in the processed pass
449
+ size_t numTraceBytesDropped; //!< [out] number of bytes not written to TraceBuffer due to buffer full
450
+ uint8_t onePassCollected; //!< [out] true if a pass was successfully decoded
451
+ uint8_t allPassesCollected; //!< [out] becomes true when the last pass has been decoded
452
+ } CUpti_Profiler_IsPassCollected_Params;
453
+ #define CUpti_Profiler_IsPassCollected_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_IsPassCollected_Params, allPassesCollected)
454
+
455
+ /**
456
+ * \brief Asynchronous call to query if the submitted pass to GPU is collected
457
+ *
458
+ */
459
+ CUptiResult CUPTIAPI cuptiProfilerIsPassCollected(CUpti_Profiler_IsPassCollected_Params* pParams);
460
+
461
+ /**
462
+ * \brief Params for cuptiProfilerFlushCounterData
463
+ */
464
+ typedef struct CUpti_Profiler_FlushCounterData_Params
465
+ {
466
+ size_t structSize; //!< [in] CUpti_Profiler_FlushCounterData_Params_STRUCT_SIZE
467
+ void* pPriv; //!< [in] assign to NULL
468
+
469
+ CUcontext ctx; //!< [in] if NULL, the current CUcontext is used
470
+ size_t numRangesDropped; //!< [out] number of ranges whose data was dropped in the processed passes
471
+ size_t numTraceBytesDropped; //!< [out] number of bytes not written to TraceBuffer due to buffer full
472
+ } CUpti_Profiler_FlushCounterData_Params;
473
+ #define CUpti_Profiler_FlushCounterData_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_FlushCounterData_Params, numTraceBytesDropped)
474
+
475
+ /**
476
+ * \brief Decode all the submitted passes
477
+ *
478
+ * Flush Counter data API to ensure every pass is decoded into the counterDataImage passed at beginSession.
479
+ * This will cause the CPU/GPU sync to collect all the undecoded pass.
480
+ */
481
+ CUptiResult CUPTIAPI cuptiProfilerFlushCounterData(CUpti_Profiler_FlushCounterData_Params* pParams);
482
+
483
+ typedef struct CUpti_Profiler_PushRange_Params
484
+ {
485
+ size_t structSize; //!< [in] CUpti_Profiler_PushRange_Params_STRUCT_SIZE
486
+ void* pPriv; //!< [in] assign to NULL
487
+
488
+ CUcontext ctx; //!< [in] if NULL, the current CUcontext is used
489
+ const char* pRangeName; //!< [in] specifies the range for subsequent launches; must not be NULL
490
+ size_t rangeNameLength; //!< [in] assign to strlen(pRangeName) if known; if set to zero, the library will call strlen()
491
+ } CUpti_Profiler_PushRange_Params;
492
+ #define CUpti_Profiler_PushRange_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_PushRange_Params, rangeNameLength)
493
+
494
+ typedef struct CUpti_Profiler_PopRange_Params
495
+ {
496
+ size_t structSize; //!< [in] CUpti_Profiler_PopRange_Params_STRUCT_SIZE
497
+ void* pPriv; //!< [in] assign to NULL
498
+
499
+ CUcontext ctx; //!< [in] if NULL, the current CUcontext is used
500
+ } CUpti_Profiler_PopRange_Params;
501
+ #define CUpti_Profiler_PopRange_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_PopRange_Params, ctx)
502
+
503
+
504
+ /**
505
+ * \brief Range API's : Push user range
506
+ *
507
+ * Counter data is collected per unique range-stack. Identified by a string label passsed by the user.
508
+ * It's an invalid operation in case of \ref CUPTI_AutoRange.
509
+ */
510
+ CUptiResult CUPTIAPI cuptiProfilerPushRange(CUpti_Profiler_PushRange_Params *pParams);
511
+
512
+ /**
513
+ * \brief Range API's : Pop user range
514
+ *
515
+ * Counter data is collected per unique range-stack. Identified by a string label passsed by the user.
516
+ * It's an invalid operation in case of \ref CUPTI_AutoRange.
517
+ */
518
+ CUptiResult CUPTIAPI cuptiProfilerPopRange(CUpti_Profiler_PopRange_Params *pParams);
519
+
520
+ /**
521
+ * \brief Params for cuptiProfilerGetCounterAvailability
522
+ */
523
+ typedef struct CUpti_Profiler_GetCounterAvailability_Params
524
+ {
525
+ size_t structSize; //!< [in] CUpti_Profiler_GetCounterAvailability_Params_STRUCT_SIZE
526
+ void* pPriv; //!< [in] assign to NULL
527
+ CUcontext ctx; //!< [in] if NULL, the current CUcontext is used
528
+ size_t counterAvailabilityImageSize; //!< [in/out] If `pCounterAvailabilityImage` is NULL, then the required size is returned in
529
+ //!< `counterAvailabilityImageSize`, otherwise `counterAvailabilityImageSize` should be set to the size of
530
+ //!< `pCounterAvailabilityImage`, and on return it would be overwritten with number of actual bytes copied
531
+ uint8_t* pCounterAvailabilityImage; //!< [in] buffer receiving counter availability image, may be NULL
532
+ } CUpti_Profiler_GetCounterAvailability_Params;
533
+ #define CUpti_Profiler_GetCounterAvailability_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_GetCounterAvailability_Params, pCounterAvailabilityImage)
534
+
535
+ /**
536
+ * \brief Query counter availibility
537
+ *
538
+ * Use this API to query counter availability information in a buffer which can be used to filter unavailable raw metrics on host.
539
+ * Note: This API may fail, if any profiling or sampling session is active on the specified context or its device.
540
+ */
541
+ CUptiResult CUPTIAPI cuptiProfilerGetCounterAvailability(CUpti_Profiler_GetCounterAvailability_Params *pParams);
542
+
543
+ /// Generic support level enum for CUPTI
544
+ typedef enum
545
+ {
546
+ CUPTI_PROFILER_CONFIGURATION_UNKNOWN = 0, //!< Configuration support level unknown - either detection code errored out before setting this value, or unable to determine it
547
+ CUPTI_PROFILER_CONFIGURATION_UNSUPPORTED, //!< Profiling is unavailable. For specific feature fields, this means that the current configuration of this feature does not work with profiling. For instance, SLI-enabled devices do not support profiling, and this value would be returned for SLI on an SLI-enabled device.
548
+ CUPTI_PROFILER_CONFIGURATION_DISABLED, //!< Profiling would be available for this configuration, but was disabled by the system
549
+ CUPTI_PROFILER_CONFIGURATION_SUPPORTED //!< Profiling is supported. For specific feature fields, this means that the current configuration of this feature works with profiling. For instance, SLI-enabled devices do not support profiling, and this value would only be returned for devices which are not SLI-enabled.
550
+ } CUpti_Profiler_Support_Level;
551
+
552
+ /**
553
+ * \brief Params for cuptiProfilerDeviceSupported
554
+ */
555
+ typedef struct
556
+ {
557
+ size_t structSize; //!< [in] Must be CUpti_Profiler_DeviceSupported_Params_STRUCT_SIZE
558
+ void *pPriv; //!< [in] assign to NULL
559
+ CUdevice cuDevice; //!< [in] if NULL, the current CUcontext is used
560
+
561
+ CUpti_Profiler_Support_Level isSupported; //!< [out] overall SUPPORTED / UNSUPPORTED flag representing whether Profiling and PC Sampling APIs work on the given device and configuration. SUPPORTED if all following flags are SUPPORTED, UNSUPPORTED otherwise.
562
+
563
+ CUpti_Profiler_Support_Level architecture; //!< [out] SUPPORTED if the device architecture level supports the Profiling API (Compute Capability >= 7.0), UNSUPPORTED otherwise
564
+ CUpti_Profiler_Support_Level sli; //!< [out] SUPPORTED if SLI is not enabled, UNSUPPORTED otherwise
565
+ CUpti_Profiler_Support_Level vGpu; //!< [out] SUPPORTED if vGPU is supported and profiling is enabled, DISABLED if profiling is supported but not enabled, UNSUPPORTED otherwise
566
+ CUpti_Profiler_Support_Level confidentialCompute; //!< [out] SUPPORTED if confidential compute is not enabled, UNSUPPORTED otherwise
567
+ CUpti_Profiler_Support_Level cmp; //!< [out] SUPPORTED if not NVIDIA Crypto Mining Processors (CMP), UNSUPPORTED otherwise
568
+ } CUpti_Profiler_DeviceSupported_Params;
569
+ #define CUpti_Profiler_DeviceSupported_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_DeviceSupported_Params, confidentialCompute)
570
+
571
+ /**
572
+ * \brief Query device compatibility with Profiling API
573
+ *
574
+ * Use this call to determine whether a compute device and configuration are compatible with the Profiling API.
575
+ * If the configuration does not support profiling, one of several flags will indicate why.
576
+ */
577
+ CUptiResult CUPTIAPI cuptiProfilerDeviceSupported(CUpti_Profiler_DeviceSupported_Params *pParams);
578
+
579
+ /** @} */ /* END CUPTI_METRIC_API */
580
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
581
+ #pragma GCC visibility pop
582
+ #endif
583
+
584
+ #ifdef __cplusplus
585
+ } /* extern "C" */
586
+ #endif
587
+
588
+ #endif /*_CUPTI_PROFILER_TARGET_H_*/
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/generated_nvtx_meta.h ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2013-2018 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
51
+ #pragma GCC visibility push(default)
52
+ #endif
53
+
54
+ // *************************************************************************
55
+ // Definitions of structs to hold parameters for each function
56
+ // *************************************************************************
57
+
58
+ typedef struct nvtxMarkEx_params_st {
59
+ const nvtxEventAttributes_t* eventAttrib;
60
+ } nvtxMarkEx_params;
61
+
62
+ typedef struct nvtxMarkA_params_st {
63
+ const char* message;
64
+ } nvtxMarkA_params;
65
+
66
+ typedef struct nvtxMarkW_params_st {
67
+ const wchar_t* message;
68
+ } nvtxMarkW_params;
69
+
70
+ typedef struct nvtxRangeStartEx_params_st {
71
+ const nvtxEventAttributes_t* eventAttrib;
72
+ } nvtxRangeStartEx_params;
73
+
74
+ typedef struct nvtxRangeStartA_params_st {
75
+ const char* message;
76
+ } nvtxRangeStartA_params;
77
+
78
+ typedef struct nvtxRangeStartW_params_st {
79
+ const wchar_t* message;
80
+ } nvtxRangeStartW_params;
81
+
82
+ typedef struct nvtxRangeEnd_params_st {
83
+ nvtxRangeId_t id;
84
+ } nvtxRangeEnd_params;
85
+
86
+ typedef struct nvtxRangePushEx_params_st {
87
+ const nvtxEventAttributes_t* eventAttrib;
88
+ } nvtxRangePushEx_params;
89
+
90
+ typedef struct nvtxRangePushA_params_st {
91
+ const char* message;
92
+ } nvtxRangePushA_params;
93
+
94
+ typedef struct nvtxRangePushW_params_st {
95
+ const wchar_t* message;
96
+ } nvtxRangePushW_params;
97
+
98
+ typedef struct nvtxRangePop_params_st {
99
+ /* WAR: Windows compiler doesn't allow empty structs */
100
+ /* This field shouldn't be used */
101
+ void *dummy;
102
+ } nvtxRangePop_params;
103
+
104
+ typedef struct nvtxNameCategoryA_params_st {
105
+ uint32_t category;
106
+ const char* name;
107
+ } nvtxNameCategoryA_params;
108
+
109
+ typedef struct nvtxNameCategoryW_params_st {
110
+ uint32_t category;
111
+ const wchar_t* name;
112
+ } nvtxNameCategoryW_params;
113
+
114
+ typedef struct nvtxNameOsThreadA_params_st {
115
+ uint32_t threadId;
116
+ const char* name;
117
+ } nvtxNameOsThreadA_params;
118
+
119
+ typedef struct nvtxNameOsThreadW_params_st {
120
+ uint32_t threadId;
121
+ const wchar_t* name;
122
+ } nvtxNameOsThreadW_params;
123
+
124
+ typedef struct nvtxNameCuDeviceA_params_st {
125
+ CUdevice device;
126
+ const char* name;
127
+ } nvtxNameCuDeviceA_params;
128
+
129
+ typedef struct nvtxNameCuDeviceW_params_st {
130
+ CUdevice device;
131
+ const wchar_t* name;
132
+ } nvtxNameCuDeviceW_params;
133
+
134
+ typedef struct nvtxNameCuContextA_params_st {
135
+ CUcontext context;
136
+ const char* name;
137
+ } nvtxNameCuContextA_params;
138
+
139
+ typedef struct nvtxNameCuContextW_params_st {
140
+ CUcontext context;
141
+ const wchar_t* name;
142
+ } nvtxNameCuContextW_params;
143
+
144
+ typedef struct nvtxNameCuStreamA_params_st {
145
+ CUstream stream;
146
+ const char* name;
147
+ } nvtxNameCuStreamA_params;
148
+
149
+ typedef struct nvtxNameCuStreamW_params_st {
150
+ CUstream stream;
151
+ const wchar_t* name;
152
+ } nvtxNameCuStreamW_params;
153
+
154
+ typedef struct nvtxNameCuEventA_params_st {
155
+ CUevent event;
156
+ const char* name;
157
+ } nvtxNameCuEventA_params;
158
+
159
+ typedef struct nvtxNameCuEventW_params_st {
160
+ CUevent event;
161
+ const wchar_t* name;
162
+ } nvtxNameCuEventW_params;
163
+
164
+ typedef struct nvtxNameCudaDeviceA_params_st {
165
+ int device;
166
+ const char* name;
167
+ } nvtxNameCudaDeviceA_params;
168
+
169
+ typedef struct nvtxNameCudaDeviceW_params_st {
170
+ int device;
171
+ const wchar_t* name;
172
+ } nvtxNameCudaDeviceW_params;
173
+
174
+ typedef struct nvtxNameCudaStreamA_params_st {
175
+ cudaStream_t stream;
176
+ const char* name;
177
+ } nvtxNameCudaStreamA_params;
178
+
179
+ typedef struct nvtxNameCudaStreamW_params_st {
180
+ cudaStream_t stream;
181
+ const wchar_t* name;
182
+ } nvtxNameCudaStreamW_params;
183
+
184
+ typedef struct nvtxNameCudaEventA_params_st {
185
+ cudaEvent_t event;
186
+ const char* name;
187
+ } nvtxNameCudaEventA_params;
188
+
189
+ typedef struct nvtxNameCudaEventW_params_st {
190
+ cudaEvent_t event;
191
+ const wchar_t* name;
192
+ } nvtxNameCudaEventW_params;
193
+
194
+ typedef struct nvtxDomainCreateA_params_st {
195
+ const char* name;
196
+ } nvtxDomainCreateA_params;
197
+
198
+ typedef struct nvtxDomainDestroy_params_st {
199
+ nvtxDomainHandle_t domain;
200
+ } nvtxDomainDestroy_params;
201
+
202
+ typedef struct nvtxDomainMarkEx_params_st {
203
+ nvtxDomainHandle_t domain;
204
+ nvtxMarkEx_params core;
205
+ } nvtxDomainMarkEx_params;
206
+
207
+ typedef struct nvtxDomainRangeStartEx_params_st {
208
+ nvtxDomainHandle_t domain;
209
+ nvtxRangeStartEx_params core;
210
+ } nvtxDomainRangeStartEx_params;
211
+
212
+ typedef struct nvtxDomainRangeEnd_params_st {
213
+ nvtxDomainHandle_t domain;
214
+ nvtxRangeEnd_params core;
215
+ } nvtxDomainRangeEnd_params;
216
+
217
+ typedef struct nvtxDomainRangePushEx_params_st {
218
+ nvtxDomainHandle_t domain;
219
+ nvtxRangePushEx_params core;
220
+ } nvtxDomainRangePushEx_params;
221
+
222
+ typedef struct nvtxDomainRangePop_params_st {
223
+ nvtxDomainHandle_t domain;
224
+ } nvtxDomainRangePop_params;
225
+
226
+ typedef struct nvtxSyncUserCreate_params_st {
227
+ nvtxDomainHandle_t domain;
228
+ const nvtxSyncUserAttributes_t* attribs;
229
+ } nvtxSyncUserCreate_params;
230
+
231
+ typedef struct nvtxSyncUserCommon_params_st {
232
+ nvtxSyncUser_t handle;
233
+ } nvtxSyncUserCommon_params;
234
+
235
+ typedef struct nvtxDomainRegisterStringA_params_st {
236
+ nvtxDomainHandle_t domain;
237
+ const char* string;
238
+ } nvtxDomainRegisterStringA_params;
239
+
240
+ typedef struct nvtxDomainRegisterStringW_params_st {
241
+ nvtxDomainHandle_t domain;
242
+ const char* string;
243
+ } nvtxDomainRegisterStringW_params;
244
+
245
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
246
+ #pragma GCC visibility pop
247
+ #endif
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/nvperf_common.h ADDED
@@ -0,0 +1,273 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #ifndef NVPERF_COMMON_H
2
+ #define NVPERF_COMMON_H
3
+
4
+ /*
5
+ * Copyright 2014-2022 NVIDIA Corporation. All rights reserved.
6
+ *
7
+ * NOTICE TO USER:
8
+ *
9
+ * This source code is subject to NVIDIA ownership rights under U.S. and
10
+ * international Copyright laws.
11
+ *
12
+ * This software and the information contained herein is PROPRIETARY and
13
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and conditions
14
+ * of a form of NVIDIA software license agreement.
15
+ *
16
+ * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
17
+ * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
18
+ * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
19
+ * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
20
+ * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
21
+ * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
22
+ * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
23
+ * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
24
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
25
+ * OR PERFORMANCE OF THIS SOURCE CODE.
26
+ *
27
+ * U.S. Government End Users. This source code is a "commercial item" as
28
+ * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
29
+ * "commercial computer software" and "commercial computer software
30
+ * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
31
+ * and is provided to the U.S. Government only as a commercial end item.
32
+ * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
33
+ * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
34
+ * source code with only those rights set forth herein.
35
+ *
36
+ * Any use of this source code in individual and commercial software must
37
+ * include, in the user documentation and internal comments to the code,
38
+ * the above Disclaimer and U.S. Government End Users Notice.
39
+ */
40
+
41
+ #include <stddef.h>
42
+ #include <stdint.h>
43
+
44
+ #if defined(__GNUC__) && defined(NVPA_SHARED_LIB)
45
+ #pragma GCC visibility push(default)
46
+ #if !defined(NVPW_LOCAL)
47
+ #define NVPW_LOCAL __attribute__ ((visibility ("hidden")))
48
+ #endif
49
+ #else
50
+ #if !defined(NVPW_LOCAL)
51
+ #define NVPW_LOCAL
52
+ #endif
53
+ #endif
54
+
55
+ #ifdef __cplusplus
56
+ extern "C" {
57
+ #endif
58
+
59
+ /**
60
+ * @file nvperf_common.h
61
+ */
62
+
63
+ #ifndef NVPERF_NVPA_STATUS_DEFINED
64
+ #define NVPERF_NVPA_STATUS_DEFINED
65
+
66
+ /// Error codes.
67
+ typedef enum NVPA_Status
68
+ {
69
+ /// Success
70
+ NVPA_STATUS_SUCCESS = 0,
71
+ /// Generic error.
72
+ NVPA_STATUS_ERROR = 1,
73
+ /// Internal error. Please file a bug!
74
+ NVPA_STATUS_INTERNAL_ERROR = 2,
75
+ /// NVPW_InitializeTarget() has not been called yet.
76
+ NVPA_STATUS_NOT_INITIALIZED = 3,
77
+ /// The NvPerf DLL/DSO could not be loaded during NVPW_Initialize*.
78
+ NVPA_STATUS_NOT_LOADED = 4,
79
+ /// The function was not found in this version of the NvPerf DLL/DSO.
80
+ NVPA_STATUS_FUNCTION_NOT_FOUND = 5,
81
+ /// The request was intentionally not supported.
82
+ NVPA_STATUS_NOT_SUPPORTED = 6,
83
+ /// The request was not implemented by this version.
84
+ NVPA_STATUS_NOT_IMPLEMENTED = 7,
85
+ /// Invalid argument.
86
+ NVPA_STATUS_INVALID_ARGUMENT = 8,
87
+ /// UNUSED
88
+ NVPA_STATUS_INVALID_METRIC_ID = 9,
89
+ /// No driver has been loaded via NVPW_*_LoadDriver().
90
+ NVPA_STATUS_DRIVER_NOT_LOADED = 10,
91
+ /// Failed memory allocation.
92
+ NVPA_STATUS_OUT_OF_MEMORY = 11,
93
+ /// UNUSED
94
+ NVPA_STATUS_INVALID_THREAD_STATE = 12,
95
+ /// UNUSED
96
+ NVPA_STATUS_FAILED_CONTEXT_ALLOC = 13,
97
+ /// The specified GPU is not supported.
98
+ NVPA_STATUS_UNSUPPORTED_GPU = 14,
99
+ /// The installed NVIDIA driver is too old.
100
+ NVPA_STATUS_INSUFFICIENT_DRIVER_VERSION = 15,
101
+ /// UNUSED
102
+ NVPA_STATUS_OBJECT_NOT_REGISTERED = 16,
103
+ /// Profiling permission not granted; see https://developer.nvidia.com/nvidia-development-tools-solutions-
104
+ /// ERR_NVGPUCTRPERM-permission-issue-performance-counters
105
+ NVPA_STATUS_INSUFFICIENT_PRIVILEGE = 17,
106
+ /// UNUSED
107
+ NVPA_STATUS_INVALID_CONTEXT_STATE = 18,
108
+ /// UNUSED
109
+ NVPA_STATUS_INVALID_OBJECT_STATE = 19,
110
+ /// The request could not be fulfilled because a system resource is already in use.
111
+ NVPA_STATUS_RESOURCE_UNAVAILABLE = 20,
112
+ /// UNUSED
113
+ NVPA_STATUS_DRIVER_LOADED_TOO_LATE = 21,
114
+ /// The provided buffer is not large enough.
115
+ NVPA_STATUS_INSUFFICIENT_SPACE = 22,
116
+ /// UNUSED
117
+ NVPA_STATUS_OBJECT_MISMATCH = 23,
118
+ /// Virtualized GPU (vGPU) is not supported.
119
+ NVPA_STATUS_VIRTUALIZED_DEVICE_NOT_SUPPORTED = 24,
120
+ /// Profiling permission was not granted or the device was disabled.
121
+ NVPA_STATUS_PROFILING_NOT_ALLOWED = 25,
122
+ NVPA_STATUS__COUNT
123
+ } NVPA_Status;
124
+
125
+
126
+ #endif // NVPERF_NVPA_STATUS_DEFINED
127
+
128
+
129
+ #ifndef NVPERF_NVPA_ACTIVITY_KIND_DEFINED
130
+ #define NVPERF_NVPA_ACTIVITY_KIND_DEFINED
131
+
132
+ /// The configuration's activity-kind dictates which types of data may be collected.
133
+ typedef enum NVPA_ActivityKind
134
+ {
135
+ /// Invalid value.
136
+ NVPA_ACTIVITY_KIND_INVALID = 0,
137
+ /// A workload-centric activity for serialized and pipelined collection.
138
+ ///
139
+ /// Profiler is capable of collecting both serialized and pipelined metrics. The library introduces any
140
+ /// synchronization required to collect serialized metrics.
141
+ NVPA_ACTIVITY_KIND_PROFILER,
142
+ /// A realtime activity for sampling counters from the CPU or GPU.
143
+ NVPA_ACTIVITY_KIND_REALTIME_SAMPLED,
144
+ /// A realtime activity for profiling counters from the CPU or GPU without CPU/GPU synchronizations.
145
+ NVPA_ACTIVITY_KIND_REALTIME_PROFILER,
146
+ NVPA_ACTIVITY_KIND__COUNT
147
+ } NVPA_ActivityKind;
148
+
149
+
150
+ #endif // NVPERF_NVPA_ACTIVITY_KIND_DEFINED
151
+
152
+
153
+ #ifndef NVPERF_NVPA_BOOL_DEFINED
154
+ #define NVPERF_NVPA_BOOL_DEFINED
155
+ /// The type used for boolean values.
156
+ typedef uint8_t NVPA_Bool;
157
+ #endif // NVPERF_NVPA_BOOL_DEFINED
158
+
159
+ #ifndef NVPA_STRUCT_SIZE
160
+ #define NVPA_STRUCT_SIZE(type_, lastfield_) (offsetof(type_, lastfield_) + sizeof(((type_*)0)->lastfield_))
161
+ #endif // NVPA_STRUCT_SIZE
162
+
163
+ #ifndef NVPW_FIELD_EXISTS
164
+ #define NVPW_FIELD_EXISTS(pParams_, name_) \
165
+ ((pParams_)->structSize >= (size_t)((const uint8_t*)(&(pParams_)->name_) + sizeof(pParams_)->name_ - (const uint8_t*)(pParams_)))
166
+ #endif // NVPW_FIELD_EXISTS
167
+
168
+
169
+ #ifndef NVPERF_NVPA_GETPROCADDRESS_DEFINED
170
+ #define NVPERF_NVPA_GETPROCADDRESS_DEFINED
171
+
172
+ typedef NVPA_Status (*NVPA_GenericFn)(void);
173
+
174
+
175
+ ///
176
+ /// Gets the address of an NvPerf API function.
177
+ ///
178
+ /// \return A function pointer to the function, or NULL if the function is not available.
179
+ ///
180
+ /// \param pFunctionName [in] Name of the function to retrieve.
181
+ NVPA_GenericFn NVPA_GetProcAddress(const char* pFunctionName);
182
+
183
+ #endif
184
+
185
+ #ifndef NVPERF_NVPW_SETLIBRARYLOADPATHS_DEFINED
186
+ #define NVPERF_NVPW_SETLIBRARYLOADPATHS_DEFINED
187
+
188
+
189
+ typedef struct NVPW_SetLibraryLoadPaths_Params
190
+ {
191
+ /// [in]
192
+ size_t structSize;
193
+ /// [in] assign to NULL
194
+ void* pPriv;
195
+ /// [in] number of paths in ppPaths
196
+ size_t numPaths;
197
+ /// [in] array of null-terminated paths
198
+ const char** ppPaths;
199
+ } NVPW_SetLibraryLoadPaths_Params;
200
+ #define NVPW_SetLibraryLoadPaths_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_SetLibraryLoadPaths_Params, ppPaths)
201
+
202
+ /// Sets library search path for \ref NVPW_InitializeHost() and \ref NVPW_InitializeTarget().
203
+ /// \ref NVPW_InitializeHost() and \ref NVPW_InitializeTarget load the NvPerf DLL/DSO. This function sets
204
+ /// ordered paths that will be searched with the LoadLibrary() or dlopen() call.
205
+ /// If load paths are set by this function, the default set of load paths
206
+ /// will not be attempted.
207
+ /// Each path must point at a directory (not a file name).
208
+ /// This function is not thread-safe.
209
+ /// Example Usage:
210
+ /// \code
211
+ /// const char* paths[] = {
212
+ /// "path1", "path2", etc
213
+ /// };
214
+ /// NVPW_SetLibraryLoadPaths_Params params{NVPW_SetLibraryLoadPaths_Params_STRUCT_SIZE};
215
+ /// params.numPaths = sizeof(paths)/sizeof(paths[0]);
216
+ /// params.ppPaths = paths;
217
+ /// NVPW_SetLibraryLoadPaths(&params);
218
+ /// NVPW_InitializeHost();
219
+ /// params.numPaths = 0;
220
+ /// params.ppPaths = NULL;
221
+ /// NVPW_SetLibraryLoadPaths(&params);
222
+ /// \endcode
223
+ NVPA_Status NVPW_SetLibraryLoadPaths(NVPW_SetLibraryLoadPaths_Params* pParams);
224
+
225
+ typedef struct NVPW_SetLibraryLoadPathsW_Params
226
+ {
227
+ /// [in]
228
+ size_t structSize;
229
+ /// [in] assign to NULL
230
+ void* pPriv;
231
+ /// [in] number of paths in ppwPaths
232
+ size_t numPaths;
233
+ /// [in] array of null-terminated paths
234
+ const wchar_t** ppwPaths;
235
+ } NVPW_SetLibraryLoadPathsW_Params;
236
+ #define NVPW_SetLibraryLoadPathsW_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_SetLibraryLoadPathsW_Params, ppwPaths)
237
+
238
+ /// Sets library search path for \ref NVPW_InitializeHost() and \ref NVPW_InitializeTarget().
239
+ /// \ref NVPW_InitializeHost() and \ref NVPW_InitializeTarget load the NvPerf DLL/DSO. This function sets
240
+ /// ordered paths that will be searched with the LoadLibrary() or dlopen() call.
241
+ /// If load paths are set by this function, the default set of load paths
242
+ /// will not be attempted.
243
+ /// Each path must point at a directory (not a file name).
244
+ /// This function is not thread-safe.
245
+ /// Example Usage:
246
+ /// \code
247
+ /// const wchar_t* wpaths[] = {
248
+ /// L"path1", L"path2", etc
249
+ /// };
250
+ /// NVPW_SetLibraryLoadPathsW_Params params{NVPW_SetLibraryLoadPathsW_Params_STRUCT_SIZE};
251
+ /// params.numPaths = sizeof(wpaths)/sizeof(wpaths[0]);
252
+ /// params.ppwPaths = wpaths;
253
+ /// NVPW_SetLibraryLoadPathsW(&params);
254
+ /// NVPW_InitializeHost();
255
+ /// params.numPaths = 0;
256
+ /// params.ppwPaths = NULL;
257
+ /// NVPW_SetLibraryLoadPathsW(&params);
258
+ /// \endcode
259
+ NVPA_Status NVPW_SetLibraryLoadPathsW(NVPW_SetLibraryLoadPathsW_Params* pParams);
260
+
261
+ #endif
262
+
263
+
264
+
265
+ #ifdef __cplusplus
266
+ } // extern "C"
267
+ #endif
268
+
269
+ #if defined(__GNUC__) && defined(NVPA_SHARED_LIB)
270
+ #pragma GCC visibility pop
271
+ #endif
272
+
273
+ #endif // NVPERF_COMMON_H
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/nvperf_cuda_host.h ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #ifndef NVPERF_CUDA_HOST_H
2
+ #define NVPERF_CUDA_HOST_H
3
+
4
+ /*
5
+ * Copyright 2014-2022 NVIDIA Corporation. All rights reserved.
6
+ *
7
+ * NOTICE TO USER:
8
+ *
9
+ * This source code is subject to NVIDIA ownership rights under U.S. and
10
+ * international Copyright laws.
11
+ *
12
+ * This software and the information contained herein is PROPRIETARY and
13
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and conditions
14
+ * of a form of NVIDIA software license agreement.
15
+ *
16
+ * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
17
+ * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
18
+ * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
19
+ * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
20
+ * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
21
+ * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
22
+ * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
23
+ * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
24
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
25
+ * OR PERFORMANCE OF THIS SOURCE CODE.
26
+ *
27
+ * U.S. Government End Users. This source code is a "commercial item" as
28
+ * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
29
+ * "commercial computer software" and "commercial computer software
30
+ * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
31
+ * and is provided to the U.S. Government only as a commercial end item.
32
+ * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
33
+ * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
34
+ * source code with only those rights set forth herein.
35
+ *
36
+ * Any use of this source code in individual and commercial software must
37
+ * include, in the user documentation and internal comments to the code,
38
+ * the above Disclaimer and U.S. Government End Users Notice.
39
+ */
40
+
41
+ #include <stddef.h>
42
+ #include <stdint.h>
43
+ #include "nvperf_common.h"
44
+ #include "nvperf_host.h"
45
+
46
+ #if defined(__GNUC__) && defined(NVPA_SHARED_LIB)
47
+ #pragma GCC visibility push(default)
48
+ #if !defined(NVPW_LOCAL)
49
+ #define NVPW_LOCAL __attribute__ ((visibility ("hidden")))
50
+ #endif
51
+ #else
52
+ #if !defined(NVPW_LOCAL)
53
+ #define NVPW_LOCAL
54
+ #endif
55
+ #endif
56
+
57
+ #ifdef __cplusplus
58
+ extern "C" {
59
+ #endif
60
+
61
+ /**
62
+ * @file nvperf_cuda_host.h
63
+ */
64
+
65
+ /// 'NVPA_MetricsContext' and its APIs are deprecated, please use 'NVPW_MetricsEvaluator' and its APIs instead.
66
+ typedef struct NVPA_MetricsContext NVPA_MetricsContext;
67
+
68
+ typedef struct NVPW_CUDA_MetricsContext_Create_Params
69
+ {
70
+ /// [in]
71
+ size_t structSize;
72
+ /// [in] assign to NULL
73
+ void* pPriv;
74
+ /// [in]
75
+ const char* pChipName;
76
+ /// [out]
77
+ struct NVPA_MetricsContext* pMetricsContext;
78
+ } NVPW_CUDA_MetricsContext_Create_Params;
79
+ #define NVPW_CUDA_MetricsContext_Create_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CUDA_MetricsContext_Create_Params, pMetricsContext)
80
+
81
+ NVPA_Status NVPW_CUDA_MetricsContext_Create(NVPW_CUDA_MetricsContext_Create_Params* pParams);
82
+
83
+ typedef struct NVPW_CUDA_RawMetricsConfig_Create_Params
84
+ {
85
+ /// [in]
86
+ size_t structSize;
87
+ /// [in] assign to NULL
88
+ void* pPriv;
89
+ /// [in]
90
+ NVPA_ActivityKind activityKind;
91
+ /// [in]
92
+ const char* pChipName;
93
+ /// [out] new NVPA_RawMetricsConfig object
94
+ struct NVPA_RawMetricsConfig* pRawMetricsConfig;
95
+ } NVPW_CUDA_RawMetricsConfig_Create_Params;
96
+ #define NVPW_CUDA_RawMetricsConfig_Create_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CUDA_RawMetricsConfig_Create_Params, pRawMetricsConfig)
97
+
98
+ NVPA_Status NVPW_CUDA_RawMetricsConfig_Create(NVPW_CUDA_RawMetricsConfig_Create_Params* pParams);
99
+
100
+ typedef struct NVPW_CUDA_RawMetricsConfig_Create_V2_Params
101
+ {
102
+ /// [in]
103
+ size_t structSize;
104
+ /// [in] assign to NULL
105
+ void* pPriv;
106
+ /// [in]
107
+ NVPA_ActivityKind activityKind;
108
+ /// [in] accepted for chips supported at the time-of-release.
109
+ const char* pChipName;
110
+ /// [in] buffer with counter availability image - required for future chip support
111
+ const uint8_t* pCounterAvailabilityImage;
112
+ /// [out] new NVPA_RawMetricsConfig object
113
+ struct NVPA_RawMetricsConfig* pRawMetricsConfig;
114
+ } NVPW_CUDA_RawMetricsConfig_Create_V2_Params;
115
+ #define NVPW_CUDA_RawMetricsConfig_Create_V2_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CUDA_RawMetricsConfig_Create_V2_Params, pRawMetricsConfig)
116
+
117
+ /// Use either 'pChipName' or 'pCounterAvailabilityImage'.
118
+ NVPA_Status NVPW_CUDA_RawMetricsConfig_Create_V2(NVPW_CUDA_RawMetricsConfig_Create_V2_Params* pParams);
119
+
120
+ typedef struct NVPW_CUDA_CounterDataBuilder_Create_Params
121
+ {
122
+ /// [in]
123
+ size_t structSize;
124
+ /// [in] assign to NULL
125
+ void* pPriv;
126
+ /// [in] accepted for chips supported at the time-of-release.
127
+ const char* pChipName;
128
+ /// [in] buffer with counter availability image - required for future chip support
129
+ const uint8_t* pCounterAvailabilityImage;
130
+ /// [out] new NVPA_CounterDataBuilder object
131
+ struct NVPA_CounterDataBuilder* pCounterDataBuilder;
132
+ } NVPW_CUDA_CounterDataBuilder_Create_Params;
133
+ #define NVPW_CUDA_CounterDataBuilder_Create_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CUDA_CounterDataBuilder_Create_Params, pCounterDataBuilder)
134
+
135
+ /// Use either 'pChipName' or 'pCounterAvailabilityImage'.
136
+ NVPA_Status NVPW_CUDA_CounterDataBuilder_Create(NVPW_CUDA_CounterDataBuilder_Create_Params* pParams);
137
+
138
+ typedef struct NVPW_MetricsEvaluator NVPW_MetricsEvaluator;
139
+
140
+ typedef struct NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params
141
+ {
142
+ /// [in]
143
+ size_t structSize;
144
+ /// [in] assign to NULL
145
+ void* pPriv;
146
+ /// [in] accepted for chips supported at the time-of-release.
147
+ const char* pChipName;
148
+ /// [in] buffer with counter availability image - required for future chip support
149
+ const uint8_t* pCounterAvailabilityImage;
150
+ /// [out]
151
+ size_t scratchBufferSize;
152
+ } NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params;
153
+ #define NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params, scratchBufferSize)
154
+
155
+ /// Use either 'pChipName' or 'pCounterAvailabilityImage'.
156
+ NVPA_Status NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize(NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params* pParams);
157
+
158
+ typedef struct NVPW_CUDA_MetricsEvaluator_Initialize_Params
159
+ {
160
+ /// [in]
161
+ size_t structSize;
162
+ /// [in] assign to NULL
163
+ void* pPriv;
164
+ /// [in]
165
+ uint8_t* pScratchBuffer;
166
+ /// [in] the size of the 'pScratchBuffer' array, should be at least the size of the 'scratchBufferSize' returned
167
+ /// by 'NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize'
168
+ size_t scratchBufferSize;
169
+ /// [in] accepted for chips supported at the time-of-release.
170
+ const char* pChipName;
171
+ /// [in] buffer with counter availability image - required for future chip support
172
+ const uint8_t* pCounterAvailabilityImage;
173
+ /// [in]
174
+ const uint8_t* pCounterDataImage;
175
+ /// [in] must be provided if 'pCounterDataImage' is not NULL
176
+ size_t counterDataImageSize;
177
+ /// [out]
178
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
179
+ } NVPW_CUDA_MetricsEvaluator_Initialize_Params;
180
+ #define NVPW_CUDA_MetricsEvaluator_Initialize_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CUDA_MetricsEvaluator_Initialize_Params, pMetricsEvaluator)
181
+
182
+ /// Use one of 'pChipName', 'pCounterAvailabilityImage', or 'pCounterDataImage'. 'pChipName' or
183
+ /// 'pCounterAvailabilityImage' will create a metrics evaluator based on a virtual device while 'pCounterDataImage'
184
+ /// will create a metrics evaluator based on the actual device.
185
+ NVPA_Status NVPW_CUDA_MetricsEvaluator_Initialize(NVPW_CUDA_MetricsEvaluator_Initialize_Params* pParams);
186
+
187
+
188
+
189
+ #ifdef __cplusplus
190
+ } // extern "C"
191
+ #endif
192
+
193
+ #if defined(__GNUC__) && defined(NVPA_SHARED_LIB)
194
+ #pragma GCC visibility pop
195
+ #endif
196
+
197
+ #endif // NVPERF_CUDA_HOST_H
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/nvperf_host.h ADDED
@@ -0,0 +1,1471 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #ifndef NVPERF_HOST_H
2
+ #define NVPERF_HOST_H
3
+
4
+ /*
5
+ * Copyright 2014-2022 NVIDIA Corporation. All rights reserved.
6
+ *
7
+ * NOTICE TO USER:
8
+ *
9
+ * This source code is subject to NVIDIA ownership rights under U.S. and
10
+ * international Copyright laws.
11
+ *
12
+ * This software and the information contained herein is PROPRIETARY and
13
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and conditions
14
+ * of a form of NVIDIA software license agreement.
15
+ *
16
+ * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
17
+ * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
18
+ * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
19
+ * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
20
+ * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
21
+ * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
22
+ * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
23
+ * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
24
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
25
+ * OR PERFORMANCE OF THIS SOURCE CODE.
26
+ *
27
+ * U.S. Government End Users. This source code is a "commercial item" as
28
+ * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
29
+ * "commercial computer software" and "commercial computer software
30
+ * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
31
+ * and is provided to the U.S. Government only as a commercial end item.
32
+ * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
33
+ * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
34
+ * source code with only those rights set forth herein.
35
+ *
36
+ * Any use of this source code in individual and commercial software must
37
+ * include, in the user documentation and internal comments to the code,
38
+ * the above Disclaimer and U.S. Government End Users Notice.
39
+ */
40
+
41
+ #include <stddef.h>
42
+ #include <stdint.h>
43
+ #include "nvperf_common.h"
44
+
45
+ #if defined(__GNUC__) && defined(NVPA_SHARED_LIB)
46
+ #pragma GCC visibility push(default)
47
+ #if !defined(NVPW_LOCAL)
48
+ #define NVPW_LOCAL __attribute__ ((visibility ("hidden")))
49
+ #endif
50
+ #else
51
+ #if !defined(NVPW_LOCAL)
52
+ #define NVPW_LOCAL
53
+ #endif
54
+ #endif
55
+
56
+ #ifdef __cplusplus
57
+ extern "C" {
58
+ #endif
59
+
60
+ /**
61
+ * @file nvperf_host.h
62
+ */
63
+
64
+
65
+ // Guard against multiple definition of NvPerf host types
66
+ #ifndef NVPERF_HOST_API_DEFINED
67
+ #define NVPERF_HOST_API_DEFINED
68
+
69
+
70
+ /***************************************************************************//**
71
+ * @name Host Configuration
72
+ * @{
73
+ */
74
+
75
+ typedef struct NVPW_InitializeHost_Params
76
+ {
77
+ /// [in]
78
+ size_t structSize;
79
+ /// [in] assign to NULL
80
+ void* pPriv;
81
+ } NVPW_InitializeHost_Params;
82
+ #define NVPW_InitializeHost_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_InitializeHost_Params, pPriv)
83
+
84
+ /// Load the host library.
85
+ NVPA_Status NVPW_InitializeHost(NVPW_InitializeHost_Params* pParams);
86
+
87
+ typedef struct NVPW_CounterData_CalculateCounterDataImageCopySize_Params
88
+ {
89
+ /// [in]
90
+ size_t structSize;
91
+ /// [in] assign to NULL
92
+ void* pPriv;
93
+ /// The CounterDataPrefix generated from e.g. nvperf2 initdata or
94
+ /// NVPW_CounterDataBuilder_GetCounterDataPrefix(). Must be align(8).
95
+ const uint8_t* pCounterDataPrefix;
96
+ size_t counterDataPrefixSize;
97
+ /// max number of ranges that can be profiled
98
+ uint32_t maxNumRanges;
99
+ /// max number of RangeTree nodes; must be >= maxNumRanges
100
+ uint32_t maxNumRangeTreeNodes;
101
+ /// max string length of each RangeName, including the trailing NUL character
102
+ uint32_t maxRangeNameLength;
103
+ const uint8_t* pCounterDataSrc;
104
+ /// [out] required size of the copy buffer
105
+ size_t copyDataImageCounterSize;
106
+ } NVPW_CounterData_CalculateCounterDataImageCopySize_Params;
107
+ #define NVPW_CounterData_CalculateCounterDataImageCopySize_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CounterData_CalculateCounterDataImageCopySize_Params, copyDataImageCounterSize)
108
+
109
+ NVPA_Status NVPW_CounterData_CalculateCounterDataImageCopySize(NVPW_CounterData_CalculateCounterDataImageCopySize_Params* pParams);
110
+
111
+ typedef struct NVPW_CounterData_InitializeCounterDataImageCopy_Params
112
+ {
113
+ /// [in]
114
+ size_t structSize;
115
+ /// [in] assign to NULL
116
+ void* pPriv;
117
+ /// The CounterDataPrefix generated from e.g. nvperf2 initdata or
118
+ /// NVPW_CounterDataBuilder_GetCounterDataPrefix(). Must be align(8).
119
+ const uint8_t* pCounterDataPrefix;
120
+ size_t counterDataPrefixSize;
121
+ /// max number of ranges that can be profiled
122
+ uint32_t maxNumRanges;
123
+ /// max number of RangeTree nodes; must be >= maxNumRanges
124
+ uint32_t maxNumRangeTreeNodes;
125
+ /// max string length of each RangeName, including the trailing NUL character
126
+ uint32_t maxRangeNameLength;
127
+ const uint8_t* pCounterDataSrc;
128
+ uint8_t* pCounterDataDst;
129
+ } NVPW_CounterData_InitializeCounterDataImageCopy_Params;
130
+ #define NVPW_CounterData_InitializeCounterDataImageCopy_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CounterData_InitializeCounterDataImageCopy_Params, pCounterDataDst)
131
+
132
+ NVPA_Status NVPW_CounterData_InitializeCounterDataImageCopy(NVPW_CounterData_InitializeCounterDataImageCopy_Params* pParams);
133
+
134
+ typedef struct NVPA_CounterDataCombiner NVPA_CounterDataCombiner;
135
+
136
+ typedef struct NVPW_CounterDataCombiner_Create_Params
137
+ {
138
+ /// [in]
139
+ size_t structSize;
140
+ /// [in] assign to NULL
141
+ void* pPriv;
142
+ /// The destination counter data into which the source datas will be combined
143
+ uint8_t* pCounterDataDst;
144
+ /// [out] The created counter data combiner
145
+ NVPA_CounterDataCombiner* pCounterDataCombiner;
146
+ } NVPW_CounterDataCombiner_Create_Params;
147
+ #define NVPW_CounterDataCombiner_Create_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CounterDataCombiner_Create_Params, pCounterDataCombiner)
148
+
149
+ NVPA_Status NVPW_CounterDataCombiner_Create(NVPW_CounterDataCombiner_Create_Params* pParams);
150
+
151
+ typedef struct NVPW_CounterDataCombiner_Destroy_Params
152
+ {
153
+ /// [in]
154
+ size_t structSize;
155
+ /// [in] assign to NULL
156
+ void* pPriv;
157
+ NVPA_CounterDataCombiner* pCounterDataCombiner;
158
+ } NVPW_CounterDataCombiner_Destroy_Params;
159
+ #define NVPW_CounterDataCombiner_Destroy_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CounterDataCombiner_Destroy_Params, pCounterDataCombiner)
160
+
161
+ NVPA_Status NVPW_CounterDataCombiner_Destroy(NVPW_CounterDataCombiner_Destroy_Params* pParams);
162
+
163
+ typedef struct NVPW_CounterDataCombiner_CreateRange_Params
164
+ {
165
+ /// [in]
166
+ size_t structSize;
167
+ /// [in] assign to NULL
168
+ void* pPriv;
169
+ NVPA_CounterDataCombiner* pCounterDataCombiner;
170
+ size_t numDescriptions;
171
+ const char* const* ppDescriptions;
172
+ /// [out]
173
+ size_t rangeIndexDst;
174
+ } NVPW_CounterDataCombiner_CreateRange_Params;
175
+ #define NVPW_CounterDataCombiner_CreateRange_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CounterDataCombiner_CreateRange_Params, rangeIndexDst)
176
+
177
+ NVPA_Status NVPW_CounterDataCombiner_CreateRange(NVPW_CounterDataCombiner_CreateRange_Params* pParams);
178
+
179
+ typedef struct NVPW_CounterDataCombiner_AccumulateIntoRange_Params
180
+ {
181
+ /// [in]
182
+ size_t structSize;
183
+ /// [in] assign to NULL
184
+ void* pPriv;
185
+ NVPA_CounterDataCombiner* pCounterDataCombiner;
186
+ size_t rangeIndexDst;
187
+ uint32_t dstMultiplier;
188
+ const uint8_t* pCounterDataSrc;
189
+ size_t rangeIndexSrc;
190
+ uint32_t srcMultiplier;
191
+ } NVPW_CounterDataCombiner_AccumulateIntoRange_Params;
192
+ #define NVPW_CounterDataCombiner_AccumulateIntoRange_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CounterDataCombiner_AccumulateIntoRange_Params, srcMultiplier)
193
+
194
+ NVPA_Status NVPW_CounterDataCombiner_AccumulateIntoRange(NVPW_CounterDataCombiner_AccumulateIntoRange_Params* pParams);
195
+
196
+ typedef struct NVPW_CounterDataCombiner_SumIntoRange_Params
197
+ {
198
+ /// [in]
199
+ size_t structSize;
200
+ /// [in] assign to NULL
201
+ void* pPriv;
202
+ NVPA_CounterDataCombiner* pCounterDataCombiner;
203
+ size_t rangeIndexDst;
204
+ const uint8_t* pCounterDataSrc;
205
+ size_t rangeIndexSrc;
206
+ } NVPW_CounterDataCombiner_SumIntoRange_Params;
207
+ #define NVPW_CounterDataCombiner_SumIntoRange_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CounterDataCombiner_SumIntoRange_Params, rangeIndexSrc)
208
+
209
+ NVPA_Status NVPW_CounterDataCombiner_SumIntoRange(NVPW_CounterDataCombiner_SumIntoRange_Params* pParams);
210
+
211
+ typedef struct NVPW_CounterDataCombiner_WeightedSumIntoRange_Params
212
+ {
213
+ /// [in]
214
+ size_t structSize;
215
+ /// [in] assign to NULL
216
+ void* pPriv;
217
+ NVPA_CounterDataCombiner* pCounterDataCombiner;
218
+ size_t rangeIndexDst;
219
+ double dstMultiplier;
220
+ const uint8_t* pCounterDataSrc;
221
+ size_t rangeIndexSrc;
222
+ double srcMultiplier;
223
+ } NVPW_CounterDataCombiner_WeightedSumIntoRange_Params;
224
+ #define NVPW_CounterDataCombiner_WeightedSumIntoRange_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CounterDataCombiner_WeightedSumIntoRange_Params, srcMultiplier)
225
+
226
+ NVPA_Status NVPW_CounterDataCombiner_WeightedSumIntoRange(NVPW_CounterDataCombiner_WeightedSumIntoRange_Params* pParams);
227
+
228
+ /**
229
+ * @}
230
+ ******************************************************************************/
231
+
232
+ /***************************************************************************//**
233
+ * @name Metrics Configuration
234
+ * @{
235
+ */
236
+
237
+ typedef struct NVPA_RawMetricsConfig NVPA_RawMetricsConfig;
238
+
239
+ typedef struct NVPA_RawMetricRequest
240
+ {
241
+ /// [in]
242
+ size_t structSize;
243
+ /// [in] assign to NULL
244
+ void* pPriv;
245
+ /// in
246
+ const char* pMetricName;
247
+ /// in
248
+ NVPA_Bool isolated;
249
+ /// in; ignored by AddMetric but observed by CounterData initialization
250
+ NVPA_Bool keepInstances;
251
+ } NVPA_RawMetricRequest;
252
+ #define NVPA_RAW_METRIC_REQUEST_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPA_RawMetricRequest, keepInstances)
253
+
254
+ typedef struct NVPW_GetSupportedChipNames_Params
255
+ {
256
+ /// [in]
257
+ size_t structSize;
258
+ /// [in] assign to NULL
259
+ void* pPriv;
260
+ /// [out]
261
+ const char* const* ppChipNames;
262
+ /// [out]
263
+ size_t numChipNames;
264
+ } NVPW_GetSupportedChipNames_Params;
265
+ #define NVPW_GetSupportedChipNames_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_GetSupportedChipNames_Params, numChipNames)
266
+
267
+ NVPA_Status NVPW_GetSupportedChipNames(NVPW_GetSupportedChipNames_Params* pParams);
268
+
269
+ typedef struct NVPW_RawMetricsConfig_Destroy_Params
270
+ {
271
+ /// [in]
272
+ size_t structSize;
273
+ /// [in] assign to NULL
274
+ void* pPriv;
275
+ NVPA_RawMetricsConfig* pRawMetricsConfig;
276
+ } NVPW_RawMetricsConfig_Destroy_Params;
277
+ #define NVPW_RawMetricsConfig_Destroy_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_RawMetricsConfig_Destroy_Params, pRawMetricsConfig)
278
+
279
+ NVPA_Status NVPW_RawMetricsConfig_Destroy(NVPW_RawMetricsConfig_Destroy_Params* pParams);
280
+
281
+ typedef struct NVPW_RawMetricsConfig_SetCounterAvailability_Params
282
+ {
283
+ /// [in]
284
+ size_t structSize;
285
+ /// [in] assign to NULL
286
+ void* pPriv;
287
+ NVPA_RawMetricsConfig* pRawMetricsConfig;
288
+ /// [in] buffer with counter availability image
289
+ const uint8_t* pCounterAvailabilityImage;
290
+ } NVPW_RawMetricsConfig_SetCounterAvailability_Params;
291
+ #define NVPW_RawMetricsConfig_SetCounterAvailability_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_RawMetricsConfig_SetCounterAvailability_Params, pCounterAvailabilityImage)
292
+
293
+ NVPA_Status NVPW_RawMetricsConfig_SetCounterAvailability(NVPW_RawMetricsConfig_SetCounterAvailability_Params* pParams);
294
+
295
+ typedef struct NVPW_RawMetricsConfig_BeginPassGroup_Params
296
+ {
297
+ /// [in]
298
+ size_t structSize;
299
+ /// [in] assign to NULL
300
+ void* pPriv;
301
+ NVPA_RawMetricsConfig* pRawMetricsConfig;
302
+ size_t maxPassCount;
303
+ } NVPW_RawMetricsConfig_BeginPassGroup_Params;
304
+ #define NVPW_RawMetricsConfig_BeginPassGroup_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_RawMetricsConfig_BeginPassGroup_Params, maxPassCount)
305
+
306
+ NVPA_Status NVPW_RawMetricsConfig_BeginPassGroup(NVPW_RawMetricsConfig_BeginPassGroup_Params* pParams);
307
+
308
+ typedef struct NVPW_RawMetricsConfig_EndPassGroup_Params
309
+ {
310
+ /// [in]
311
+ size_t structSize;
312
+ /// [in] assign to NULL
313
+ void* pPriv;
314
+ NVPA_RawMetricsConfig* pRawMetricsConfig;
315
+ } NVPW_RawMetricsConfig_EndPassGroup_Params;
316
+ #define NVPW_RawMetricsConfig_EndPassGroup_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_RawMetricsConfig_EndPassGroup_Params, pRawMetricsConfig)
317
+
318
+ NVPA_Status NVPW_RawMetricsConfig_EndPassGroup(NVPW_RawMetricsConfig_EndPassGroup_Params* pParams);
319
+
320
+ typedef struct NVPW_RawMetricsConfig_GetNumMetrics_Params
321
+ {
322
+ /// [in]
323
+ size_t structSize;
324
+ /// [in] assign to NULL
325
+ void* pPriv;
326
+ const NVPA_RawMetricsConfig* pRawMetricsConfig;
327
+ /// [out]
328
+ size_t numMetrics;
329
+ } NVPW_RawMetricsConfig_GetNumMetrics_Params;
330
+ #define NVPW_RawMetricsConfig_GetNumMetrics_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_RawMetricsConfig_GetNumMetrics_Params, numMetrics)
331
+
332
+ NVPA_Status NVPW_RawMetricsConfig_GetNumMetrics(NVPW_RawMetricsConfig_GetNumMetrics_Params* pParams);
333
+
334
+ typedef struct NVPW_RawMetricsConfig_GetMetricProperties_Params
335
+ {
336
+ /// [in]
337
+ size_t structSize;
338
+ /// [in] assign to NULL
339
+ void* pPriv;
340
+ const NVPA_RawMetricsConfig* pRawMetricsConfig;
341
+ size_t metricIndex;
342
+ /// [out]
343
+ const char* pMetricName;
344
+ /// [out]
345
+ NVPA_Bool supportsPipelined;
346
+ /// [out]
347
+ NVPA_Bool supportsIsolated;
348
+ } NVPW_RawMetricsConfig_GetMetricProperties_Params;
349
+ #define NVPW_RawMetricsConfig_GetMetricProperties_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_RawMetricsConfig_GetMetricProperties_Params, supportsIsolated)
350
+
351
+ NVPA_Status NVPW_RawMetricsConfig_GetMetricProperties(NVPW_RawMetricsConfig_GetMetricProperties_Params* pParams);
352
+
353
+ typedef struct NVPW_RawMetricsConfig_GetMetricProperties_V2_Params
354
+ {
355
+ /// [in]
356
+ size_t structSize;
357
+ /// [in] assign to NULL
358
+ void* pPriv;
359
+ const NVPA_RawMetricsConfig* pRawMetricsConfig;
360
+ size_t metricIndex;
361
+ /// [out]
362
+ const char* pMetricName;
363
+ } NVPW_RawMetricsConfig_GetMetricProperties_V2_Params;
364
+ #define NVPW_RawMetricsConfig_GetMetricProperties_V2_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_RawMetricsConfig_GetMetricProperties_V2_Params, pMetricName)
365
+
366
+ NVPA_Status NVPW_RawMetricsConfig_GetMetricProperties_V2(NVPW_RawMetricsConfig_GetMetricProperties_V2_Params* pParams);
367
+
368
+ typedef struct NVPW_RawMetricsConfig_AddMetrics_Params
369
+ {
370
+ /// [in]
371
+ size_t structSize;
372
+ /// [in] assign to NULL
373
+ void* pPriv;
374
+ NVPA_RawMetricsConfig* pRawMetricsConfig;
375
+ const NVPA_RawMetricRequest* pRawMetricRequests;
376
+ size_t numMetricRequests;
377
+ } NVPW_RawMetricsConfig_AddMetrics_Params;
378
+ #define NVPW_RawMetricsConfig_AddMetrics_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_RawMetricsConfig_AddMetrics_Params, numMetricRequests)
379
+
380
+ NVPA_Status NVPW_RawMetricsConfig_AddMetrics(NVPW_RawMetricsConfig_AddMetrics_Params* pParams);
381
+
382
+ typedef struct NVPW_RawMetricsConfig_IsAddMetricsPossible_Params
383
+ {
384
+ /// [in]
385
+ size_t structSize;
386
+ /// [in] assign to NULL
387
+ void* pPriv;
388
+ const NVPA_RawMetricsConfig* pRawMetricsConfig;
389
+ const NVPA_RawMetricRequest* pRawMetricRequests;
390
+ size_t numMetricRequests;
391
+ /// [out]
392
+ NVPA_Bool isPossible;
393
+ } NVPW_RawMetricsConfig_IsAddMetricsPossible_Params;
394
+ #define NVPW_RawMetricsConfig_IsAddMetricsPossible_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_RawMetricsConfig_IsAddMetricsPossible_Params, isPossible)
395
+
396
+ NVPA_Status NVPW_RawMetricsConfig_IsAddMetricsPossible(NVPW_RawMetricsConfig_IsAddMetricsPossible_Params* pParams);
397
+
398
+ typedef struct NVPW_RawMetricsConfig_GenerateConfigImage_Params
399
+ {
400
+ /// [in]
401
+ size_t structSize;
402
+ /// [in] assign to NULL
403
+ void* pPriv;
404
+ NVPA_RawMetricsConfig* pRawMetricsConfig;
405
+ /// [in] If true, all existing pass groups may be merged to reduce number of passes.
406
+ /// If merge was successful, distribution of counters in passes may be updated as a side-effect. The effects
407
+ /// will be persistent in pRawMetricsConfig.
408
+ NVPA_Bool mergeAllPassGroups;
409
+ } NVPW_RawMetricsConfig_GenerateConfigImage_Params;
410
+ #define NVPW_RawMetricsConfig_GenerateConfigImage_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_RawMetricsConfig_GenerateConfigImage_Params, mergeAllPassGroups)
411
+
412
+ /// This API may fail if called inside a pass group with `mergeAllPassGroups` = true.
413
+ NVPA_Status NVPW_RawMetricsConfig_GenerateConfigImage(NVPW_RawMetricsConfig_GenerateConfigImage_Params* pParams);
414
+
415
+ typedef struct NVPW_RawMetricsConfig_GetConfigImage_Params
416
+ {
417
+ /// [in]
418
+ size_t structSize;
419
+ /// [in] assign to NULL
420
+ void* pPriv;
421
+ const NVPA_RawMetricsConfig* pRawMetricsConfig;
422
+ /// [in] Number of bytes allocated for pBuffer
423
+ size_t bytesAllocated;
424
+ /// [out] [optional] Buffer receiving the config image
425
+ uint8_t* pBuffer;
426
+ /// [out] Count of bytes that would be copied into pBuffer
427
+ size_t bytesCopied;
428
+ } NVPW_RawMetricsConfig_GetConfigImage_Params;
429
+ #define NVPW_RawMetricsConfig_GetConfigImage_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_RawMetricsConfig_GetConfigImage_Params, bytesCopied)
430
+
431
+ NVPA_Status NVPW_RawMetricsConfig_GetConfigImage(NVPW_RawMetricsConfig_GetConfigImage_Params* pParams);
432
+
433
+ typedef struct NVPW_RawMetricsConfig_GetNumPasses_Params
434
+ {
435
+ /// [in]
436
+ size_t structSize;
437
+ /// [in] assign to NULL
438
+ void* pPriv;
439
+ const NVPA_RawMetricsConfig* pRawMetricsConfig;
440
+ /// [out]
441
+ size_t numPipelinedPasses;
442
+ /// [out]
443
+ size_t numIsolatedPasses;
444
+ } NVPW_RawMetricsConfig_GetNumPasses_Params;
445
+ #define NVPW_RawMetricsConfig_GetNumPasses_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_RawMetricsConfig_GetNumPasses_Params, numIsolatedPasses)
446
+
447
+ /// Total num passes = numPipelinedPasses + numIsolatedPasses * numNestingLevels
448
+ NVPA_Status NVPW_RawMetricsConfig_GetNumPasses(NVPW_RawMetricsConfig_GetNumPasses_Params* pParams);
449
+
450
+ typedef struct NVPW_RawMetricsConfig_GetNumPasses_V2_Params
451
+ {
452
+ /// [in]
453
+ size_t structSize;
454
+ /// [in] assign to NULL
455
+ void* pPriv;
456
+ /// [in]
457
+ const NVPA_RawMetricsConfig* pRawMetricsConfig;
458
+ /// [out]
459
+ size_t numPasses;
460
+ } NVPW_RawMetricsConfig_GetNumPasses_V2_Params;
461
+ #define NVPW_RawMetricsConfig_GetNumPasses_V2_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_RawMetricsConfig_GetNumPasses_V2_Params, numPasses)
462
+
463
+ /// Total num passes = numPasses * numNestingLevels
464
+ NVPA_Status NVPW_RawMetricsConfig_GetNumPasses_V2(NVPW_RawMetricsConfig_GetNumPasses_V2_Params* pParams);
465
+
466
+ /**
467
+ * @}
468
+ ******************************************************************************/
469
+
470
+ /***************************************************************************//**
471
+ * @name CounterData Creation
472
+ * @{
473
+ */
474
+
475
+ typedef struct NVPA_CounterDataBuilder NVPA_CounterDataBuilder;
476
+
477
+ typedef struct NVPW_CounterDataBuilder_Create_Params
478
+ {
479
+ /// [in]
480
+ size_t structSize;
481
+ /// [in] assign to NULL
482
+ void* pPriv;
483
+ /// [out]
484
+ NVPA_CounterDataBuilder* pCounterDataBuilder;
485
+ const char* pChipName;
486
+ } NVPW_CounterDataBuilder_Create_Params;
487
+ #define NVPW_CounterDataBuilder_Create_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CounterDataBuilder_Create_Params, pChipName)
488
+
489
+ NVPA_Status NVPW_CounterDataBuilder_Create(NVPW_CounterDataBuilder_Create_Params* pParams);
490
+
491
+ typedef struct NVPW_CounterDataBuilder_Destroy_Params
492
+ {
493
+ /// [in]
494
+ size_t structSize;
495
+ /// [in] assign to NULL
496
+ void* pPriv;
497
+ NVPA_CounterDataBuilder* pCounterDataBuilder;
498
+ } NVPW_CounterDataBuilder_Destroy_Params;
499
+ #define NVPW_CounterDataBuilder_Destroy_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CounterDataBuilder_Destroy_Params, pCounterDataBuilder)
500
+
501
+ NVPA_Status NVPW_CounterDataBuilder_Destroy(NVPW_CounterDataBuilder_Destroy_Params* pParams);
502
+
503
+ typedef struct NVPW_CounterDataBuilder_AddMetrics_Params
504
+ {
505
+ /// [in]
506
+ size_t structSize;
507
+ /// [in] assign to NULL
508
+ void* pPriv;
509
+ NVPA_CounterDataBuilder* pCounterDataBuilder;
510
+ const NVPA_RawMetricRequest* pRawMetricRequests;
511
+ size_t numMetricRequests;
512
+ } NVPW_CounterDataBuilder_AddMetrics_Params;
513
+ #define NVPW_CounterDataBuilder_AddMetrics_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CounterDataBuilder_AddMetrics_Params, numMetricRequests)
514
+
515
+ NVPA_Status NVPW_CounterDataBuilder_AddMetrics(NVPW_CounterDataBuilder_AddMetrics_Params* pParams);
516
+
517
+ typedef struct NVPW_CounterDataBuilder_GetCounterDataPrefix_Params
518
+ {
519
+ /// [in]
520
+ size_t structSize;
521
+ /// [in] assign to NULL
522
+ void* pPriv;
523
+ NVPA_CounterDataBuilder* pCounterDataBuilder;
524
+ /// [in] Number of bytes allocated for pBuffer
525
+ size_t bytesAllocated;
526
+ /// [out] [optional] Buffer receiving the counter data prefix
527
+ uint8_t* pBuffer;
528
+ /// [out] Count of bytes that would be copied to pBuffer
529
+ size_t bytesCopied;
530
+ } NVPW_CounterDataBuilder_GetCounterDataPrefix_Params;
531
+ #define NVPW_CounterDataBuilder_GetCounterDataPrefix_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CounterDataBuilder_GetCounterDataPrefix_Params, bytesCopied)
532
+
533
+ NVPA_Status NVPW_CounterDataBuilder_GetCounterDataPrefix(NVPW_CounterDataBuilder_GetCounterDataPrefix_Params* pParams);
534
+
535
+ /**
536
+ * @}
537
+ ******************************************************************************/
538
+
539
+ /***************************************************************************//**
540
+ * @name MetricsContext - metric configuration and evaluation
541
+ * @{
542
+ */
543
+
544
+ /// 'NVPA_MetricsContext' and its APIs are deprecated, please use 'NVPW_MetricsEvaluator' and its APIs instead.
545
+ typedef struct NVPA_MetricsContext NVPA_MetricsContext;
546
+
547
+ typedef enum NVPA_MetricDetailLevel
548
+ {
549
+ NVPA_METRIC_DETAIL_LEVEL_INVALID,
550
+ NVPA_METRIC_DETAIL_LEVEL_GPU,
551
+ NVPA_METRIC_DETAIL_LEVEL_ALL,
552
+ NVPA_METRIC_DETAIL_LEVEL_GPU_AND_LEAF_INSTANCES,
553
+ NVPA_METRIC_DETAIL_LEVEL__COUNT
554
+ } NVPA_MetricDetailLevel;
555
+
556
+ typedef struct NVPW_MetricsContext_Destroy_Params
557
+ {
558
+ /// [in]
559
+ size_t structSize;
560
+ /// [in] assign to NULL
561
+ void* pPriv;
562
+ NVPA_MetricsContext* pMetricsContext;
563
+ } NVPW_MetricsContext_Destroy_Params;
564
+ #define NVPW_MetricsContext_Destroy_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_Destroy_Params, pMetricsContext)
565
+
566
+ NVPA_Status NVPW_MetricsContext_Destroy(NVPW_MetricsContext_Destroy_Params* pParams);
567
+
568
+ typedef struct NVPW_MetricsContext_RunScript_Params
569
+ {
570
+ /// [in]
571
+ size_t structSize;
572
+ /// [in] assign to NULL
573
+ void* pPriv;
574
+ NVPA_MetricsContext* pMetricsContext;
575
+ /// in : if true, upon error, calls PyErr_Print() which causes exceptions to be logged to stderr
576
+ NVPA_Bool printErrors;
577
+ /// in : the script source code
578
+ const char* pSource;
579
+ /// in : the filename reported in stack traces; if NULL, uses an auto-generated name
580
+ const char* pFileName;
581
+ } NVPW_MetricsContext_RunScript_Params;
582
+ #define NVPW_MetricsContext_RunScript_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_RunScript_Params, pFileName)
583
+
584
+ /// Runs code in the metrics module. Additional metrics can be added through this interface.
585
+ /// If printErrors is true, calls PyErr_Print() which causes exceptions to be logged to stderr.
586
+ /// Equivalent to:
587
+ /// exec(source, metrics.__dict__, metrics.__dict__)
588
+ NVPA_Status NVPW_MetricsContext_RunScript(NVPW_MetricsContext_RunScript_Params* pParams);
589
+
590
+ typedef struct NVPW_MetricsContext_ExecScript_Begin_Params
591
+ {
592
+ /// [in]
593
+ size_t structSize;
594
+ /// [in] assign to NULL
595
+ void* pPriv;
596
+ NVPA_MetricsContext* pMetricsContext;
597
+ /// in : if true, treats pSource as a statement to be eval'd; otherwise, calls exec.
598
+ NVPA_Bool isStatement;
599
+ /// in : if true, upon error, calls PyErr_Print() which causes exceptions to be logged to stderr
600
+ NVPA_Bool printErrors;
601
+ /// in : the script source code
602
+ const char* pSource;
603
+ /// in : the filename reported in stack traces; if NULL, uses an auto-generated name
604
+ const char* pFileName;
605
+ /// out: if isStatement, points at a string form of the evaluation; if !isStatement, points at
606
+ /// str(locals()['result'])
607
+ const char* pResultStr;
608
+ } NVPW_MetricsContext_ExecScript_Begin_Params;
609
+ #define NVPW_MetricsContext_ExecScript_Begin_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_ExecScript_Begin_Params, pResultStr)
610
+
611
+ /// Executes a script in the metrics module, but does not modify its contents (for ordinary queries).
612
+ /// Equivalent to one of:
613
+ /// eval(source, metrics.__dict__, {}) # isStatement true
614
+ /// exec(source, metrics.__dict__, {}) # isStatement false
615
+ NVPA_Status NVPW_MetricsContext_ExecScript_Begin(NVPW_MetricsContext_ExecScript_Begin_Params* pParams);
616
+
617
+ typedef struct NVPW_MetricsContext_ExecScript_End_Params
618
+ {
619
+ /// [in]
620
+ size_t structSize;
621
+ /// [in] assign to NULL
622
+ void* pPriv;
623
+ NVPA_MetricsContext* pMetricsContext;
624
+ } NVPW_MetricsContext_ExecScript_End_Params;
625
+ #define NVPW_MetricsContext_ExecScript_End_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_ExecScript_End_Params, pMetricsContext)
626
+
627
+ /// Cleans up memory internally allocated by NVPW_MetricsContext_ExecScript_Begin.
628
+ NVPA_Status NVPW_MetricsContext_ExecScript_End(NVPW_MetricsContext_ExecScript_End_Params* pParams);
629
+
630
+ typedef struct NVPW_MetricsContext_GetCounterNames_Begin_Params
631
+ {
632
+ /// [in]
633
+ size_t structSize;
634
+ /// [in] assign to NULL
635
+ void* pPriv;
636
+ NVPA_MetricsContext* pMetricsContext;
637
+ /// [out]
638
+ size_t numCounters;
639
+ /// [out]
640
+ const char* const* ppCounterNames;
641
+ } NVPW_MetricsContext_GetCounterNames_Begin_Params;
642
+ #define NVPW_MetricsContext_GetCounterNames_Begin_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_GetCounterNames_Begin_Params, ppCounterNames)
643
+
644
+ /// Outputs (size, pointer) to an array of "const char* pCounterName". The lifetime of the array is tied to
645
+ /// MetricsContext. The names are sorted.
646
+ /// Impl: lazily creates list
647
+ NVPA_Status NVPW_MetricsContext_GetCounterNames_Begin(NVPW_MetricsContext_GetCounterNames_Begin_Params* pParams);
648
+
649
+ typedef struct NVPW_MetricsContext_GetCounterNames_End_Params
650
+ {
651
+ /// [in]
652
+ size_t structSize;
653
+ /// [in] assign to NULL
654
+ void* pPriv;
655
+ NVPA_MetricsContext* pMetricsContext;
656
+ } NVPW_MetricsContext_GetCounterNames_End_Params;
657
+ #define NVPW_MetricsContext_GetCounterNames_End_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_GetCounterNames_End_Params, pMetricsContext)
658
+
659
+ /// Cleans up memory internally allocated by NVPW_MetricsContext_GetCounterNames_Begin.
660
+ NVPA_Status NVPW_MetricsContext_GetCounterNames_End(NVPW_MetricsContext_GetCounterNames_End_Params* pParams);
661
+
662
+ typedef struct NVPW_MetricsContext_GetThroughputNames_Begin_Params
663
+ {
664
+ /// [in]
665
+ size_t structSize;
666
+ /// [in] assign to NULL
667
+ void* pPriv;
668
+ NVPA_MetricsContext* pMetricsContext;
669
+ /// [out]
670
+ size_t numThroughputs;
671
+ /// [out]
672
+ const char* const* ppThroughputNames;
673
+ } NVPW_MetricsContext_GetThroughputNames_Begin_Params;
674
+ #define NVPW_MetricsContext_GetThroughputNames_Begin_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_GetThroughputNames_Begin_Params, ppThroughputNames)
675
+
676
+ /// Outputs (size, pointer) to an array of "const char* pThroughputName". The lifetime of the array is tied to
677
+ /// MetricsContext. The names are sorted.
678
+ /// Impl: lazily creates list
679
+ NVPA_Status NVPW_MetricsContext_GetThroughputNames_Begin(NVPW_MetricsContext_GetThroughputNames_Begin_Params* pParams);
680
+
681
+ typedef struct NVPW_MetricsContext_GetThroughputNames_End_Params
682
+ {
683
+ /// [in]
684
+ size_t structSize;
685
+ /// [in] assign to NULL
686
+ void* pPriv;
687
+ NVPA_MetricsContext* pMetricsContext;
688
+ } NVPW_MetricsContext_GetThroughputNames_End_Params;
689
+ #define NVPW_MetricsContext_GetThroughputNames_End_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_GetThroughputNames_End_Params, pMetricsContext)
690
+
691
+ /// Cleans up memory internally allocated by NVPW_MetricsContext_GetThroughputNames_Begin.
692
+ NVPA_Status NVPW_MetricsContext_GetThroughputNames_End(NVPW_MetricsContext_GetThroughputNames_End_Params* pParams);
693
+
694
+ typedef struct NVPW_MetricsContext_GetRatioNames_Begin_Params
695
+ {
696
+ /// [in]
697
+ size_t structSize;
698
+ /// [in] assign to NULL
699
+ void* pPriv;
700
+ NVPA_MetricsContext* pMetricsContext;
701
+ /// [out]
702
+ size_t numRatios;
703
+ /// [out]
704
+ const char* const* ppRatioNames;
705
+ } NVPW_MetricsContext_GetRatioNames_Begin_Params;
706
+ #define NVPW_MetricsContext_GetRatioNames_Begin_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_GetRatioNames_Begin_Params, ppRatioNames)
707
+
708
+ /// Outputs (size, pointer) to an array of "const char* pRatioName". The lifetime of the array is tied to
709
+ /// MetricsContext. The names are sorted.
710
+ /// Impl: lazily creates list
711
+ NVPA_Status NVPW_MetricsContext_GetRatioNames_Begin(NVPW_MetricsContext_GetRatioNames_Begin_Params* pParams);
712
+
713
+ typedef struct NVPW_MetricsContext_GetRatioNames_End_Params
714
+ {
715
+ /// [in]
716
+ size_t structSize;
717
+ /// [in] assign to NULL
718
+ void* pPriv;
719
+ NVPA_MetricsContext* pMetricsContext;
720
+ } NVPW_MetricsContext_GetRatioNames_End_Params;
721
+ #define NVPW_MetricsContext_GetRatioNames_End_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_GetRatioNames_End_Params, pMetricsContext)
722
+
723
+ /// Cleans up memory internally allocated by NVPW_MetricsContext_GetCounterNames_Begin.
724
+ NVPA_Status NVPW_MetricsContext_GetRatioNames_End(NVPW_MetricsContext_GetRatioNames_End_Params* pParams);
725
+
726
+ typedef struct NVPW_MetricsContext_GetMetricNames_Begin_Params
727
+ {
728
+ /// [in]
729
+ size_t structSize;
730
+ /// [in] assign to NULL
731
+ void* pPriv;
732
+ NVPA_MetricsContext* pMetricsContext;
733
+ /// out: number of elements in array ppMetricNames
734
+ size_t numMetrics;
735
+ /// out: pointer to array of 'const char* pMetricName'
736
+ const char* const* ppMetricNames;
737
+ /// in : if true, doesn't enumerate \<metric\>.peak_{burst, sustained}
738
+ NVPA_Bool hidePeakSubMetrics;
739
+ /// in : if true, doesn't enumerate \<metric\>.per_{active,elapsed,region,frame}_cycle
740
+ NVPA_Bool hidePerCycleSubMetrics;
741
+ /// in : if true, doesn't enumerate \<metric\>.pct_of_peak_{burst,sustained}_{active,elapsed,region,frame}
742
+ NVPA_Bool hidePctOfPeakSubMetrics;
743
+ /// in : if false, enumerate \<unit\>__throughput.pct_of_peak_sustained_elapsed even if hidePctOfPeakSubMetrics
744
+ /// is true
745
+ NVPA_Bool hidePctOfPeakSubMetricsOnThroughputs;
746
+ } NVPW_MetricsContext_GetMetricNames_Begin_Params;
747
+ #define NVPW_MetricsContext_GetMetricNames_Begin_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_GetMetricNames_Begin_Params, hidePctOfPeakSubMetricsOnThroughputs)
748
+
749
+ /// Outputs (size, pointer) to an array of "const char* pMetricName". The lifetime of the array is tied to
750
+ /// MetricsContext. The names are sorted.
751
+ /// Enumerates all metrics at all levels. Includes:
752
+ /// * counter.{sum,avg,min,max}
753
+ /// * throughput.{avg,min,max}
754
+ /// * \<metric\>.peak_{burst, sustained}
755
+ /// * \<metric\>.per_{active,elapsed,region,frame}_cycle
756
+ /// * \<metric\>.pct_of_peak_{burst,sustained}_{active,elapsed,region,frame}
757
+ /// * \<metric\>.per.{other, other_pct}
758
+ NVPA_Status NVPW_MetricsContext_GetMetricNames_Begin(NVPW_MetricsContext_GetMetricNames_Begin_Params* pParams);
759
+
760
+ typedef struct NVPW_MetricsContext_GetMetricNames_End_Params
761
+ {
762
+ /// [in]
763
+ size_t structSize;
764
+ /// [in] assign to NULL
765
+ void* pPriv;
766
+ NVPA_MetricsContext* pMetricsContext;
767
+ } NVPW_MetricsContext_GetMetricNames_End_Params;
768
+ #define NVPW_MetricsContext_GetMetricNames_End_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_GetMetricNames_End_Params, pMetricsContext)
769
+
770
+ /// Cleans up memory internally allocated by NVPW_MetricsContext_GetMetricNames_Begin.
771
+ NVPA_Status NVPW_MetricsContext_GetMetricNames_End(NVPW_MetricsContext_GetMetricNames_End_Params* pParams);
772
+
773
+ typedef struct NVPW_MetricsContext_GetThroughputBreakdown_Begin_Params
774
+ {
775
+ /// [in]
776
+ size_t structSize;
777
+ /// [in] assign to NULL
778
+ void* pPriv;
779
+ NVPA_MetricsContext* pMetricsContext;
780
+ const char* pThroughputName;
781
+ const char* const* ppCounterNames;
782
+ const char* const* ppSubThroughputNames;
783
+ } NVPW_MetricsContext_GetThroughputBreakdown_Begin_Params;
784
+ #define NVPW_MetricsContext_GetThroughputBreakdown_Begin_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_GetThroughputBreakdown_Begin_Params, ppSubThroughputNames)
785
+
786
+ /// After this function returns, the lifetimes of strings pointed to by {ppCounterNames, ppSubThroughputNames,
787
+ /// ppSubMetricNames} are guaranteed until NVPW_MetricsContext_GetThroughputBreakdown_End, or until pMetricsContext
788
+ /// is destroyed
789
+ NVPA_Status NVPW_MetricsContext_GetThroughputBreakdown_Begin(NVPW_MetricsContext_GetThroughputBreakdown_Begin_Params* pParams);
790
+
791
+ typedef struct NVPW_MetricsContext_GetThroughputBreakdown_End_Params
792
+ {
793
+ /// [in]
794
+ size_t structSize;
795
+ /// [in] assign to NULL
796
+ void* pPriv;
797
+ NVPA_MetricsContext* pMetricsContext;
798
+ } NVPW_MetricsContext_GetThroughputBreakdown_End_Params;
799
+ #define NVPW_MetricsContext_GetThroughputBreakdown_End_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_GetThroughputBreakdown_End_Params, pMetricsContext)
800
+
801
+ /// Cleans up memory internally allocated by NVPW_MetricsContext_GetThroughputBreakdown_Begin.
802
+ NVPA_Status NVPW_MetricsContext_GetThroughputBreakdown_End(NVPW_MetricsContext_GetThroughputBreakdown_End_Params* pParams);
803
+
804
+ typedef struct NVPW_MetricsContext_GetMetricProperties_Begin_Params
805
+ {
806
+ /// [in]
807
+ size_t structSize;
808
+ /// [in] assign to NULL
809
+ void* pPriv;
810
+ NVPA_MetricsContext* pMetricsContext;
811
+ const char* pMetricName;
812
+ /// out
813
+ const char* pDescription;
814
+ /// out
815
+ const char* pDimUnits;
816
+ /// out: a NULL-terminated array of pointers to RawMetric names that can be passed to
817
+ /// NVPW_RawMetricsConfig_AddMetrics()
818
+ const char** ppRawMetricDependencies;
819
+ /// out: metric.peak_burst.value.gpu
820
+ double gpuBurstRate;
821
+ /// out: metric.peak_sustained.value.gpu
822
+ double gpuSustainedRate;
823
+ /// out: a NULL-terminated array of pointers to RawMetric names that can be passed to
824
+ /// NVPW_RawMetricsConfig_AddMetrics().
825
+ const char** ppOptionalRawMetricDependencies;
826
+ } NVPW_MetricsContext_GetMetricProperties_Begin_Params;
827
+ #define NVPW_MetricsContext_GetMetricProperties_Begin_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_GetMetricProperties_Begin_Params, ppOptionalRawMetricDependencies)
828
+
829
+ /// After this function returns, the lifetimes of strings pointed to by pMetricProperties or
830
+ /// ppOptionalRawMetricDependencies are guaranteed until NVPW_MetricsContext_GetMetricProperties_End, or until
831
+ /// pMetricsContext is destroyed.
832
+ NVPA_Status NVPW_MetricsContext_GetMetricProperties_Begin(NVPW_MetricsContext_GetMetricProperties_Begin_Params* pParams);
833
+
834
+ typedef struct NVPW_MetricsContext_GetMetricProperties_End_Params
835
+ {
836
+ /// [in]
837
+ size_t structSize;
838
+ /// [in] assign to NULL
839
+ void* pPriv;
840
+ NVPA_MetricsContext* pMetricsContext;
841
+ } NVPW_MetricsContext_GetMetricProperties_End_Params;
842
+ #define NVPW_MetricsContext_GetMetricProperties_End_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_GetMetricProperties_End_Params, pMetricsContext)
843
+
844
+ /// Cleans up memory internally allocated by NVPW_MetricsContext_GetMetricProperties_Begin.
845
+ NVPA_Status NVPW_MetricsContext_GetMetricProperties_End(NVPW_MetricsContext_GetMetricProperties_End_Params* pParams);
846
+
847
+ typedef struct NVPW_MetricsContext_SetCounterData_Params
848
+ {
849
+ /// [in]
850
+ size_t structSize;
851
+ /// [in] assign to NULL
852
+ void* pPriv;
853
+ NVPA_MetricsContext* pMetricsContext;
854
+ const uint8_t* pCounterDataImage;
855
+ size_t rangeIndex;
856
+ NVPA_Bool isolated;
857
+ } NVPW_MetricsContext_SetCounterData_Params;
858
+ #define NVPW_MetricsContext_SetCounterData_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_SetCounterData_Params, isolated)
859
+
860
+ /// Sets data for subsequent evaluation calls.
861
+ /// Only one (CounterData, range, isolated) set of counters can be active at a time; subsequent calls will overwrite
862
+ /// previous calls' data.
863
+ NVPA_Status NVPW_MetricsContext_SetCounterData(NVPW_MetricsContext_SetCounterData_Params* pParams);
864
+
865
+ typedef struct NVPW_MetricsContext_SetUserData_Params
866
+ {
867
+ /// [in]
868
+ size_t structSize;
869
+ /// [in] assign to NULL
870
+ void* pPriv;
871
+ NVPA_MetricsContext* pMetricsContext;
872
+ /// duration in ns of user defined frame
873
+ double frameDuration;
874
+ /// duration in ns of user defined region
875
+ double regionDuration;
876
+ } NVPW_MetricsContext_SetUserData_Params;
877
+ #define NVPW_MetricsContext_SetUserData_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_SetUserData_Params, regionDuration)
878
+
879
+ /// Sets user data for subsequent evaluation calls.
880
+ NVPA_Status NVPW_MetricsContext_SetUserData(NVPW_MetricsContext_SetUserData_Params* pParams);
881
+
882
+ typedef struct NVPW_MetricsContext_EvaluateToGpuValues_Params
883
+ {
884
+ /// [in]
885
+ size_t structSize;
886
+ /// [in] assign to NULL
887
+ void* pPriv;
888
+ NVPA_MetricsContext* pMetricsContext;
889
+ size_t numMetrics;
890
+ const char* const* ppMetricNames;
891
+ /// [out]
892
+ double* pMetricValues;
893
+ } NVPW_MetricsContext_EvaluateToGpuValues_Params;
894
+ #define NVPW_MetricsContext_EvaluateToGpuValues_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_EvaluateToGpuValues_Params, pMetricValues)
895
+
896
+ /// Evaluate multiple metrics to retrieve their GPU values.
897
+ NVPA_Status NVPW_MetricsContext_EvaluateToGpuValues(NVPW_MetricsContext_EvaluateToGpuValues_Params* pParams);
898
+
899
+ typedef struct NVPW_MetricsContext_GetMetricSuffix_Begin_Params
900
+ {
901
+ /// [in]
902
+ size_t structSize;
903
+ /// [in] assign to NULL
904
+ void* pPriv;
905
+ NVPA_MetricsContext* pMetricsContext;
906
+ /// in: pointer to the metric name
907
+ const char* pMetricName;
908
+ /// out: number of elements in array ppSuffixes
909
+ size_t numSuffixes;
910
+ /// out: pointer to array of 'const char* pSuffixes'
911
+ const char* const* ppSuffixes;
912
+ /// in : if true, doesn't enumerate \<metric\>.peak_{burst, sustained}
913
+ NVPA_Bool hidePeakSubMetrics;
914
+ /// in : if true, doesn't enumerate \<metric\>.per_{active,elapsed,region,frame}_cycle
915
+ NVPA_Bool hidePerCycleSubMetrics;
916
+ /// in : if true, doesn't enumerate \<metric\>.pct_of_peak_{burst,sustained}_{active,elapsed,region,frame}
917
+ NVPA_Bool hidePctOfPeakSubMetrics;
918
+ /// in : if false, enumerate \<unit\>__throughput.pct_of_peak_sustained_elapsed even if hidePctOfPeakSubMetrics
919
+ /// is true
920
+ NVPA_Bool hidePctOfPeakSubMetricsOnThroughputs;
921
+ } NVPW_MetricsContext_GetMetricSuffix_Begin_Params;
922
+ #define NVPW_MetricsContext_GetMetricSuffix_Begin_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_GetMetricSuffix_Begin_Params, hidePctOfPeakSubMetricsOnThroughputs)
923
+
924
+ /// Outputs (size, pointer) to an array of "const char* pSuffixes". The lifetime of the array is tied to
925
+ /// MetricsContext.
926
+ /// return all the suffixes the metric has. the possible suffixes include:
927
+ /// * counter.{sum,avg,min,max}
928
+ /// * throughput.{avg,min,max}
929
+ /// * \<metric\>.peak_{burst, sustained}
930
+ /// * \<metric\>.per_{active,elapsed,region,frame}_cycle
931
+ /// * \<metric\>.pct_of_peak_{burst,sustained}_{active,elapsed,region,frame}
932
+ /// * \<metric\>.per.{other, other_pct}
933
+ NVPA_Status NVPW_MetricsContext_GetMetricSuffix_Begin(NVPW_MetricsContext_GetMetricSuffix_Begin_Params* pParams);
934
+
935
+ typedef struct NVPW_MetricsContext_GetMetricSuffix_End_Params
936
+ {
937
+ /// [in]
938
+ size_t structSize;
939
+ /// [in] assign to NULL
940
+ void* pPriv;
941
+ NVPA_MetricsContext* pMetricsContext;
942
+ } NVPW_MetricsContext_GetMetricSuffix_End_Params;
943
+ #define NVPW_MetricsContext_GetMetricSuffix_End_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_GetMetricSuffix_End_Params, pMetricsContext)
944
+
945
+ /// Cleans up memory internally allocated by NVPW_MetricsContext_GetMetricSuffix_Begin.
946
+ NVPA_Status NVPW_MetricsContext_GetMetricSuffix_End(NVPW_MetricsContext_GetMetricSuffix_End_Params* pParams);
947
+
948
+ typedef struct NVPW_MetricsContext_GetMetricBaseNames_Begin_Params
949
+ {
950
+ /// [in]
951
+ size_t structSize;
952
+ /// [in] assign to NULL
953
+ void* pPriv;
954
+ NVPA_MetricsContext* pMetricsContext;
955
+ /// out: number of elements in array pMetricsBaseNames
956
+ size_t numMetricBaseNames;
957
+ /// out: pointer to array of 'const char* pMetricsBaseName'
958
+ const char* const* ppMetricBaseNames;
959
+ } NVPW_MetricsContext_GetMetricBaseNames_Begin_Params;
960
+ #define NVPW_MetricsContext_GetMetricBaseNames_Begin_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_GetMetricBaseNames_Begin_Params, ppMetricBaseNames)
961
+
962
+ /// Outputs (size, pointer) to an array of "const char* ppMetricBaseNames". The lifetime of the array is tied to
963
+ /// MetricsContext.
964
+ /// return all the metric base names.
965
+ NVPA_Status NVPW_MetricsContext_GetMetricBaseNames_Begin(NVPW_MetricsContext_GetMetricBaseNames_Begin_Params* pParams);
966
+
967
+ typedef struct NVPW_MetricsContext_GetMetricBaseNames_End_Params
968
+ {
969
+ /// [in]
970
+ size_t structSize;
971
+ /// [in] assign to NULL
972
+ void* pPriv;
973
+ NVPA_MetricsContext* pMetricsContext;
974
+ } NVPW_MetricsContext_GetMetricBaseNames_End_Params;
975
+ #define NVPW_MetricsContext_GetMetricBaseNames_End_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsContext_GetMetricBaseNames_End_Params, pMetricsContext)
976
+
977
+ /// Cleans up memory internally allocated by NVPW_MetricsContext_GetMetricBaseNames_Begin.
978
+ NVPA_Status NVPW_MetricsContext_GetMetricBaseNames_End(NVPW_MetricsContext_GetMetricBaseNames_End_Params* pParams);
979
+
980
+ /**
981
+ * @}
982
+ ******************************************************************************/
983
+
984
+ /***************************************************************************//**
985
+ * @name Metrics Evaluator
986
+ * @{
987
+ */
988
+
989
+ typedef struct NVPW_MetricsEvaluator NVPW_MetricsEvaluator;
990
+
991
+ #ifndef NVPW_DIM_UNIT_DEFINED
992
+ #define NVPW_DIM_UNIT_DEFINED
993
+ typedef enum NVPW_DimUnitName
994
+ {
995
+ NVPW_DIM_UNIT_INVALID = 3518299157,
996
+ NVPW_DIM_UNIT_UNITLESS = 2126137902,
997
+ NVPW_DIM_UNIT_ATTRIBUTES = 3776338729,
998
+ NVPW_DIM_UNIT_BYTES = 3797850191,
999
+ NVPW_DIM_UNIT_CTAS = 1960564139,
1000
+ NVPW_DIM_UNIT_DRAM_CYCLES = 2650981327,
1001
+ NVPW_DIM_UNIT_FBP_CYCLES = 1785238957,
1002
+ NVPW_DIM_UNIT_FE_OPS = 2919159083,
1003
+ NVPW_DIM_UNIT_GPC_CYCLES = 1222631184,
1004
+ NVPW_DIM_UNIT_IDC_REQUESTS = 2012649669,
1005
+ NVPW_DIM_UNIT_INSTRUCTIONS = 1418625543,
1006
+ NVPW_DIM_UNIT_L1DATA_BANK_ACCESSES = 1479493682,
1007
+ NVPW_DIM_UNIT_L1DATA_BANK_CONFLICTS = 3433170787,
1008
+ NVPW_DIM_UNIT_L1TEX_REQUESTS = 1306473767,
1009
+ NVPW_DIM_UNIT_L1TEX_TAGS = 26573010,
1010
+ NVPW_DIM_UNIT_L1TEX_WAVEFRONTS = 129373765,
1011
+ NVPW_DIM_UNIT_L2_REQUESTS = 1143695106,
1012
+ NVPW_DIM_UNIT_L2_SECTORS = 3424101564,
1013
+ NVPW_DIM_UNIT_L2_TAGS = 3755612781,
1014
+ NVPW_DIM_UNIT_NANOSECONDS = 3047500672,
1015
+ NVPW_DIM_UNIT_NVLRX_CYCLES = 4059934930,
1016
+ NVPW_DIM_UNIT_NVLTX_CYCLES = 1814350488,
1017
+ NVPW_DIM_UNIT_PCIE_CYCLES = 1230450943,
1018
+ NVPW_DIM_UNIT_PERCENT = 1284354694,
1019
+ NVPW_DIM_UNIT_PIXELS = 4227616663,
1020
+ NVPW_DIM_UNIT_PIXEL_SHADER_BARRIERS = 3705502518,
1021
+ NVPW_DIM_UNIT_PRIMITIVES = 2373084002,
1022
+ NVPW_DIM_UNIT_QUADS = 1539753497,
1023
+ NVPW_DIM_UNIT_REGISTERS = 2837260947,
1024
+ NVPW_DIM_UNIT_SAMPLES = 746046551,
1025
+ NVPW_DIM_UNIT_SECONDS = 1164825258,
1026
+ NVPW_DIM_UNIT_SYS_CYCLES = 3310821688,
1027
+ NVPW_DIM_UNIT_TEXELS = 1293214069,
1028
+ NVPW_DIM_UNIT_THREADS = 164261907,
1029
+ NVPW_DIM_UNIT_VERTICES = 1873662209,
1030
+ NVPW_DIM_UNIT_WARPS = 97951949,
1031
+ NVPW_DIM_UNIT_WORKLOADS = 1728142656
1032
+ } NVPW_DimUnitName;
1033
+ #endif //NVPW_DIM_UNIT_DEFINED
1034
+
1035
+ #ifndef NVPW_HW_UNIT_DEFINED
1036
+ #define NVPW_HW_UNIT_DEFINED
1037
+ typedef enum NVPW_HwUnit
1038
+ {
1039
+ NVPW_HW_UNIT_INVALID = 3498035701,
1040
+ NVPW_HW_UNIT_CROP = 2872137846,
1041
+ NVPW_HW_UNIT_DRAM = 1662616918,
1042
+ NVPW_HW_UNIT_DRAMC = 1401232876,
1043
+ NVPW_HW_UNIT_FBP = 2947194306,
1044
+ NVPW_HW_UNIT_FBPA = 690045803,
1045
+ NVPW_HW_UNIT_FE = 2204924321,
1046
+ NVPW_HW_UNIT_GPC = 1911735839,
1047
+ NVPW_HW_UNIT_GPU = 1014363534,
1048
+ NVPW_HW_UNIT_GR = 2933618517,
1049
+ NVPW_HW_UNIT_IDC = 842765289,
1050
+ NVPW_HW_UNIT_L1TEX = 893940957,
1051
+ NVPW_HW_UNIT_LTS = 2333266697,
1052
+ NVPW_HW_UNIT_NVLRX = 3091684901,
1053
+ NVPW_HW_UNIT_NVLTX = 869679659,
1054
+ NVPW_HW_UNIT_PCIE = 3433264174,
1055
+ NVPW_HW_UNIT_PDA = 345193251,
1056
+ NVPW_HW_UNIT_PES = 804128425,
1057
+ NVPW_HW_UNIT_PROP = 3339255507,
1058
+ NVPW_HW_UNIT_RASTER = 187932504,
1059
+ NVPW_HW_UNIT_SM = 724224710,
1060
+ NVPW_HW_UNIT_SMSP = 2837616917,
1061
+ NVPW_HW_UNIT_SYS = 768990063,
1062
+ NVPW_HW_UNIT_TPC = 1889024613,
1063
+ NVPW_HW_UNIT_VAF = 753670509,
1064
+ NVPW_HW_UNIT_VPC = 275561583,
1065
+ NVPW_HW_UNIT_ZROP = 979500456
1066
+ } NVPW_HwUnit;
1067
+ #endif //NVPW_HW_UNIT_DEFINED
1068
+
1069
+ typedef enum NVPW_RollupOp
1070
+ {
1071
+ NVPW_ROLLUP_OP_AVG = 0,
1072
+ NVPW_ROLLUP_OP_MAX,
1073
+ NVPW_ROLLUP_OP_MIN,
1074
+ NVPW_ROLLUP_OP_SUM,
1075
+ NVPW_ROLLUP_OP__COUNT
1076
+ } NVPW_RollupOp;
1077
+
1078
+ typedef enum NVPW_MetricType
1079
+ {
1080
+ NVPW_METRIC_TYPE_COUNTER = 0,
1081
+ NVPW_METRIC_TYPE_RATIO,
1082
+ NVPW_METRIC_TYPE_THROUGHPUT,
1083
+ NVPW_METRIC_TYPE__COUNT
1084
+ } NVPW_MetricType;
1085
+
1086
+ typedef enum NVPW_Submetric
1087
+ {
1088
+ NVPW_SUBMETRIC_NONE = 0,
1089
+ NVPW_SUBMETRIC_PEAK_SUSTAINED = 1,
1090
+ NVPW_SUBMETRIC_PEAK_SUSTAINED_ACTIVE = 2,
1091
+ NVPW_SUBMETRIC_PEAK_SUSTAINED_ACTIVE_PER_SECOND = 3,
1092
+ NVPW_SUBMETRIC_PEAK_SUSTAINED_ELAPSED = 4,
1093
+ NVPW_SUBMETRIC_PEAK_SUSTAINED_ELAPSED_PER_SECOND = 5,
1094
+ NVPW_SUBMETRIC_PEAK_SUSTAINED_FRAME = 6,
1095
+ NVPW_SUBMETRIC_PEAK_SUSTAINED_FRAME_PER_SECOND = 7,
1096
+ NVPW_SUBMETRIC_PEAK_SUSTAINED_REGION = 8,
1097
+ NVPW_SUBMETRIC_PEAK_SUSTAINED_REGION_PER_SECOND = 9,
1098
+ NVPW_SUBMETRIC_PER_CYCLE_ACTIVE = 10,
1099
+ NVPW_SUBMETRIC_PER_CYCLE_ELAPSED = 11,
1100
+ NVPW_SUBMETRIC_PER_CYCLE_IN_FRAME = 12,
1101
+ NVPW_SUBMETRIC_PER_CYCLE_IN_REGION = 13,
1102
+ NVPW_SUBMETRIC_PER_SECOND = 14,
1103
+ NVPW_SUBMETRIC_PCT_OF_PEAK_SUSTAINED_ACTIVE = 15,
1104
+ NVPW_SUBMETRIC_PCT_OF_PEAK_SUSTAINED_ELAPSED = 16,
1105
+ NVPW_SUBMETRIC_PCT_OF_PEAK_SUSTAINED_FRAME = 17,
1106
+ NVPW_SUBMETRIC_PCT_OF_PEAK_SUSTAINED_REGION = 18,
1107
+ NVPW_SUBMETRIC_MAX_RATE = 19,
1108
+ NVPW_SUBMETRIC_PCT = 20,
1109
+ NVPW_SUBMETRIC_RATIO = 21,
1110
+ NVPW_SUBMETRIC__COUNT
1111
+ } NVPW_Submetric;
1112
+
1113
+ typedef struct NVPW_MetricEvalRequest
1114
+ {
1115
+ /// the metric index as in 'NVPW_MetricsEvaluator_GetMetricNames'
1116
+ size_t metricIndex;
1117
+ /// one of 'NVPW_MetricType'
1118
+ uint8_t metricType;
1119
+ /// one of 'NVPW_RollupOp', required for Counter and Throughput, doesn't apply to Ratio
1120
+ uint8_t rollupOp;
1121
+ /// one of 'NVPW_Submetric', required for Ratio and Throughput, optional for Counter
1122
+ uint16_t submetric;
1123
+ } NVPW_MetricEvalRequest;
1124
+ #define NVPW_MetricEvalRequest_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricEvalRequest, submetric)
1125
+
1126
+ typedef struct NVPW_DimUnitFactor
1127
+ {
1128
+ /// one of 'NVPW_DimUnitName'
1129
+ uint32_t dimUnit;
1130
+ int8_t exponent;
1131
+ } NVPW_DimUnitFactor;
1132
+ #define NVPW_DimUnitFactor_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_DimUnitFactor, exponent)
1133
+
1134
+ typedef struct NVPW_MetricsEvaluator_Destroy_Params
1135
+ {
1136
+ /// [in]
1137
+ size_t structSize;
1138
+ /// [in] assign to NULL
1139
+ void* pPriv;
1140
+ /// [in]
1141
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
1142
+ } NVPW_MetricsEvaluator_Destroy_Params;
1143
+ #define NVPW_MetricsEvaluator_Destroy_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_Destroy_Params, pMetricsEvaluator)
1144
+
1145
+ NVPA_Status NVPW_MetricsEvaluator_Destroy(NVPW_MetricsEvaluator_Destroy_Params* pParams);
1146
+
1147
+ typedef struct NVPW_MetricsEvaluator_GetMetricNames_Params
1148
+ {
1149
+ /// [in]
1150
+ size_t structSize;
1151
+ /// [in] assign to NULL
1152
+ void* pPriv;
1153
+ /// [in]
1154
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
1155
+ /// [in] one of 'NVPW_MetricType'
1156
+ uint8_t metricType;
1157
+ /// [out]
1158
+ const char* pMetricNames;
1159
+ /// [out]
1160
+ const size_t* pMetricNameBeginIndices;
1161
+ /// [out]
1162
+ size_t numMetrics;
1163
+ } NVPW_MetricsEvaluator_GetMetricNames_Params;
1164
+ #define NVPW_MetricsEvaluator_GetMetricNames_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_GetMetricNames_Params, numMetrics)
1165
+
1166
+ NVPA_Status NVPW_MetricsEvaluator_GetMetricNames(NVPW_MetricsEvaluator_GetMetricNames_Params* pParams);
1167
+
1168
+ typedef struct NVPW_MetricsEvaluator_GetMetricTypeAndIndex_Params
1169
+ {
1170
+ /// [in]
1171
+ size_t structSize;
1172
+ /// [in] assign to NULL
1173
+ void* pPriv;
1174
+ /// [in]
1175
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
1176
+ /// [in] can be either a base metric or a metric
1177
+ const char* pMetricName;
1178
+ /// [out] one of 'NVPW_MetricType'
1179
+ uint8_t metricType;
1180
+ /// [out] the metric index as in 'NVPW_MetricsEvaluator_GetMetricNames'
1181
+ size_t metricIndex;
1182
+ } NVPW_MetricsEvaluator_GetMetricTypeAndIndex_Params;
1183
+ #define NVPW_MetricsEvaluator_GetMetricTypeAndIndex_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_GetMetricTypeAndIndex_Params, metricIndex)
1184
+
1185
+ NVPA_Status NVPW_MetricsEvaluator_GetMetricTypeAndIndex(NVPW_MetricsEvaluator_GetMetricTypeAndIndex_Params* pParams);
1186
+
1187
+ typedef struct NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params
1188
+ {
1189
+ /// [in]
1190
+ size_t structSize;
1191
+ /// [in] assign to NULL
1192
+ void* pPriv;
1193
+ /// [in]
1194
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
1195
+ /// [in]
1196
+ const char* pMetricName;
1197
+ /// [inout] 'pMetricEvalRequest' is in, '*pMetricEvalRequest' is out
1198
+ struct NVPW_MetricEvalRequest* pMetricEvalRequest;
1199
+ /// [in] set to 'NVPW_MetricEvalRequest_STRUCT_SIZE'
1200
+ size_t metricEvalRequestStructSize;
1201
+ } NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params;
1202
+ #define NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params, metricEvalRequestStructSize)
1203
+
1204
+ NVPA_Status NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest(NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params* pParams);
1205
+
1206
+ typedef struct NVPW_MetricsEvaluator_HwUnitToString_Params
1207
+ {
1208
+ /// [in]
1209
+ size_t structSize;
1210
+ /// [in] assign to NULL
1211
+ void* pPriv;
1212
+ /// [in]
1213
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
1214
+ /// [in] one of 'NVPW_HwUnit'
1215
+ uint32_t hwUnit;
1216
+ /// [out]
1217
+ const char* pHwUnitName;
1218
+ } NVPW_MetricsEvaluator_HwUnitToString_Params;
1219
+ #define NVPW_MetricsEvaluator_HwUnitToString_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_HwUnitToString_Params, pHwUnitName)
1220
+
1221
+ NVPA_Status NVPW_MetricsEvaluator_HwUnitToString(NVPW_MetricsEvaluator_HwUnitToString_Params* pParams);
1222
+
1223
+ typedef struct NVPW_MetricsEvaluator_GetCounterProperties_Params
1224
+ {
1225
+ /// [in]
1226
+ size_t structSize;
1227
+ /// [in] assign to NULL
1228
+ void* pPriv;
1229
+ /// [in]
1230
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
1231
+ /// [in] the metric index as in 'NVPW_MetricsEvaluator_GetMetricNames'
1232
+ size_t counterIndex;
1233
+ /// [out]
1234
+ const char* pDescription;
1235
+ /// [out] one of 'NVPW_HwUnit'
1236
+ uint32_t hwUnit;
1237
+ } NVPW_MetricsEvaluator_GetCounterProperties_Params;
1238
+ #define NVPW_MetricsEvaluator_GetCounterProperties_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_GetCounterProperties_Params, hwUnit)
1239
+
1240
+ NVPA_Status NVPW_MetricsEvaluator_GetCounterProperties(NVPW_MetricsEvaluator_GetCounterProperties_Params* pParams);
1241
+
1242
+ typedef struct NVPW_MetricsEvaluator_GetRatioMetricProperties_Params
1243
+ {
1244
+ /// [in]
1245
+ size_t structSize;
1246
+ /// [in] assign to NULL
1247
+ void* pPriv;
1248
+ /// [in]
1249
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
1250
+ /// [in] the metric index as in 'NVPW_MetricsEvaluator_GetMetricNames'
1251
+ size_t ratioMetricIndex;
1252
+ /// [out]
1253
+ const char* pDescription;
1254
+ /// [out]
1255
+ uint64_t hwUnit;
1256
+ } NVPW_MetricsEvaluator_GetRatioMetricProperties_Params;
1257
+ #define NVPW_MetricsEvaluator_GetRatioMetricProperties_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_GetRatioMetricProperties_Params, hwUnit)
1258
+
1259
+ NVPA_Status NVPW_MetricsEvaluator_GetRatioMetricProperties(NVPW_MetricsEvaluator_GetRatioMetricProperties_Params* pParams);
1260
+
1261
+ typedef struct NVPW_MetricsEvaluator_GetThroughputMetricProperties_Params
1262
+ {
1263
+ /// [in]
1264
+ size_t structSize;
1265
+ /// [in] assign to NULL
1266
+ void* pPriv;
1267
+ /// [in]
1268
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
1269
+ /// [in] the metric index as in 'NVPW_MetricsEvaluator_GetMetricNames'
1270
+ size_t throughputMetricIndex;
1271
+ /// [out]
1272
+ const char* pDescription;
1273
+ /// [out]
1274
+ uint32_t hwUnit;
1275
+ /// [out] number of constituent counters for the throughput metric
1276
+ size_t numCounters;
1277
+ /// [out] metric indices as in 'NVPW_MetricsEvaluator_GetMetricNames', valid if 'numCounters' > 0, otherwise
1278
+ /// returned as nullptr
1279
+ const size_t* pCounterIndices;
1280
+ /// [out] number of constituent sub-throughputs for the throughput metric
1281
+ size_t numSubThroughputs;
1282
+ /// [out] metric indices as in 'NVPW_MetricsEvaluator_GetMetricNames', valid if 'numSubThroughputs' > 0,
1283
+ /// otherwise returned as nullptr
1284
+ const size_t* pSubThroughputIndices;
1285
+ } NVPW_MetricsEvaluator_GetThroughputMetricProperties_Params;
1286
+ #define NVPW_MetricsEvaluator_GetThroughputMetricProperties_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_GetThroughputMetricProperties_Params, pSubThroughputIndices)
1287
+
1288
+ NVPA_Status NVPW_MetricsEvaluator_GetThroughputMetricProperties(NVPW_MetricsEvaluator_GetThroughputMetricProperties_Params* pParams);
1289
+
1290
+ typedef struct NVPW_MetricsEvaluator_GetSupportedSubmetrics_Params
1291
+ {
1292
+ /// [in]
1293
+ size_t structSize;
1294
+ /// [in] assign to NULL
1295
+ void* pPriv;
1296
+ /// [in]
1297
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
1298
+ /// [in] one of 'NVPW_MetricType'
1299
+ uint8_t metricType;
1300
+ /// [out] an array of 'NVPW_Submetric'
1301
+ const uint16_t* pSupportedSubmetrics;
1302
+ /// [out]
1303
+ size_t numSupportedSubmetrics;
1304
+ } NVPW_MetricsEvaluator_GetSupportedSubmetrics_Params;
1305
+ #define NVPW_MetricsEvaluator_GetSupportedSubmetrics_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_GetSupportedSubmetrics_Params, numSupportedSubmetrics)
1306
+
1307
+ NVPA_Status NVPW_MetricsEvaluator_GetSupportedSubmetrics(NVPW_MetricsEvaluator_GetSupportedSubmetrics_Params* pParams);
1308
+
1309
+ typedef struct NVPW_MetricsEvaluator_GetMetricRawDependencies_Params
1310
+ {
1311
+ /// [in]
1312
+ size_t structSize;
1313
+ /// [in] assign to NULL
1314
+ void* pPriv;
1315
+ /// [in]
1316
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
1317
+ /// [in]
1318
+ const struct NVPW_MetricEvalRequest* pMetricEvalRequests;
1319
+ /// [in]
1320
+ size_t numMetricEvalRequests;
1321
+ /// [in] set to 'NVPW_MetricEvalRequest_STRUCT_SIZE'
1322
+ size_t metricEvalRequestStructSize;
1323
+ /// [in] set to sizeof('NVPW_MetricEvalRequest')
1324
+ size_t metricEvalRequestStrideSize;
1325
+ /// [inout] 'ppRawDependencies' is in, '*ppRawDependencies' is out
1326
+ const char** ppRawDependencies;
1327
+ /// [inout] if 'ppRawDependencies' is NULL, number of raw dependencies available will be returned; otherwise it
1328
+ /// should be set to the number of elements allocated for 'ppRawDependencies', and on return, it will be
1329
+ /// overwritten by number of elements copied to 'ppRawDependencies'
1330
+ size_t numRawDependencies;
1331
+ /// [inout] 'ppOptionalRawDependencies' is in, '*ppOptionalRawDependencies' is out
1332
+ const char** ppOptionalRawDependencies;
1333
+ /// [inout] if 'ppOptionalRawDependencies' is NULL, number of optional raw dependencies available will be
1334
+ /// returned; otherwise it should be set to the number of elements allocated for 'ppOptionalRawDependencies',
1335
+ /// and on return, it will be overwritten by number of elements copied to 'ppOptionalRawDependencies'
1336
+ size_t numOptionalRawDependencies;
1337
+ } NVPW_MetricsEvaluator_GetMetricRawDependencies_Params;
1338
+ #define NVPW_MetricsEvaluator_GetMetricRawDependencies_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_GetMetricRawDependencies_Params, numOptionalRawDependencies)
1339
+
1340
+ NVPA_Status NVPW_MetricsEvaluator_GetMetricRawDependencies(NVPW_MetricsEvaluator_GetMetricRawDependencies_Params* pParams);
1341
+
1342
+ typedef struct NVPW_MetricsEvaluator_DimUnitToString_Params
1343
+ {
1344
+ /// [in]
1345
+ size_t structSize;
1346
+ /// [in] assign to NULL
1347
+ void* pPriv;
1348
+ /// [in]
1349
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
1350
+ /// [in] one of 'NVPW_DimUnitName'
1351
+ uint32_t dimUnit;
1352
+ /// [out]
1353
+ const char* pSingularName;
1354
+ /// [out]
1355
+ const char* pPluralName;
1356
+ } NVPW_MetricsEvaluator_DimUnitToString_Params;
1357
+ #define NVPW_MetricsEvaluator_DimUnitToString_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_DimUnitToString_Params, pPluralName)
1358
+
1359
+ NVPA_Status NVPW_MetricsEvaluator_DimUnitToString(NVPW_MetricsEvaluator_DimUnitToString_Params* pParams);
1360
+
1361
+ typedef struct NVPW_MetricsEvaluator_GetMetricDimUnits_Params
1362
+ {
1363
+ /// [in]
1364
+ size_t structSize;
1365
+ /// [in] assign to NULL
1366
+ void* pPriv;
1367
+ /// [in]
1368
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
1369
+ /// [in]
1370
+ const struct NVPW_MetricEvalRequest* pMetricEvalRequest;
1371
+ /// [in] set to 'NVPW_MetricEvalRequest_STRUCT_SIZE'
1372
+ size_t metricEvalRequestStructSize;
1373
+ /// [inout] 'pDimUnits' is in, '*pDimUnits' is out
1374
+ NVPW_DimUnitFactor* pDimUnits;
1375
+ /// [inout] if 'pDimUnits' is NULL, number of dim-units available will be returned; otherwise it should be set
1376
+ /// to the number of elements allocated for 'pDimUnits', and on return, it will be overwritten by number of
1377
+ /// elements copied to 'pDimUnits'
1378
+ size_t numDimUnits;
1379
+ /// [in] set to 'NVPW_DimUnitFactor_STRUCT_SIZE'
1380
+ size_t dimUnitFactorStructSize;
1381
+ } NVPW_MetricsEvaluator_GetMetricDimUnits_Params;
1382
+ #define NVPW_MetricsEvaluator_GetMetricDimUnits_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_GetMetricDimUnits_Params, dimUnitFactorStructSize)
1383
+
1384
+ NVPA_Status NVPW_MetricsEvaluator_GetMetricDimUnits(NVPW_MetricsEvaluator_GetMetricDimUnits_Params* pParams);
1385
+
1386
+ typedef struct NVPW_MetricsEvaluator_SetUserData_Params
1387
+ {
1388
+ /// [in]
1389
+ size_t structSize;
1390
+ /// [in] assign to NULL
1391
+ void* pPriv;
1392
+ /// [in]
1393
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
1394
+ /// [in] duration in ns of user defined frame
1395
+ double frameDuration;
1396
+ /// [in] duration in ns of user defined region
1397
+ double regionDuration;
1398
+ /// [in]
1399
+ NVPA_Bool isolated;
1400
+ } NVPW_MetricsEvaluator_SetUserData_Params;
1401
+ #define NVPW_MetricsEvaluator_SetUserData_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_SetUserData_Params, isolated)
1402
+
1403
+ NVPA_Status NVPW_MetricsEvaluator_SetUserData(NVPW_MetricsEvaluator_SetUserData_Params* pParams);
1404
+
1405
+ typedef struct NVPW_MetricsEvaluator_EvaluateToGpuValues_Params
1406
+ {
1407
+ /// [in]
1408
+ size_t structSize;
1409
+ /// [in] assign to NULL
1410
+ void* pPriv;
1411
+ /// [in]
1412
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
1413
+ /// [in]
1414
+ const struct NVPW_MetricEvalRequest* pMetricEvalRequests;
1415
+ /// [in]
1416
+ size_t numMetricEvalRequests;
1417
+ /// [in] set to 'NVPW_MetricEvalRequest_STRUCT_SIZE'
1418
+ size_t metricEvalRequestStructSize;
1419
+ /// [in] set to sizeof('NVPW_MetricEvalRequest')
1420
+ size_t metricEvalRequestStrideSize;
1421
+ /// [in]
1422
+ const uint8_t* pCounterDataImage;
1423
+ /// [in]
1424
+ size_t counterDataImageSize;
1425
+ /// [in]
1426
+ size_t rangeIndex;
1427
+ /// [in]
1428
+ NVPA_Bool isolated;
1429
+ /// [inout] 'pMetricValues' is in, '*pMetricValues' is out
1430
+ double* pMetricValues;
1431
+ } NVPW_MetricsEvaluator_EvaluateToGpuValues_Params;
1432
+ #define NVPW_MetricsEvaluator_EvaluateToGpuValues_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_EvaluateToGpuValues_Params, pMetricValues)
1433
+
1434
+ NVPA_Status NVPW_MetricsEvaluator_EvaluateToGpuValues(NVPW_MetricsEvaluator_EvaluateToGpuValues_Params* pParams);
1435
+
1436
+ typedef struct NVPW_MetricsEvaluator_SetDeviceAttributes_Params
1437
+ {
1438
+ /// [in]
1439
+ size_t structSize;
1440
+ /// [in] assign to NULL
1441
+ void* pPriv;
1442
+ /// [in]
1443
+ struct NVPW_MetricsEvaluator* pMetricsEvaluator;
1444
+ /// [in]
1445
+ const uint8_t* pCounterDataImage;
1446
+ /// [in]
1447
+ size_t counterDataImageSize;
1448
+ } NVPW_MetricsEvaluator_SetDeviceAttributes_Params;
1449
+ #define NVPW_MetricsEvaluator_SetDeviceAttributes_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_MetricsEvaluator_SetDeviceAttributes_Params, counterDataImageSize)
1450
+
1451
+ NVPA_Status NVPW_MetricsEvaluator_SetDeviceAttributes(NVPW_MetricsEvaluator_SetDeviceAttributes_Params* pParams);
1452
+
1453
+ /**
1454
+ * @}
1455
+ ******************************************************************************/
1456
+
1457
+
1458
+ #endif // NVPERF_HOST_API_DEFINED
1459
+
1460
+
1461
+
1462
+
1463
+ #ifdef __cplusplus
1464
+ } // extern "C"
1465
+ #endif
1466
+
1467
+ #if defined(__GNUC__) && defined(NVPA_SHARED_LIB)
1468
+ #pragma GCC visibility pop
1469
+ #endif
1470
+
1471
+ #endif // NVPERF_HOST_H
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_runtime/__init__.py ADDED
File without changes
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cusolver/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (216 Bytes). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cusolver/include/__init__.py ADDED
File without changes
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cusolver/include/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (224 Bytes). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cusolver/include/cusolverMg.h ADDED
@@ -0,0 +1,318 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2019 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #if !defined(CUSOLVERMG_H_)
51
+ #define CUSOLVERMG_H_
52
+
53
+ #include <stdint.h>
54
+ #include "cusolverDn.h"
55
+
56
+ #if defined(__cplusplus)
57
+ extern "C" {
58
+ #endif /* __cplusplus */
59
+
60
+ struct cusolverMgContext;
61
+ typedef struct cusolverMgContext *cusolverMgHandle_t;
62
+
63
+ /**
64
+ * \beief This enum decides how 1D device Ids (or process ranks) get mapped to
65
+ * a 2D grid.
66
+ */
67
+ typedef enum {
68
+
69
+ CUDALIBMG_GRID_MAPPING_ROW_MAJOR = 1,
70
+ CUDALIBMG_GRID_MAPPING_COL_MAJOR = 0
71
+
72
+ } cusolverMgGridMapping_t;
73
+
74
+ /** \brief Opaque structure of the distributed grid */
75
+ typedef void *cudaLibMgGrid_t;
76
+ /** \brief Opaque structure of the distributed matrix descriptor */
77
+ typedef void *cudaLibMgMatrixDesc_t;
78
+
79
+ cusolverStatus_t CUSOLVERAPI cusolverMgCreate(cusolverMgHandle_t *handle);
80
+
81
+ cusolverStatus_t CUSOLVERAPI cusolverMgDestroy(cusolverMgHandle_t handle);
82
+
83
+ cusolverStatus_t CUSOLVERAPI cusolverMgDeviceSelect(
84
+ cusolverMgHandle_t handle,
85
+ int nbDevices,
86
+ int deviceId[]);
87
+
88
+ /**
89
+ * \brief Allocates resources related to the shared memory device grid.
90
+ * \param[out] grid the opaque data strcuture that holds the grid
91
+ * \param[in] numRowDevices number of devices in the row
92
+ * \param[in] numColDevices number of devices in the column
93
+ * \param[in] deviceId This array of size height * width stores the
94
+ * device-ids of the 2D grid; each entry must correspond to a valid
95
+ * gpu or to -1 (denoting CPU). \param[in] mapping whether the 2D grid is in
96
+ * row/column major \returns the status code
97
+ */
98
+ cusolverStatus_t CUSOLVERAPI cusolverMgCreateDeviceGrid(
99
+ cudaLibMgGrid_t * grid,
100
+ int32_t numRowDevices,
101
+ int32_t numColDevices,
102
+ const int32_t deviceId[],
103
+ cusolverMgGridMapping_t mapping);
104
+
105
+ /**
106
+ * \brief Releases the allocated resources related to the distributed grid.
107
+ * \param[in] grid the opaque data strcuture that holds the distributed grid
108
+ * \returns the status code
109
+ */
110
+ cusolverStatus_t CUSOLVERAPI cusolverMgDestroyGrid(cudaLibMgGrid_t grid);
111
+
112
+ /**
113
+ * \brief Allocates resources related to the distributed matrix descriptor.
114
+ * \param[out] desc the opaque data strcuture that holds the descriptor
115
+ * \param[in] numRows number of total rows
116
+ * \param[in] numCols number of total columns
117
+ * \param[in] rowBlockSize row block size
118
+ * \param[in] colBlockSize column block size
119
+ * \param[in] dataType the data type of each element in cudaDataType
120
+ * \param[in] grid the opaque data structure of the distributed grid
121
+ * \returns the status code
122
+ */
123
+ cusolverStatus_t CUSOLVERAPI cusolverMgCreateMatrixDesc(
124
+ cudaLibMgMatrixDesc_t *desc,
125
+ int64_t numRows,
126
+ int64_t numCols,
127
+ int64_t rowBlockSize,
128
+ int64_t colBlockSize,
129
+ cudaDataType dataType,
130
+ const cudaLibMgGrid_t grid);
131
+
132
+ /**
133
+ * \brief Releases the allocated resources related to the distributed matrix
134
+ * descriptor. \param[in] desc the opaque data strcuture that holds the
135
+ * descriptor \returns the status code
136
+ */
137
+ cusolverStatus_t CUSOLVERAPI
138
+ cusolverMgDestroyMatrixDesc(cudaLibMgMatrixDesc_t desc);
139
+
140
+ cusolverStatus_t CUSOLVERAPI cusolverMgSyevd_bufferSize(
141
+ cusolverMgHandle_t handle,
142
+ cusolverEigMode_t jobz,
143
+ cublasFillMode_t uplo,
144
+ int N,
145
+ void * array_d_A[],
146
+ int IA,
147
+ int JA,
148
+ cudaLibMgMatrixDesc_t descrA,
149
+ void * W,
150
+ cudaDataType dataTypeW,
151
+ cudaDataType computeType,
152
+ int64_t * lwork);
153
+
154
+ cusolverStatus_t CUSOLVERAPI cusolverMgSyevd(
155
+ cusolverMgHandle_t handle,
156
+ cusolverEigMode_t jobz,
157
+ cublasFillMode_t uplo,
158
+ int N,
159
+ void * array_d_A[],
160
+ int IA,
161
+ int JA,
162
+ cudaLibMgMatrixDesc_t descrA,
163
+ void * W,
164
+ cudaDataType dataTypeW,
165
+ cudaDataType computeType,
166
+ void * array_d_work[],
167
+ int64_t lwork,
168
+ int * info);
169
+
170
+ cusolverStatus_t CUSOLVERAPI cusolverMgGetrf_bufferSize(
171
+ cusolverMgHandle_t handle,
172
+ int M,
173
+ int N,
174
+ void * array_d_A[],
175
+ int IA,
176
+ int JA,
177
+ cudaLibMgMatrixDesc_t descrA,
178
+ int * array_d_IPIV[],
179
+ cudaDataType computeType,
180
+ int64_t * lwork);
181
+
182
+ cusolverStatus_t CUSOLVERAPI cusolverMgGetrf(
183
+ cusolverMgHandle_t handle,
184
+ int M,
185
+ int N,
186
+ void * array_d_A[],
187
+ int IA,
188
+ int JA,
189
+ cudaLibMgMatrixDesc_t descrA,
190
+ int * array_d_IPIV[],
191
+ cudaDataType computeType,
192
+ void * array_d_work[],
193
+ int64_t lwork,
194
+ int * info);
195
+
196
+ cusolverStatus_t CUSOLVERAPI cusolverMgGetrs_bufferSize(
197
+ cusolverMgHandle_t handle,
198
+ cublasOperation_t TRANS,
199
+ int N,
200
+ int NRHS,
201
+ void * array_d_A[],
202
+ int IA,
203
+ int JA,
204
+ cudaLibMgMatrixDesc_t descrA,
205
+ int * array_d_IPIV[],
206
+ void * array_d_B[],
207
+ int IB,
208
+ int JB,
209
+ cudaLibMgMatrixDesc_t descrB,
210
+ cudaDataType computeType,
211
+ int64_t * lwork);
212
+
213
+ cusolverStatus_t CUSOLVERAPI cusolverMgGetrs(
214
+ cusolverMgHandle_t handle,
215
+ cublasOperation_t TRANS,
216
+ int N,
217
+ int NRHS,
218
+ void * array_d_A[],
219
+ int IA,
220
+ int JA,
221
+ cudaLibMgMatrixDesc_t descrA,
222
+ int * array_d_IPIV[],
223
+ void * array_d_B[],
224
+ int IB,
225
+ int JB,
226
+ cudaLibMgMatrixDesc_t descrB,
227
+ cudaDataType computeType,
228
+ void * array_d_work[],
229
+ int64_t lwork,
230
+ int * info);
231
+
232
+ cusolverStatus_t CUSOLVERAPI cusolverMgPotrf_bufferSize(
233
+ cusolverMgHandle_t handle,
234
+ cublasFillMode_t uplo,
235
+ int N,
236
+ void * array_d_A[],
237
+ int IA,
238
+ int JA,
239
+ cudaLibMgMatrixDesc_t descrA,
240
+ cudaDataType computeType,
241
+ int64_t * lwork);
242
+
243
+ cusolverStatus_t CUSOLVERAPI cusolverMgPotrf(
244
+ cusolverMgHandle_t handle,
245
+ cublasFillMode_t uplo,
246
+ int N,
247
+ void * array_d_A[],
248
+ int IA,
249
+ int JA,
250
+ cudaLibMgMatrixDesc_t descrA,
251
+ cudaDataType computeType,
252
+ void * array_d_work[],
253
+ int64_t lwork,
254
+ int * h_info);
255
+
256
+ cusolverStatus_t CUSOLVERAPI cusolverMgPotrs_bufferSize(
257
+ cusolverMgHandle_t handle,
258
+ cublasFillMode_t uplo,
259
+ int n,
260
+ int nrhs,
261
+ void * array_d_A[],
262
+ int IA,
263
+ int JA,
264
+ cudaLibMgMatrixDesc_t descrA,
265
+ void * array_d_B[],
266
+ int IB,
267
+ int JB,
268
+ cudaLibMgMatrixDesc_t descrB,
269
+ cudaDataType computeType,
270
+ int64_t * lwork);
271
+
272
+ cusolverStatus_t CUSOLVERAPI cusolverMgPotrs(
273
+ cusolverMgHandle_t handle,
274
+ cublasFillMode_t uplo,
275
+ int n,
276
+ int nrhs,
277
+ void * array_d_A[],
278
+ int IA,
279
+ int JA,
280
+ cudaLibMgMatrixDesc_t descrA,
281
+ void * array_d_B[],
282
+ int IB,
283
+ int JB,
284
+ cudaLibMgMatrixDesc_t descrB,
285
+ cudaDataType computeType,
286
+ void * array_d_work[],
287
+ int64_t lwork,
288
+ int * h_info);
289
+
290
+ cusolverStatus_t CUSOLVERAPI cusolverMgPotri_bufferSize(
291
+ cusolverMgHandle_t handle,
292
+ cublasFillMode_t uplo,
293
+ int N,
294
+ void * array_d_A[],
295
+ int IA,
296
+ int JA,
297
+ cudaLibMgMatrixDesc_t descrA,
298
+ cudaDataType computeType,
299
+ int64_t * lwork);
300
+
301
+ cusolverStatus_t CUSOLVERAPI cusolverMgPotri(
302
+ cusolverMgHandle_t handle,
303
+ cublasFillMode_t uplo,
304
+ int N,
305
+ void * array_d_A[],
306
+ int IA,
307
+ int JA,
308
+ cudaLibMgMatrixDesc_t descrA,
309
+ cudaDataType computeType,
310
+ void * array_d_work[],
311
+ int64_t lwork,
312
+ int * h_info);
313
+
314
+ #if defined(__cplusplus)
315
+ }
316
+ #endif /* __cplusplus */
317
+
318
+ #endif // CUSOLVERMG_H_
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cusolver/include/cusolverRf.h ADDED
@@ -0,0 +1,339 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #if !defined(CUSOLVERRF_H_)
51
+ #define CUSOLVERRF_H_
52
+
53
+ #include "driver_types.h"
54
+ #include "cuComplex.h"
55
+ #include "cusolver_common.h"
56
+
57
+ #if defined(__cplusplus)
58
+ extern "C" {
59
+ #endif /* __cplusplus */
60
+
61
+ /* CUSOLVERRF mode */
62
+ typedef enum {
63
+ CUSOLVERRF_RESET_VALUES_FAST_MODE_OFF = 0, // default
64
+ CUSOLVERRF_RESET_VALUES_FAST_MODE_ON = 1
65
+ } cusolverRfResetValuesFastMode_t;
66
+
67
+ /* CUSOLVERRF matrix format */
68
+ typedef enum {
69
+ CUSOLVERRF_MATRIX_FORMAT_CSR = 0, // default
70
+ CUSOLVERRF_MATRIX_FORMAT_CSC = 1
71
+ } cusolverRfMatrixFormat_t;
72
+
73
+ /* CUSOLVERRF unit diagonal */
74
+ typedef enum {
75
+ CUSOLVERRF_UNIT_DIAGONAL_STORED_L = 0, // default
76
+ CUSOLVERRF_UNIT_DIAGONAL_STORED_U = 1,
77
+ CUSOLVERRF_UNIT_DIAGONAL_ASSUMED_L = 2,
78
+ CUSOLVERRF_UNIT_DIAGONAL_ASSUMED_U = 3
79
+ } cusolverRfUnitDiagonal_t;
80
+
81
+ /* CUSOLVERRF factorization algorithm */
82
+ typedef enum {
83
+ CUSOLVERRF_FACTORIZATION_ALG0 = 0, // default
84
+ CUSOLVERRF_FACTORIZATION_ALG1 = 1,
85
+ CUSOLVERRF_FACTORIZATION_ALG2 = 2,
86
+ } cusolverRfFactorization_t;
87
+
88
+ /* CUSOLVERRF triangular solve algorithm */
89
+ typedef enum {
90
+ CUSOLVERRF_TRIANGULAR_SOLVE_ALG1 = 1, // default
91
+ CUSOLVERRF_TRIANGULAR_SOLVE_ALG2 = 2,
92
+ CUSOLVERRF_TRIANGULAR_SOLVE_ALG3 = 3
93
+ } cusolverRfTriangularSolve_t;
94
+
95
+ /* CUSOLVERRF numeric boost report */
96
+ typedef enum {
97
+ CUSOLVERRF_NUMERIC_BOOST_NOT_USED = 0, // default
98
+ CUSOLVERRF_NUMERIC_BOOST_USED = 1
99
+ } cusolverRfNumericBoostReport_t;
100
+
101
+ /* Opaque structure holding CUSOLVERRF library common */
102
+ struct cusolverRfCommon;
103
+ typedef struct cusolverRfCommon* cusolverRfHandle_t;
104
+
105
+ /* CUSOLVERRF create (allocate memory) and destroy (free memory) in the handle
106
+ */
107
+ cusolverStatus_t CUSOLVERAPI cusolverRfCreate(cusolverRfHandle_t* handle);
108
+ cusolverStatus_t CUSOLVERAPI cusolverRfDestroy(cusolverRfHandle_t handle);
109
+
110
+ /* CUSOLVERRF set and get input format */
111
+ cusolverStatus_t CUSOLVERAPI cusolverRfGetMatrixFormat(
112
+ cusolverRfHandle_t handle,
113
+ cusolverRfMatrixFormat_t* format,
114
+ cusolverRfUnitDiagonal_t* diag);
115
+
116
+ cusolverStatus_t CUSOLVERAPI cusolverRfSetMatrixFormat(
117
+ cusolverRfHandle_t handle,
118
+ cusolverRfMatrixFormat_t format,
119
+ cusolverRfUnitDiagonal_t diag);
120
+
121
+ /* CUSOLVERRF set and get numeric properties */
122
+ cusolverStatus_t CUSOLVERAPI cusolverRfSetNumericProperties(
123
+ cusolverRfHandle_t handle,
124
+ double zero,
125
+ double boost);
126
+
127
+ cusolverStatus_t CUSOLVERAPI cusolverRfGetNumericProperties(
128
+ cusolverRfHandle_t handle,
129
+ double* zero,
130
+ double* boost);
131
+
132
+ cusolverStatus_t CUSOLVERAPI cusolverRfGetNumericBoostReport(
133
+ cusolverRfHandle_t handle,
134
+ cusolverRfNumericBoostReport_t* report);
135
+
136
+ /* CUSOLVERRF choose the triangular solve algorithm */
137
+ cusolverStatus_t CUSOLVERAPI cusolverRfSetAlgs(
138
+ cusolverRfHandle_t handle,
139
+ cusolverRfFactorization_t factAlg,
140
+ cusolverRfTriangularSolve_t solveAlg);
141
+
142
+ cusolverStatus_t CUSOLVERAPI cusolverRfGetAlgs(
143
+ cusolverRfHandle_t handle,
144
+ cusolverRfFactorization_t* factAlg,
145
+ cusolverRfTriangularSolve_t* solveAlg);
146
+
147
+ /* CUSOLVERRF set and get fast mode */
148
+ cusolverStatus_t CUSOLVERAPI cusolverRfGetResetValuesFastMode(
149
+ cusolverRfHandle_t handle,
150
+ cusolverRfResetValuesFastMode_t* fastMode);
151
+
152
+ cusolverStatus_t CUSOLVERAPI cusolverRfSetResetValuesFastMode(
153
+ cusolverRfHandle_t handle,
154
+ cusolverRfResetValuesFastMode_t fastMode);
155
+
156
+ /*** Non-Batched Routines ***/
157
+ /* CUSOLVERRF setup of internal structures from host or device memory */
158
+ cusolverStatus_t CUSOLVERAPI
159
+ cusolverRfSetupHost(/* Input (in the host memory) */
160
+ int n,
161
+ int nnzA,
162
+ int* h_csrRowPtrA,
163
+ int* h_csrColIndA,
164
+ double* h_csrValA,
165
+ int nnzL,
166
+ int* h_csrRowPtrL,
167
+ int* h_csrColIndL,
168
+ double* h_csrValL,
169
+ int nnzU,
170
+ int* h_csrRowPtrU,
171
+ int* h_csrColIndU,
172
+ double* h_csrValU,
173
+ int* h_P,
174
+ int* h_Q,
175
+ /* Output */
176
+ cusolverRfHandle_t handle);
177
+
178
+ cusolverStatus_t CUSOLVERAPI
179
+ cusolverRfSetupDevice(/* Input (in the device memory) */
180
+ int n,
181
+ int nnzA,
182
+ int* csrRowPtrA,
183
+ int* csrColIndA,
184
+ double* csrValA,
185
+ int nnzL,
186
+ int* csrRowPtrL,
187
+ int* csrColIndL,
188
+ double* csrValL,
189
+ int nnzU,
190
+ int* csrRowPtrU,
191
+ int* csrColIndU,
192
+ double* csrValU,
193
+ int* P,
194
+ int* Q,
195
+ /* Output */
196
+ cusolverRfHandle_t handle);
197
+
198
+ /* CUSOLVERRF update the matrix values (assuming the reordering, pivoting
199
+ and consequently the sparsity pattern of L and U did not change),
200
+ and zero out the remaining values. */
201
+ cusolverStatus_t CUSOLVERAPI
202
+ cusolverRfResetValues(/* Input (in the device memory) */
203
+ int n,
204
+ int nnzA,
205
+ int* csrRowPtrA,
206
+ int* csrColIndA,
207
+ double* csrValA,
208
+ int* P,
209
+ int* Q,
210
+ /* Output */
211
+ cusolverRfHandle_t handle);
212
+
213
+ /* CUSOLVERRF analysis (for parallelism) */
214
+ cusolverStatus_t CUSOLVERAPI cusolverRfAnalyze(cusolverRfHandle_t handle);
215
+
216
+ /* CUSOLVERRF re-factorization (for parallelism) */
217
+ cusolverStatus_t CUSOLVERAPI cusolverRfRefactor(cusolverRfHandle_t handle);
218
+
219
+ /* CUSOLVERRF extraction: Get L & U packed into a single matrix M */
220
+ cusolverStatus_t CUSOLVERAPI
221
+ cusolverRfAccessBundledFactorsDevice(/* Input */
222
+ cusolverRfHandle_t handle,
223
+ /* Output (in the host memory) */
224
+ int* nnzM,
225
+ /* Output (in the device memory) */
226
+ int** Mp,
227
+ int** Mi,
228
+ double** Mx);
229
+
230
+ cusolverStatus_t CUSOLVERAPI
231
+ cusolverRfExtractBundledFactorsHost(/* Input */
232
+ cusolverRfHandle_t handle,
233
+ /* Output (in the host memory) */
234
+ int* h_nnzM,
235
+ int** h_Mp,
236
+ int** h_Mi,
237
+ double** h_Mx);
238
+
239
+ /* CUSOLVERRF extraction: Get L & U individually */
240
+ cusolverStatus_t CUSOLVERAPI
241
+ cusolverRfExtractSplitFactorsHost(/* Input */
242
+ cusolverRfHandle_t handle,
243
+ /* Output (in the host memory) */
244
+ int* h_nnzL,
245
+ int** h_csrRowPtrL,
246
+ int** h_csrColIndL,
247
+ double** h_csrValL,
248
+ int* h_nnzU,
249
+ int** h_csrRowPtrU,
250
+ int** h_csrColIndU,
251
+ double** h_csrValU);
252
+
253
+ /* CUSOLVERRF (forward and backward triangular) solves */
254
+ cusolverStatus_t CUSOLVERAPI
255
+ cusolverRfSolve(/* Input (in the device memory) */
256
+ cusolverRfHandle_t handle,
257
+ int* P,
258
+ int* Q,
259
+ int nrhs, // only nrhs=1 is supported
260
+ double* Temp, // of size ldt*nrhs (ldt>=n)
261
+ int ldt,
262
+ /* Input/Output (in the device memory) */
263
+ double* XF,
264
+ /* Input */
265
+ int ldxf);
266
+
267
+ /*** Batched Routines ***/
268
+ /* CUSOLVERRF-batch setup of internal structures from host */
269
+ cusolverStatus_t CUSOLVERAPI
270
+ cusolverRfBatchSetupHost(/* Input (in the host memory)*/
271
+ int batchSize,
272
+ int n,
273
+ int nnzA,
274
+ int* h_csrRowPtrA,
275
+ int* h_csrColIndA,
276
+ double* h_csrValA_array[],
277
+ int nnzL,
278
+ int* h_csrRowPtrL,
279
+ int* h_csrColIndL,
280
+ double* h_csrValL,
281
+ int nnzU,
282
+ int* h_csrRowPtrU,
283
+ int* h_csrColIndU,
284
+ double* h_csrValU,
285
+ int* h_P,
286
+ int* h_Q,
287
+ /* Output (in the device memory) */
288
+ cusolverRfHandle_t handle);
289
+
290
+ /* CUSOLVERRF-batch update the matrix values (assuming the reordering,
291
+ pivoting and consequently the sparsity pattern of L and U did not change),
292
+ and zero out the remaining values. */
293
+ cusolverStatus_t CUSOLVERAPI
294
+ cusolverRfBatchResetValues(/* Input (in the device memory) */
295
+ int batchSize,
296
+ int n,
297
+ int nnzA,
298
+ int* csrRowPtrA,
299
+ int* csrColIndA,
300
+ double* csrValA_array[],
301
+ int* P,
302
+ int* Q,
303
+ /* Output */
304
+ cusolverRfHandle_t handle);
305
+
306
+ /* CUSOLVERRF-batch analysis (for parallelism) */
307
+ cusolverStatus_t CUSOLVERAPI
308
+ cusolverRfBatchAnalyze(cusolverRfHandle_t handle);
309
+
310
+ /* CUSOLVERRF-batch re-factorization (for parallelism) */
311
+ cusolverStatus_t CUSOLVERAPI
312
+ cusolverRfBatchRefactor(cusolverRfHandle_t handle);
313
+
314
+ /* CUSOLVERRF-batch (forward and backward triangular) solves */
315
+ cusolverStatus_t CUSOLVERAPI
316
+ cusolverRfBatchSolve(/* Input (in the device memory) */
317
+ cusolverRfHandle_t handle,
318
+ int* P,
319
+ int* Q,
320
+ int nrhs, // only nrhs=1 is supported
321
+ double* Temp, // of size 2*batchSize*(n*nrhs)
322
+ int ldt, // only ldt=n is supported
323
+ /* Input/Output (in the device memory) */
324
+ double* XF_array[],
325
+ /* Input */
326
+ int ldxf);
327
+
328
+ /* CUSOLVERRF-batch obtain the position of zero pivot */
329
+ cusolverStatus_t CUSOLVERAPI
330
+ cusolverRfBatchZeroPivot(/* Input */
331
+ cusolverRfHandle_t handle,
332
+ /* Output (in the host memory) */
333
+ int* position);
334
+
335
+ #if defined(__cplusplus)
336
+ }
337
+ #endif /* __cplusplus */
338
+
339
+ #endif /* CUSOLVERRF_H_ */
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cusolver/include/cusolverSp_LOWLEVEL_PREVIEW.h ADDED
@@ -0,0 +1,1107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2015 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #if !defined(CUSOLVERSP_LOWLEVEL_PREVIEW_H_)
51
+ #define CUSOLVERSP_LOWLEVEL_PREVIEW_H_
52
+
53
+ #include "cusolverSp.h"
54
+
55
+ #if defined(__cplusplus)
56
+ extern "C" {
57
+ #endif /* __cplusplus */
58
+
59
+ struct csrluInfoHost;
60
+ typedef struct csrluInfoHost *csrluInfoHost_t;
61
+
62
+ struct csrqrInfoHost;
63
+ typedef struct csrqrInfoHost *csrqrInfoHost_t;
64
+
65
+ struct csrcholInfoHost;
66
+ typedef struct csrcholInfoHost *csrcholInfoHost_t;
67
+
68
+ struct csrcholInfo;
69
+ typedef struct csrcholInfo *csrcholInfo_t;
70
+
71
+ /*
72
+ * Low level API for CPU LU
73
+ *
74
+ */
75
+ cusolverStatus_t CUSOLVERAPI
76
+ cusolverSpCreateCsrluInfoHost(csrluInfoHost_t *info);
77
+
78
+ cusolverStatus_t CUSOLVERAPI
79
+ cusolverSpDestroyCsrluInfoHost(csrluInfoHost_t info);
80
+
81
+ cusolverStatus_t CUSOLVERAPI cusolverSpXcsrluAnalysisHost(
82
+ cusolverSpHandle_t handle,
83
+ int n,
84
+ int nnzA,
85
+ const cusparseMatDescr_t descrA,
86
+ const int * csrRowPtrA,
87
+ const int * csrColIndA,
88
+ csrluInfoHost_t info);
89
+
90
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrluBufferInfoHost(
91
+ cusolverSpHandle_t handle,
92
+ int n,
93
+ int nnzA,
94
+ const cusparseMatDescr_t descrA,
95
+ const float * csrValA,
96
+ const int * csrRowPtrA,
97
+ const int * csrColIndA,
98
+ csrluInfoHost_t info,
99
+ size_t * internalDataInBytes,
100
+ size_t * workspaceInBytes);
101
+
102
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrluBufferInfoHost(
103
+ cusolverSpHandle_t handle,
104
+ int n,
105
+ int nnzA,
106
+ const cusparseMatDescr_t descrA,
107
+ const double * csrValA,
108
+ const int * csrRowPtrA,
109
+ const int * csrColIndA,
110
+ csrluInfoHost_t info,
111
+ size_t * internalDataInBytes,
112
+ size_t * workspaceInBytes);
113
+
114
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrluBufferInfoHost(
115
+ cusolverSpHandle_t handle,
116
+ int n,
117
+ int nnzA,
118
+ const cusparseMatDescr_t descrA,
119
+ const cuComplex * csrValA,
120
+ const int * csrRowPtrA,
121
+ const int * csrColIndA,
122
+ csrluInfoHost_t info,
123
+ size_t * internalDataInBytes,
124
+ size_t * workspaceInBytes);
125
+
126
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrluBufferInfoHost(
127
+ cusolverSpHandle_t handle,
128
+ int n,
129
+ int nnzA,
130
+ const cusparseMatDescr_t descrA,
131
+ const cuDoubleComplex * csrValA,
132
+ const int * csrRowPtrA,
133
+ const int * csrColIndA,
134
+ csrluInfoHost_t info,
135
+ size_t * internalDataInBytes,
136
+ size_t * workspaceInBytes);
137
+
138
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrluFactorHost(
139
+ cusolverSpHandle_t handle,
140
+ int n,
141
+ int nnzA,
142
+ const cusparseMatDescr_t descrA,
143
+ const float * csrValA,
144
+ const int * csrRowPtrA,
145
+ const int * csrColIndA,
146
+ csrluInfoHost_t info,
147
+ float pivot_threshold,
148
+ void * pBuffer);
149
+
150
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrluFactorHost(
151
+ cusolverSpHandle_t handle,
152
+ int n,
153
+ int nnzA,
154
+ const cusparseMatDescr_t descrA,
155
+ const double * csrValA,
156
+ const int * csrRowPtrA,
157
+ const int * csrColIndA,
158
+ csrluInfoHost_t info,
159
+ double pivot_threshold,
160
+ void * pBuffer);
161
+
162
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrluFactorHost(
163
+ cusolverSpHandle_t handle,
164
+ int n,
165
+ int nnzA,
166
+ const cusparseMatDescr_t descrA,
167
+ const cuComplex * csrValA,
168
+ const int * csrRowPtrA,
169
+ const int * csrColIndA,
170
+ csrluInfoHost_t info,
171
+ float pivot_threshold,
172
+ void * pBuffer);
173
+
174
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrluFactorHost(
175
+ cusolverSpHandle_t handle,
176
+ int n,
177
+ int nnzA,
178
+ const cusparseMatDescr_t descrA,
179
+ const cuDoubleComplex * csrValA,
180
+ const int * csrRowPtrA,
181
+ const int * csrColIndA,
182
+ csrluInfoHost_t info,
183
+ double pivot_threshold,
184
+ void * pBuffer);
185
+
186
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrluZeroPivotHost(
187
+ cusolverSpHandle_t handle,
188
+ csrluInfoHost_t info,
189
+ float tol,
190
+ int * position);
191
+
192
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrluZeroPivotHost(
193
+ cusolverSpHandle_t handle,
194
+ csrluInfoHost_t info,
195
+ double tol,
196
+ int * position);
197
+
198
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrluZeroPivotHost(
199
+ cusolverSpHandle_t handle,
200
+ csrluInfoHost_t info,
201
+ float tol,
202
+ int * position);
203
+
204
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrluZeroPivotHost(
205
+ cusolverSpHandle_t handle,
206
+ csrluInfoHost_t info,
207
+ double tol,
208
+ int * position);
209
+
210
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrluSolveHost(
211
+ cusolverSpHandle_t handle,
212
+ int n,
213
+ const float * b,
214
+ float * x,
215
+ csrluInfoHost_t info,
216
+ void * pBuffer);
217
+
218
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrluSolveHost(
219
+ cusolverSpHandle_t handle,
220
+ int n,
221
+ const double * b,
222
+ double * x,
223
+ csrluInfoHost_t info,
224
+ void * pBuffer);
225
+
226
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrluSolveHost(
227
+ cusolverSpHandle_t handle,
228
+ int n,
229
+ const cuComplex * b,
230
+ cuComplex * x,
231
+ csrluInfoHost_t info,
232
+ void * pBuffer);
233
+
234
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrluSolveHost(
235
+ cusolverSpHandle_t handle,
236
+ int n,
237
+ const cuDoubleComplex *b,
238
+ cuDoubleComplex * x,
239
+ csrluInfoHost_t info,
240
+ void * pBuffer);
241
+
242
+ cusolverStatus_t CUSOLVERAPI cusolverSpXcsrluNnzHost(
243
+ cusolverSpHandle_t handle,
244
+ int * nnzLRef,
245
+ int * nnzURef,
246
+ csrluInfoHost_t info);
247
+
248
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrluExtractHost(
249
+ cusolverSpHandle_t handle,
250
+ int * P,
251
+ int * Q,
252
+ const cusparseMatDescr_t descrL,
253
+ float * csrValL,
254
+ int * csrRowPtrL,
255
+ int * csrColIndL,
256
+ const cusparseMatDescr_t descrU,
257
+ float * csrValU,
258
+ int * csrRowPtrU,
259
+ int * csrColIndU,
260
+ csrluInfoHost_t info,
261
+ void * pBuffer);
262
+
263
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrluExtractHost(
264
+ cusolverSpHandle_t handle,
265
+ int * P,
266
+ int * Q,
267
+ const cusparseMatDescr_t descrL,
268
+ double * csrValL,
269
+ int * csrRowPtrL,
270
+ int * csrColIndL,
271
+ const cusparseMatDescr_t descrU,
272
+ double * csrValU,
273
+ int * csrRowPtrU,
274
+ int * csrColIndU,
275
+ csrluInfoHost_t info,
276
+ void * pBuffer);
277
+
278
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrluExtractHost(
279
+ cusolverSpHandle_t handle,
280
+ int * P,
281
+ int * Q,
282
+ const cusparseMatDescr_t descrL,
283
+ cuComplex * csrValL,
284
+ int * csrRowPtrL,
285
+ int * csrColIndL,
286
+ const cusparseMatDescr_t descrU,
287
+ cuComplex * csrValU,
288
+ int * csrRowPtrU,
289
+ int * csrColIndU,
290
+ csrluInfoHost_t info,
291
+ void * pBuffer);
292
+
293
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrluExtractHost(
294
+ cusolverSpHandle_t handle,
295
+ int * P,
296
+ int * Q,
297
+ const cusparseMatDescr_t descrL,
298
+ cuDoubleComplex * csrValL,
299
+ int * csrRowPtrL,
300
+ int * csrColIndL,
301
+ const cusparseMatDescr_t descrU,
302
+ cuDoubleComplex * csrValU,
303
+ int * csrRowPtrU,
304
+ int * csrColIndU,
305
+ csrluInfoHost_t info,
306
+ void * pBuffer);
307
+
308
+ /*
309
+ * Low level API for CPU QR
310
+ *
311
+ */
312
+ cusolverStatus_t CUSOLVERAPI
313
+ cusolverSpCreateCsrqrInfoHost(csrqrInfoHost_t *info);
314
+
315
+ cusolverStatus_t CUSOLVERAPI
316
+ cusolverSpDestroyCsrqrInfoHost(csrqrInfoHost_t info);
317
+
318
+ cusolverStatus_t CUSOLVERAPI cusolverSpXcsrqrAnalysisHost(
319
+ cusolverSpHandle_t handle,
320
+ int m,
321
+ int n,
322
+ int nnzA,
323
+ const cusparseMatDescr_t descrA,
324
+ const int * csrRowPtrA,
325
+ const int * csrColIndA,
326
+ csrqrInfoHost_t info);
327
+
328
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrqrBufferInfoHost(
329
+ cusolverSpHandle_t handle,
330
+ int m,
331
+ int n,
332
+ int nnzA,
333
+ const cusparseMatDescr_t descrA,
334
+ const float * csrValA,
335
+ const int * csrRowPtrA,
336
+ const int * csrColIndA,
337
+ csrqrInfoHost_t info,
338
+ size_t * internalDataInBytes,
339
+ size_t * workspaceInBytes);
340
+
341
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrqrBufferInfoHost(
342
+ cusolverSpHandle_t handle,
343
+ int m,
344
+ int n,
345
+ int nnzA,
346
+ const cusparseMatDescr_t descrA,
347
+ const double * csrValA,
348
+ const int * csrRowPtrA,
349
+ const int * csrColIndA,
350
+ csrqrInfoHost_t info,
351
+ size_t * internalDataInBytes,
352
+ size_t * workspaceInBytes);
353
+
354
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrqrBufferInfoHost(
355
+ cusolverSpHandle_t handle,
356
+ int m,
357
+ int n,
358
+ int nnzA,
359
+ const cusparseMatDescr_t descrA,
360
+ const cuComplex * csrValA,
361
+ const int * csrRowPtrA,
362
+ const int * csrColIndA,
363
+ csrqrInfoHost_t info,
364
+ size_t * internalDataInBytes,
365
+ size_t * workspaceInBytes);
366
+
367
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrqrBufferInfoHost(
368
+ cusolverSpHandle_t handle,
369
+ int m,
370
+ int n,
371
+ int nnzA,
372
+ const cusparseMatDescr_t descrA,
373
+ const cuDoubleComplex * csrValA,
374
+ const int * csrRowPtrA,
375
+ const int * csrColIndA,
376
+ csrqrInfoHost_t info,
377
+ size_t * internalDataInBytes,
378
+ size_t * workspaceInBytes);
379
+
380
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrqrSetupHost(
381
+ cusolverSpHandle_t handle,
382
+ int m,
383
+ int n,
384
+ int nnzA,
385
+ const cusparseMatDescr_t descrA,
386
+ const float * csrValA,
387
+ const int * csrRowPtrA,
388
+ const int * csrColIndA,
389
+ float mu,
390
+ csrqrInfoHost_t info);
391
+
392
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrqrSetupHost(
393
+ cusolverSpHandle_t handle,
394
+ int m,
395
+ int n,
396
+ int nnzA,
397
+ const cusparseMatDescr_t descrA,
398
+ const double * csrValA,
399
+ const int * csrRowPtrA,
400
+ const int * csrColIndA,
401
+ double mu,
402
+ csrqrInfoHost_t info);
403
+
404
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrqrSetupHost(
405
+ cusolverSpHandle_t handle,
406
+ int m,
407
+ int n,
408
+ int nnzA,
409
+ const cusparseMatDescr_t descrA,
410
+ const cuComplex * csrValA,
411
+ const int * csrRowPtrA,
412
+ const int * csrColIndA,
413
+ cuComplex mu,
414
+ csrqrInfoHost_t info);
415
+
416
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrqrSetupHost(
417
+ cusolverSpHandle_t handle,
418
+ int m,
419
+ int n,
420
+ int nnzA,
421
+ const cusparseMatDescr_t descrA,
422
+ const cuDoubleComplex * csrValA,
423
+ const int * csrRowPtrA,
424
+ const int * csrColIndA,
425
+ cuDoubleComplex mu,
426
+ csrqrInfoHost_t info);
427
+
428
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrqrFactorHost(
429
+ cusolverSpHandle_t handle,
430
+ int m,
431
+ int n,
432
+ int nnzA,
433
+ float * b,
434
+ float * x,
435
+ csrqrInfoHost_t info,
436
+ void * pBuffer);
437
+
438
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrqrFactorHost(
439
+ cusolverSpHandle_t handle,
440
+ int m,
441
+ int n,
442
+ int nnzA,
443
+ double * b,
444
+ double * x,
445
+ csrqrInfoHost_t info,
446
+ void * pBuffer);
447
+
448
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrqrFactorHost(
449
+ cusolverSpHandle_t handle,
450
+ int m,
451
+ int n,
452
+ int nnzA,
453
+ cuComplex * b,
454
+ cuComplex * x,
455
+ csrqrInfoHost_t info,
456
+ void * pBuffer);
457
+
458
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrqrFactorHost(
459
+ cusolverSpHandle_t handle,
460
+ int m,
461
+ int n,
462
+ int nnzA,
463
+ cuDoubleComplex * b,
464
+ cuDoubleComplex * x,
465
+ csrqrInfoHost_t info,
466
+ void * pBuffer);
467
+
468
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrqrZeroPivotHost(
469
+ cusolverSpHandle_t handle,
470
+ csrqrInfoHost_t info,
471
+ float tol,
472
+ int * position);
473
+
474
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrqrZeroPivotHost(
475
+ cusolverSpHandle_t handle,
476
+ csrqrInfoHost_t info,
477
+ double tol,
478
+ int * position);
479
+
480
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrqrZeroPivotHost(
481
+ cusolverSpHandle_t handle,
482
+ csrqrInfoHost_t info,
483
+ float tol,
484
+ int * position);
485
+
486
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrqrZeroPivotHost(
487
+ cusolverSpHandle_t handle,
488
+ csrqrInfoHost_t info,
489
+ double tol,
490
+ int * position);
491
+
492
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrqrSolveHost(
493
+ cusolverSpHandle_t handle,
494
+ int m,
495
+ int n,
496
+ float * b,
497
+ float * x,
498
+ csrqrInfoHost_t info,
499
+ void * pBuffer);
500
+
501
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrqrSolveHost(
502
+ cusolverSpHandle_t handle,
503
+ int m,
504
+ int n,
505
+ double * b,
506
+ double * x,
507
+ csrqrInfoHost_t info,
508
+ void * pBuffer);
509
+
510
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrqrSolveHost(
511
+ cusolverSpHandle_t handle,
512
+ int m,
513
+ int n,
514
+ cuComplex * b,
515
+ cuComplex * x,
516
+ csrqrInfoHost_t info,
517
+ void * pBuffer);
518
+
519
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrqrSolveHost(
520
+ cusolverSpHandle_t handle,
521
+ int m,
522
+ int n,
523
+ cuDoubleComplex * b,
524
+ cuDoubleComplex * x,
525
+ csrqrInfoHost_t info,
526
+ void * pBuffer);
527
+
528
+ /*
529
+ * Low level API for GPU QR
530
+ *
531
+ */
532
+ cusolverStatus_t CUSOLVERAPI cusolverSpXcsrqrAnalysis(
533
+ cusolverSpHandle_t handle,
534
+ int m,
535
+ int n,
536
+ int nnzA,
537
+ const cusparseMatDescr_t descrA,
538
+ const int * csrRowPtrA,
539
+ const int * csrColIndA,
540
+ csrqrInfo_t info);
541
+
542
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrqrBufferInfo(
543
+ cusolverSpHandle_t handle,
544
+ int m,
545
+ int n,
546
+ int nnzA,
547
+ const cusparseMatDescr_t descrA,
548
+ const float * csrValA,
549
+ const int * csrRowPtrA,
550
+ const int * csrColIndA,
551
+ csrqrInfo_t info,
552
+ size_t * internalDataInBytes,
553
+ size_t * workspaceInBytes);
554
+
555
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrqrBufferInfo(
556
+ cusolverSpHandle_t handle,
557
+ int m,
558
+ int n,
559
+ int nnzA,
560
+ const cusparseMatDescr_t descrA,
561
+ const double * csrValA,
562
+ const int * csrRowPtrA,
563
+ const int * csrColIndA,
564
+ csrqrInfo_t info,
565
+ size_t * internalDataInBytes,
566
+ size_t * workspaceInBytes);
567
+
568
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrqrBufferInfo(
569
+ cusolverSpHandle_t handle,
570
+ int m,
571
+ int n,
572
+ int nnzA,
573
+ const cusparseMatDescr_t descrA,
574
+ const cuComplex * csrValA,
575
+ const int * csrRowPtrA,
576
+ const int * csrColIndA,
577
+ csrqrInfo_t info,
578
+ size_t * internalDataInBytes,
579
+ size_t * workspaceInBytes);
580
+
581
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrqrBufferInfo(
582
+ cusolverSpHandle_t handle,
583
+ int m,
584
+ int n,
585
+ int nnzA,
586
+ const cusparseMatDescr_t descrA,
587
+ const cuDoubleComplex * csrValA,
588
+ const int * csrRowPtrA,
589
+ const int * csrColIndA,
590
+ csrqrInfo_t info,
591
+ size_t * internalDataInBytes,
592
+ size_t * workspaceInBytes);
593
+
594
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrqrSetup(
595
+ cusolverSpHandle_t handle,
596
+ int m,
597
+ int n,
598
+ int nnzA,
599
+ const cusparseMatDescr_t descrA,
600
+ const float * csrValA,
601
+ const int * csrRowPtrA,
602
+ const int * csrColIndA,
603
+ float mu,
604
+ csrqrInfo_t info);
605
+
606
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrqrSetup(
607
+ cusolverSpHandle_t handle,
608
+ int m,
609
+ int n,
610
+ int nnzA,
611
+ const cusparseMatDescr_t descrA,
612
+ const double * csrValA,
613
+ const int * csrRowPtrA,
614
+ const int * csrColIndA,
615
+ double mu,
616
+ csrqrInfo_t info);
617
+
618
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrqrSetup(
619
+ cusolverSpHandle_t handle,
620
+ int m,
621
+ int n,
622
+ int nnzA,
623
+ const cusparseMatDescr_t descrA,
624
+ const cuComplex * csrValA,
625
+ const int * csrRowPtrA,
626
+ const int * csrColIndA,
627
+ cuComplex mu,
628
+ csrqrInfo_t info);
629
+
630
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrqrSetup(
631
+ cusolverSpHandle_t handle,
632
+ int m,
633
+ int n,
634
+ int nnzA,
635
+ const cusparseMatDescr_t descrA,
636
+ const cuDoubleComplex * csrValA,
637
+ const int * csrRowPtrA,
638
+ const int * csrColIndA,
639
+ cuDoubleComplex mu,
640
+ csrqrInfo_t info);
641
+
642
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrqrFactor(
643
+ cusolverSpHandle_t handle,
644
+ int m,
645
+ int n,
646
+ int nnzA,
647
+ float * b,
648
+ float * x,
649
+ csrqrInfo_t info,
650
+ void * pBuffer);
651
+
652
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrqrFactor(
653
+ cusolverSpHandle_t handle,
654
+ int m,
655
+ int n,
656
+ int nnzA,
657
+ double * b,
658
+ double * x,
659
+ csrqrInfo_t info,
660
+ void * pBuffer);
661
+
662
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrqrFactor(
663
+ cusolverSpHandle_t handle,
664
+ int m,
665
+ int n,
666
+ int nnzA,
667
+ cuComplex * b,
668
+ cuComplex * x,
669
+ csrqrInfo_t info,
670
+ void * pBuffer);
671
+
672
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrqrFactor(
673
+ cusolverSpHandle_t handle,
674
+ int m,
675
+ int n,
676
+ int nnzA,
677
+ cuDoubleComplex * b,
678
+ cuDoubleComplex * x,
679
+ csrqrInfo_t info,
680
+ void * pBuffer);
681
+
682
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrqrZeroPivot(
683
+ cusolverSpHandle_t handle,
684
+ csrqrInfo_t info,
685
+ float tol,
686
+ int * position);
687
+
688
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrqrZeroPivot(
689
+ cusolverSpHandle_t handle,
690
+ csrqrInfo_t info,
691
+ double tol,
692
+ int * position);
693
+
694
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrqrZeroPivot(
695
+ cusolverSpHandle_t handle,
696
+ csrqrInfo_t info,
697
+ float tol,
698
+ int * position);
699
+
700
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrqrZeroPivot(
701
+ cusolverSpHandle_t handle,
702
+ csrqrInfo_t info,
703
+ double tol,
704
+ int * position);
705
+
706
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrqrSolve(
707
+ cusolverSpHandle_t handle,
708
+ int m,
709
+ int n,
710
+ float * b,
711
+ float * x,
712
+ csrqrInfo_t info,
713
+ void * pBuffer);
714
+
715
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrqrSolve(
716
+ cusolverSpHandle_t handle,
717
+ int m,
718
+ int n,
719
+ double * b,
720
+ double * x,
721
+ csrqrInfo_t info,
722
+ void * pBuffer);
723
+
724
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrqrSolve(
725
+ cusolverSpHandle_t handle,
726
+ int m,
727
+ int n,
728
+ cuComplex * b,
729
+ cuComplex * x,
730
+ csrqrInfo_t info,
731
+ void * pBuffer);
732
+
733
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrqrSolve(
734
+ cusolverSpHandle_t handle,
735
+ int m,
736
+ int n,
737
+ cuDoubleComplex * b,
738
+ cuDoubleComplex * x,
739
+ csrqrInfo_t info,
740
+ void * pBuffer);
741
+
742
+ /*
743
+ * Low level API for CPU Cholesky
744
+ *
745
+ */
746
+ cusolverStatus_t CUSOLVERAPI
747
+ cusolverSpCreateCsrcholInfoHost(csrcholInfoHost_t *info);
748
+
749
+ cusolverStatus_t CUSOLVERAPI
750
+ cusolverSpDestroyCsrcholInfoHost(csrcholInfoHost_t info);
751
+
752
+ cusolverStatus_t CUSOLVERAPI cusolverSpXcsrcholAnalysisHost(
753
+ cusolverSpHandle_t handle,
754
+ int n,
755
+ int nnzA,
756
+ const cusparseMatDescr_t descrA,
757
+ const int * csrRowPtrA,
758
+ const int * csrColIndA,
759
+ csrcholInfoHost_t info);
760
+
761
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrcholBufferInfoHost(
762
+ cusolverSpHandle_t handle,
763
+ int n,
764
+ int nnzA,
765
+ const cusparseMatDescr_t descrA,
766
+ const float * csrValA,
767
+ const int * csrRowPtrA,
768
+ const int * csrColIndA,
769
+ csrcholInfoHost_t info,
770
+ size_t * internalDataInBytes,
771
+ size_t * workspaceInBytes);
772
+
773
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrcholBufferInfoHost(
774
+ cusolverSpHandle_t handle,
775
+ int n,
776
+ int nnzA,
777
+ const cusparseMatDescr_t descrA,
778
+ const double * csrValA,
779
+ const int * csrRowPtrA,
780
+ const int * csrColIndA,
781
+ csrcholInfoHost_t info,
782
+ size_t * internalDataInBytes,
783
+ size_t * workspaceInBytes);
784
+
785
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrcholBufferInfoHost(
786
+ cusolverSpHandle_t handle,
787
+ int n,
788
+ int nnzA,
789
+ const cusparseMatDescr_t descrA,
790
+ const cuComplex * csrValA,
791
+ const int * csrRowPtrA,
792
+ const int * csrColIndA,
793
+ csrcholInfoHost_t info,
794
+ size_t * internalDataInBytes,
795
+ size_t * workspaceInBytes);
796
+
797
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrcholBufferInfoHost(
798
+ cusolverSpHandle_t handle,
799
+ int n,
800
+ int nnzA,
801
+ const cusparseMatDescr_t descrA,
802
+ const cuDoubleComplex * csrValA,
803
+ const int * csrRowPtrA,
804
+ const int * csrColIndA,
805
+ csrcholInfoHost_t info,
806
+ size_t * internalDataInBytes,
807
+ size_t * workspaceInBytes);
808
+
809
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrcholFactorHost(
810
+ cusolverSpHandle_t handle,
811
+ int n,
812
+ int nnzA,
813
+ const cusparseMatDescr_t descrA,
814
+ const float * csrValA,
815
+ const int * csrRowPtrA,
816
+ const int * csrColIndA,
817
+ csrcholInfoHost_t info,
818
+ void * pBuffer);
819
+
820
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrcholFactorHost(
821
+ cusolverSpHandle_t handle,
822
+ int n,
823
+ int nnzA,
824
+ const cusparseMatDescr_t descrA,
825
+ const double * csrValA,
826
+ const int * csrRowPtrA,
827
+ const int * csrColIndA,
828
+ csrcholInfoHost_t info,
829
+ void * pBuffer);
830
+
831
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrcholFactorHost(
832
+ cusolverSpHandle_t handle,
833
+ int n,
834
+ int nnzA,
835
+ const cusparseMatDescr_t descrA,
836
+ const cuComplex * csrValA,
837
+ const int * csrRowPtrA,
838
+ const int * csrColIndA,
839
+ csrcholInfoHost_t info,
840
+ void * pBuffer);
841
+
842
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrcholFactorHost(
843
+ cusolverSpHandle_t handle,
844
+ int n,
845
+ int nnzA,
846
+ const cusparseMatDescr_t descrA,
847
+ const cuDoubleComplex * csrValA,
848
+ const int * csrRowPtrA,
849
+ const int * csrColIndA,
850
+ csrcholInfoHost_t info,
851
+ void * pBuffer);
852
+
853
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrcholZeroPivotHost(
854
+ cusolverSpHandle_t handle,
855
+ csrcholInfoHost_t info,
856
+ float tol,
857
+ int * position);
858
+
859
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrcholZeroPivotHost(
860
+ cusolverSpHandle_t handle,
861
+ csrcholInfoHost_t info,
862
+ double tol,
863
+ int * position);
864
+
865
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrcholZeroPivotHost(
866
+ cusolverSpHandle_t handle,
867
+ csrcholInfoHost_t info,
868
+ float tol,
869
+ int * position);
870
+
871
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrcholZeroPivotHost(
872
+ cusolverSpHandle_t handle,
873
+ csrcholInfoHost_t info,
874
+ double tol,
875
+ int * position);
876
+
877
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrcholSolveHost(
878
+ cusolverSpHandle_t handle,
879
+ int n,
880
+ const float * b,
881
+ float * x,
882
+ csrcholInfoHost_t info,
883
+ void * pBuffer);
884
+
885
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrcholSolveHost(
886
+ cusolverSpHandle_t handle,
887
+ int n,
888
+ const double * b,
889
+ double * x,
890
+ csrcholInfoHost_t info,
891
+ void * pBuffer);
892
+
893
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrcholSolveHost(
894
+ cusolverSpHandle_t handle,
895
+ int n,
896
+ const cuComplex * b,
897
+ cuComplex * x,
898
+ csrcholInfoHost_t info,
899
+ void * pBuffer);
900
+
901
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrcholSolveHost(
902
+ cusolverSpHandle_t handle,
903
+ int n,
904
+ const cuDoubleComplex *b,
905
+ cuDoubleComplex * x,
906
+ csrcholInfoHost_t info,
907
+ void * pBuffer);
908
+
909
+ /*
910
+ * Low level API for GPU Cholesky
911
+ *
912
+ */
913
+ cusolverStatus_t CUSOLVERAPI cusolverSpCreateCsrcholInfo(csrcholInfo_t *info);
914
+
915
+ cusolverStatus_t CUSOLVERAPI cusolverSpDestroyCsrcholInfo(csrcholInfo_t info);
916
+
917
+ cusolverStatus_t CUSOLVERAPI cusolverSpXcsrcholAnalysis(
918
+ cusolverSpHandle_t handle,
919
+ int n,
920
+ int nnzA,
921
+ const cusparseMatDescr_t descrA,
922
+ const int * csrRowPtrA,
923
+ const int * csrColIndA,
924
+ csrcholInfo_t info);
925
+
926
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrcholBufferInfo(
927
+ cusolverSpHandle_t handle,
928
+ int n,
929
+ int nnzA,
930
+ const cusparseMatDescr_t descrA,
931
+ const float * csrValA,
932
+ const int * csrRowPtrA,
933
+ const int * csrColIndA,
934
+ csrcholInfo_t info,
935
+ size_t * internalDataInBytes,
936
+ size_t * workspaceInBytes);
937
+
938
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrcholBufferInfo(
939
+ cusolverSpHandle_t handle,
940
+ int n,
941
+ int nnzA,
942
+ const cusparseMatDescr_t descrA,
943
+ const double * csrValA,
944
+ const int * csrRowPtrA,
945
+ const int * csrColIndA,
946
+ csrcholInfo_t info,
947
+ size_t * internalDataInBytes,
948
+ size_t * workspaceInBytes);
949
+
950
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrcholBufferInfo(
951
+ cusolverSpHandle_t handle,
952
+ int n,
953
+ int nnzA,
954
+ const cusparseMatDescr_t descrA,
955
+ const cuComplex * csrValA,
956
+ const int * csrRowPtrA,
957
+ const int * csrColIndA,
958
+ csrcholInfo_t info,
959
+ size_t * internalDataInBytes,
960
+ size_t * workspaceInBytes);
961
+
962
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrcholBufferInfo(
963
+ cusolverSpHandle_t handle,
964
+ int n,
965
+ int nnzA,
966
+ const cusparseMatDescr_t descrA,
967
+ const cuDoubleComplex * csrValA,
968
+ const int * csrRowPtrA,
969
+ const int * csrColIndA,
970
+ csrcholInfo_t info,
971
+ size_t * internalDataInBytes,
972
+ size_t * workspaceInBytes);
973
+
974
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrcholFactor(
975
+ cusolverSpHandle_t handle,
976
+ int n,
977
+ int nnzA,
978
+ const cusparseMatDescr_t descrA,
979
+ const float * csrValA,
980
+ const int * csrRowPtrA,
981
+ const int * csrColIndA,
982
+ csrcholInfo_t info,
983
+ void * pBuffer);
984
+
985
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrcholFactor(
986
+ cusolverSpHandle_t handle,
987
+ int n,
988
+ int nnzA,
989
+ const cusparseMatDescr_t descrA,
990
+ const double * csrValA,
991
+ const int * csrRowPtrA,
992
+ const int * csrColIndA,
993
+ csrcholInfo_t info,
994
+ void * pBuffer);
995
+
996
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrcholFactor(
997
+ cusolverSpHandle_t handle,
998
+ int n,
999
+ int nnzA,
1000
+ const cusparseMatDescr_t descrA,
1001
+ const cuComplex * csrValA,
1002
+ const int * csrRowPtrA,
1003
+ const int * csrColIndA,
1004
+ csrcholInfo_t info,
1005
+ void * pBuffer);
1006
+
1007
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrcholFactor(
1008
+ cusolverSpHandle_t handle,
1009
+ int n,
1010
+ int nnzA,
1011
+ const cusparseMatDescr_t descrA,
1012
+ const cuDoubleComplex * csrValA,
1013
+ const int * csrRowPtrA,
1014
+ const int * csrColIndA,
1015
+ csrcholInfo_t info,
1016
+ void * pBuffer);
1017
+
1018
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrcholZeroPivot(
1019
+ cusolverSpHandle_t handle,
1020
+ csrcholInfo_t info,
1021
+ float tol,
1022
+ int * position);
1023
+
1024
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrcholZeroPivot(
1025
+ cusolverSpHandle_t handle,
1026
+ csrcholInfo_t info,
1027
+ double tol,
1028
+ int * position);
1029
+
1030
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrcholZeroPivot(
1031
+ cusolverSpHandle_t handle,
1032
+ csrcholInfo_t info,
1033
+ float tol,
1034
+ int * position);
1035
+
1036
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrcholZeroPivot(
1037
+ cusolverSpHandle_t handle,
1038
+ csrcholInfo_t info,
1039
+ double tol,
1040
+ int * position);
1041
+
1042
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrcholSolve(
1043
+ cusolverSpHandle_t handle,
1044
+ int n,
1045
+ const float * b,
1046
+ float * x,
1047
+ csrcholInfo_t info,
1048
+ void * pBuffer);
1049
+
1050
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrcholSolve(
1051
+ cusolverSpHandle_t handle,
1052
+ int n,
1053
+ const double * b,
1054
+ double * x,
1055
+ csrcholInfo_t info,
1056
+ void * pBuffer);
1057
+
1058
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrcholSolve(
1059
+ cusolverSpHandle_t handle,
1060
+ int n,
1061
+ const cuComplex * b,
1062
+ cuComplex * x,
1063
+ csrcholInfo_t info,
1064
+ void * pBuffer);
1065
+
1066
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrcholSolve(
1067
+ cusolverSpHandle_t handle,
1068
+ int n,
1069
+ const cuDoubleComplex *b,
1070
+ cuDoubleComplex * x,
1071
+ csrcholInfo_t info,
1072
+ void * pBuffer);
1073
+
1074
+ /*
1075
+ * "diag" is a device array of size N.
1076
+ * cusolverSp<t>csrcholDiag returns diag(L) to "diag" where A(P,P) = L*L**T
1077
+ * "diag" can estimate det(A) because det(A(P,P)) = det(A) = det(L)^2 if A =
1078
+ * L*L**T.
1079
+ *
1080
+ * cusolverSp<t>csrcholDiag must be called after cusolverSp<t>csrcholFactor.
1081
+ * otherwise "diag" is wrong.
1082
+ */
1083
+ cusolverStatus_t CUSOLVERAPI cusolverSpScsrcholDiag(
1084
+ cusolverSpHandle_t handle,
1085
+ csrcholInfo_t info,
1086
+ float * diag);
1087
+
1088
+ cusolverStatus_t CUSOLVERAPI cusolverSpDcsrcholDiag(
1089
+ cusolverSpHandle_t handle,
1090
+ csrcholInfo_t info,
1091
+ double * diag);
1092
+
1093
+ cusolverStatus_t CUSOLVERAPI cusolverSpCcsrcholDiag(
1094
+ cusolverSpHandle_t handle,
1095
+ csrcholInfo_t info,
1096
+ float * diag);
1097
+
1098
+ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrcholDiag(
1099
+ cusolverSpHandle_t handle,
1100
+ csrcholInfo_t info,
1101
+ double * diag);
1102
+
1103
+ #if defined(__cplusplus)
1104
+ }
1105
+ #endif /* __cplusplus */
1106
+
1107
+ #endif // CUSOLVERSP_LOWLEVEL_PREVIEW_H_
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cusolver/lib/__init__.py ADDED
File without changes
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nccl/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (212 Bytes). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nccl/include/nccl_net.h ADDED
@@ -0,0 +1,456 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*************************************************************************
2
+ * Copyright (c) 2017-2022, NVIDIA CORPORATION. All rights reserved.
3
+ *
4
+ * See LICENSE.txt for license information
5
+ ************************************************************************/
6
+
7
+ #ifndef NCCL_NET_H_
8
+ #define NCCL_NET_H_
9
+
10
+ #include "nccl.h"
11
+ #include "nccl_common.h"
12
+ #include "net_device.h"
13
+ #include <stdint.h>
14
+
15
+ #define NCCL_NET_HANDLE_MAXSIZE 128
16
+
17
+ #define NCCL_PTR_HOST 0x1
18
+ #define NCCL_PTR_CUDA 0x2
19
+ #define NCCL_PTR_DMABUF 0x4
20
+
21
+ // Maximum number of requests per comm object
22
+ #define NCCL_NET_MAX_REQUESTS 32
23
+
24
+ typedef struct {
25
+ char* name; // Used mostly for logging.
26
+ char* pciPath; // Path to the PCI device in /sys.
27
+ uint64_t guid; // Unique identifier for the NIC chip. Important for
28
+ // cards with multiple PCI functions (Physical or virtual).
29
+ int ptrSupport; // [NCCL_PTR_HOST|NCCL_PTR_CUDA|NCCL_PTR_DMABUF]
30
+ int regIsGlobal; // regMr is not tied to a particular comm
31
+ int speed; // Port speed in Mbps.
32
+ int port; // Port number.
33
+ float latency; // Network latency
34
+ int maxComms; // Maximum number of comms we can create
35
+ int maxRecvs; // Maximum number of grouped receives.
36
+ ncclNetDeviceType netDeviceType; // Network offload type
37
+ int netDeviceVersion; // Version number for network offload
38
+ } ncclNetProperties_v8_t;
39
+
40
+ typedef ncclNetProperties_v8_t ncclNetProperties_t;
41
+
42
+ typedef struct {
43
+ // Name of the network (mainly for logs)
44
+ const char* name;
45
+ // Initialize the network.
46
+ ncclResult_t (*init)(ncclDebugLogger_t logFunction);
47
+ // Return the number of adapters.
48
+ ncclResult_t (*devices)(int* ndev);
49
+ // Get various device properties.
50
+ ncclResult_t (*getProperties)(int dev, ncclNetProperties_v8_t* props);
51
+ // Create a receiving object and provide a handle to connect to it. The
52
+ // handle can be up to NCCL_NET_HANDLE_MAXSIZE bytes and will be exchanged
53
+ // between ranks to create a connection.
54
+ ncclResult_t (*listen)(int dev, void* handle, void** listenComm);
55
+ // Connect to a handle and return a sending comm object for that peer.
56
+ // This call must not block for the connection to be established, and instead
57
+ // should return successfully with sendComm == NULL with the expectation that
58
+ // it will be called again until sendComm != NULL.
59
+ // If *sendDevComm points to a valid object, then NCCL is requesting device offload for this connection
60
+ ncclResult_t (*connect)(int dev, void* handle, void** sendComm, ncclNetDeviceHandle_v8_t** sendDevComm);
61
+ // Finalize connection establishment after remote peer has called connect.
62
+ // This call must not block for the connection to be established, and instead
63
+ // should return successfully with recvComm == NULL with the expectation that
64
+ // it will be called again until recvComm != NULL.
65
+ // If *recvDevComm points to a valid object, then NCCL is requesting device offload for this connection
66
+ ncclResult_t (*accept)(void* listenComm, void** recvComm, ncclNetDeviceHandle_v8_t** recvDevComm);
67
+ // Register/Deregister memory. Comm can be either a sendComm or a recvComm.
68
+ // Type is either NCCL_PTR_HOST or NCCL_PTR_CUDA.
69
+ ncclResult_t (*regMr)(void* comm, void* data, size_t size, int type, void** mhandle);
70
+ /* DMA-BUF support */
71
+ ncclResult_t (*regMrDmaBuf)(void* comm, void* data, size_t size, int type, uint64_t offset, int fd, void** mhandle);
72
+ ncclResult_t (*deregMr)(void* comm, void* mhandle);
73
+ // Asynchronous send to a peer.
74
+ // May return request == NULL if the call cannot be performed (or would block)
75
+ ncclResult_t (*isend)(void* sendComm, void* data, int size, int tag, void* mhandle, void** request);
76
+ // Asynchronous recv from a peer.
77
+ // May return request == NULL if the call cannot be performed (or would block)
78
+ ncclResult_t (*irecv)(void* recvComm, int n, void** data, int* sizes, int* tags, void** mhandles, void** request);
79
+ // Perform a flush/fence to make sure all data received with NCCL_PTR_CUDA is
80
+ // visible to the GPU
81
+ ncclResult_t (*iflush)(void* recvComm, int n, void** data, int* sizes, void** mhandles, void** request);
82
+ // Test whether a request is complete. If size is not NULL, it returns the
83
+ // number of bytes sent/received.
84
+ ncclResult_t (*test)(void* request, int* done, int* sizes);
85
+ // Close and free send/recv comm objects
86
+ ncclResult_t (*closeSend)(void* sendComm);
87
+ ncclResult_t (*closeRecv)(void* recvComm);
88
+ ncclResult_t (*closeListen)(void* listenComm);
89
+
90
+ // Copy the given mhandle to a dptr in a format usable by this plugin's device code
91
+ ncclResult_t (*getDeviceMr)(void* comm, void* mhandle, void** dptr_mhandle);
92
+
93
+ // Notify the plugin that a recv has completed by the device
94
+ ncclResult_t (*irecvConsumed)(void* recvComm, int n, void* request);
95
+ } ncclNet_v8_t;
96
+
97
+ typedef ncclNet_v8_t ncclNet_t;
98
+
99
+ #define NCCL_NET_PLUGIN_SYMBOL ncclNetPlugin_v8
100
+
101
+ typedef struct {
102
+ void* mhandle;
103
+ void* address;
104
+ uint32_t size;
105
+ } ncclNetSGE_v8_t;
106
+
107
+ typedef struct {
108
+ // Name of the collective network (mainly for logs)
109
+ const char* name;
110
+ // Initialize the collective network.
111
+ ncclResult_t (*init)(ncclDebugLogger_t logFunction);
112
+ // Return the number of adapters capable of doing collective operations.
113
+ // If ndev returns 0, all other functions might be set to NULL.
114
+ ncclResult_t (*devices)(int* ndev);
115
+ // Get various device properties.
116
+ ncclResult_t (*getProperties)(int dev, ncclNetProperties_v8_t* props);
117
+ // Create a receiving object and provide a handle to connect to it. The
118
+ // handle can be up to NCCL_NET_HANDLE_MAXSIZE bytes and will be exchanged
119
+ // between ranks to create connections.
120
+ ncclResult_t (*listen)(int dev, void* handle, void** listenComm);
121
+ // Create a group for collective operations. handles have been created
122
+ // using listen() above. rank indicates caller's rank in the collective network.
123
+ ncclResult_t (*connect)(void* handles[], int nranks, int rank, void* listenComm, void** collComm);
124
+ // Returns whether a reduction operation on a data type is supported.
125
+ // 1 for supported, 0 otherwise.
126
+ ncclResult_t (*reduceSupport)(ncclDataType_t dataType, ncclRedOp_t redOp, int* supported);
127
+ // Register/Deregister memory. Type is either NCCL_PTR_HOST or NCCL_PTR_CUDA.
128
+ ncclResult_t (*regMr)(void* collComm, void* data, size_t size, int type, void** mhandle);
129
+ /* DMA-BUF support */
130
+ ncclResult_t (*regMrDmaBuf)(void* collComm, void* data, size_t size, int type, uint64_t offset, int fd, void** mhandle);
131
+ ncclResult_t (*deregMr)(void* collComm, void* mhandle);
132
+ // Performs an asynchronous allreduce operation on the collective group.
133
+ // May return request == NULL if the call cannot be performed (or would block).
134
+ ncclResult_t (*iallreduce)(void* collComm, void* sendData, void* recvData, int count,
135
+ ncclDataType_t dataType, ncclRedOp_t redOp, void* sendMhandle, void* recvMhandle, void** request);
136
+ ncclResult_t (*iallgather)(void* collComm, void* sendData, int nRecvParts, ncclNetSGE_v8_t* recvParts,
137
+ size_t bytesPerRank, size_t windowOffset, size_t windowBytes,
138
+ void* sendMhandle, void** request);
139
+ ncclResult_t (*ireducescatter)(void* collComm, int nSendParts, ncclNetSGE_v8_t* sendParts, void* recvData,
140
+ size_t bytesPerRank, size_t windowOffset, size_t windowBytes,
141
+ ncclDataType_t dataType, ncclRedOp_t redOp,
142
+ void* recvMhandle, void** request);
143
+ // Perform a flush/fence to make sure all data received with NCCL_PTR_CUDA is
144
+ // visible to the GPU
145
+ ncclResult_t (*iflush)(void* collComm, void* data, int size, void* mhandle, void** request);
146
+ // Test whether a request is complete. If size is not NULL, it returns the
147
+ // number of bytes sent/received.
148
+ ncclResult_t (*test)(void* request, int* done, int* size);
149
+ // Close and free collective comm objects
150
+ ncclResult_t (*closeColl)(void* collComm);
151
+ ncclResult_t (*closeListen)(void* listenComm);
152
+ } ncclCollNet_v8_t;
153
+
154
+ typedef ncclCollNet_v8_t ncclCollNet_t;
155
+
156
+ #define NCCL_COLLNET_PLUGIN_SYMBOL ncclCollNetPlugin_v8
157
+
158
+ typedef struct {
159
+ char* name; // Used mostly for logging.
160
+ char* pciPath; // Path to the PCI device in /sys.
161
+ uint64_t guid; // Unique identifier for the NIC chip. Important for
162
+ // cards with multiple PCI functions (Physical or virtual).
163
+ int ptrSupport; // [NCCL_PTR_HOST|NCCL_PTR_CUDA|NCCL_PTR_DMABUF]
164
+ int speed; // Port speed in Mbps.
165
+ int port; // Port number.
166
+ float latency; // Network latency
167
+ int maxComms; // Maximum number of comms we can create
168
+ int maxRecvs; // Maximum number of grouped receives.
169
+ ncclNetDeviceType netDeviceType; // Network offload type
170
+ int netDeviceVersion; // Version number for network offload
171
+ } ncclNetProperties_v7_t;
172
+
173
+ typedef struct {
174
+ // Name of the network (mainly for logs)
175
+ const char* name;
176
+ // Initialize the network.
177
+ ncclResult_t (*init)(ncclDebugLogger_t logFunction);
178
+ // Return the number of adapters.
179
+ ncclResult_t (*devices)(int* ndev);
180
+ // Get various device properties.
181
+ ncclResult_t (*getProperties)(int dev, ncclNetProperties_v7_t* props);
182
+ // Create a receiving object and provide a handle to connect to it. The
183
+ // handle can be up to NCCL_NET_HANDLE_MAXSIZE bytes and will be exchanged
184
+ // between ranks to create a connection.
185
+ ncclResult_t (*listen)(int dev, void* handle, void** listenComm);
186
+ // Connect to a handle and return a sending comm object for that peer.
187
+ // This call must not block for the connection to be established, and instead
188
+ // should return successfully with sendComm == NULL with the expectation that
189
+ // it will be called again until sendComm != NULL.
190
+ // If *sendDevComm points to a valid object, then NCCL is requesting device offload for this connection
191
+ ncclResult_t (*connect)(int dev, void* handle, void** sendComm, ncclNetDeviceHandle_v7_t** sendDevComm);
192
+ // Finalize connection establishment after remote peer has called connect.
193
+ // This call must not block for the connection to be established, and instead
194
+ // should return successfully with recvComm == NULL with the expectation that
195
+ // it will be called again until recvComm != NULL.
196
+ // If *recvDevComm points to a valid object, then NCCL is requesting device offload for this connection
197
+ ncclResult_t (*accept)(void* listenComm, void** recvComm, ncclNetDeviceHandle_v7_t** recvDevComm);
198
+ // Register/Deregister memory. Comm can be either a sendComm or a recvComm.
199
+ // Type is either NCCL_PTR_HOST or NCCL_PTR_CUDA.
200
+ ncclResult_t (*regMr)(void* comm, void* data, int size, int type, void** mhandle);
201
+ /* DMA-BUF support */
202
+ ncclResult_t (*regMrDmaBuf)(void* comm, void* data, size_t size, int type, uint64_t offset, int fd, void** mhandle);
203
+ ncclResult_t (*deregMr)(void* comm, void* mhandle);
204
+ // Asynchronous send to a peer.
205
+ // May return request == NULL if the call cannot be performed (or would block)
206
+ ncclResult_t (*isend)(void* sendComm, void* data, int size, int tag, void* mhandle, void** request);
207
+ // Asynchronous recv from a peer.
208
+ // May return request == NULL if the call cannot be performed (or would block)
209
+ ncclResult_t (*irecv)(void* recvComm, int n, void** data, int* sizes, int* tags, void** mhandles, void** request);
210
+ // Perform a flush/fence to make sure all data received with NCCL_PTR_CUDA is
211
+ // visible to the GPU
212
+ ncclResult_t (*iflush)(void* recvComm, int n, void** data, int* sizes, void** mhandles, void** request);
213
+ // Test whether a request is complete. If size is not NULL, it returns the
214
+ // number of bytes sent/received.
215
+ ncclResult_t (*test)(void* request, int* done, int* sizes);
216
+ // Close and free send/recv comm objects
217
+ ncclResult_t (*closeSend)(void* sendComm);
218
+ ncclResult_t (*closeRecv)(void* recvComm);
219
+ ncclResult_t (*closeListen)(void* listenComm);
220
+
221
+ // Copy the given mhandle to a dptr in a format usable by this plugin's device code
222
+ ncclResult_t (*getDeviceMr)(void* comm, void* mhandle, void** dptr_mhandle);
223
+
224
+ // Notify the plugin that a recv has completed by the device
225
+ ncclResult_t (*irecvConsumed)(void* recvComm, int n, void* request);
226
+ } ncclNet_v7_t;
227
+
228
+ typedef struct {
229
+ // Name of the collective network (mainly for logs)
230
+ const char* name;
231
+ // Initialize the collective network.
232
+ ncclResult_t (*init)(ncclDebugLogger_t logFunction);
233
+ // Return the number of adapters capable of doing collective operations.
234
+ // If ndev returns 0, all other functions might be set to NULL.
235
+ ncclResult_t (*devices)(int* ndev);
236
+ // Get various device properties.
237
+ ncclResult_t (*getProperties)(int dev, ncclNetProperties_v7_t* props);
238
+ // Create a receiving object and provide a handle to connect to it. The
239
+ // handle can be up to NCCL_NET_HANDLE_MAXSIZE bytes and will be exchanged
240
+ // between ranks to create connections.
241
+ ncclResult_t (*listen)(int dev, void* handle, void** listenComm);
242
+ // Create a group for collective operations. handles have been created
243
+ // using listen() above. rank indicates caller's rank in the collective network.
244
+ ncclResult_t (*connect)(void* handles[], int nranks, int rank, void* listenComm, void** collComm);
245
+ // Returns whether a reduction operation on a data type is supported.
246
+ // 1 for supported, 0 otherwise.
247
+ ncclResult_t (*reduceSupport)(ncclDataType_t dataType, ncclRedOp_t redOp, int* supported);
248
+ // Register/Deregister memory. Type is either NCCL_PTR_HOST or NCCL_PTR_CUDA.
249
+ ncclResult_t (*regMr)(void* collComm, void* data, int size, int type, void** mhandle);
250
+ /* DMA-BUF support */
251
+ ncclResult_t (*regMrDmaBuf)(void* collComm, void* data, size_t size, int type, uint64_t offset, int fd, void** mhandle);
252
+ ncclResult_t (*deregMr)(void* collComm, void* mhandle);
253
+ // Performs an asynchronous allreduce operation on the collective group.
254
+ // May return request == NULL if the call cannot be performed (or would block).
255
+ ncclResult_t (*iallreduce)(void* collComm, void* sendData, void* recvData, int count,
256
+ ncclDataType_t dataType, ncclRedOp_t redOp, void* sendMhandle, void* recvMhandle, void** request);
257
+ // Perform a flush/fence to make sure all data received with NCCL_PTR_CUDA is
258
+ // visible to the GPU
259
+ ncclResult_t (*iflush)(void* collComm, void* data, int size, void* mhandle, void** request);
260
+ // Test whether a request is complete. If size is not NULL, it returns the
261
+ // number of bytes sent/received.
262
+ ncclResult_t (*test)(void* request, int* done, int* size);
263
+ // Close and free collective comm objects
264
+ ncclResult_t (*closeColl)(void* collComm);
265
+ ncclResult_t (*closeListen)(void* listenComm);
266
+ } ncclCollNet_v7_t;
267
+
268
+ #define NCCL_NET_MAX_REQUESTS_V6 8
269
+
270
+ // v6 struct for backwards compatibility
271
+ typedef struct {
272
+ char* name; // Used mostly for logging.
273
+ char* pciPath; // Path to the PCI device in /sys.
274
+ uint64_t guid; // Unique identifier for the NIC chip. Important for
275
+ // cards with multiple PCI functions (Physical or virtual).
276
+ int ptrSupport; // [NCCL_PTR_HOST|NCCL_PTR_CUDA|NCCL_PTR_DMABUF]
277
+ int speed; // Port speed in Mbps.
278
+ int port; // Port number.
279
+ float latency; // Network latency
280
+ int maxComms; // Maximum number of comms we can create
281
+ int maxRecvs; // Maximum number of grouped receives.
282
+ } ncclNetProperties_v6_t;
283
+
284
+ typedef struct {
285
+ // Name of the network (mainly for logs)
286
+ const char* name;
287
+ // Initialize the network.
288
+ ncclResult_t (*init)(ncclDebugLogger_t logFunction);
289
+ // Return the number of adapters.
290
+ ncclResult_t (*devices)(int* ndev);
291
+ // Get various device properties.
292
+ ncclResult_t (*getProperties)(int dev, ncclNetProperties_v6_t* props);
293
+ // Create a receiving object and provide a handle to connect to it. The
294
+ // handle can be up to NCCL_NET_HANDLE_MAXSIZE bytes and will be exchanged
295
+ // between ranks to create a connection.
296
+ ncclResult_t (*listen)(int dev, void* handle, void** listenComm);
297
+ // Connect to a handle and return a sending comm object for that peer.
298
+ // This call must not block for the connection to be established, and instead
299
+ // should return successfully with sendComm == NULL with the expectation that
300
+ // it will be called again until sendComm != NULL.
301
+ ncclResult_t (*connect)(int dev, void* handle, void** sendComm);
302
+ // Finalize connection establishment after remote peer has called connect.
303
+ // This call must not block for the connection to be established, and instead
304
+ // should return successfully with recvComm == NULL with the expectation that
305
+ // it will be called again until recvComm != NULL.
306
+ ncclResult_t (*accept)(void* listenComm, void** recvComm);
307
+ // Register/Deregister memory. Comm can be either a sendComm or a recvComm.
308
+ // Type is either NCCL_PTR_HOST or NCCL_PTR_CUDA.
309
+ ncclResult_t (*regMr)(void* comm, void* data, int size, int type, void** mhandle);
310
+ /* DMA-BUF support */
311
+ ncclResult_t (*regMrDmaBuf)(void* comm, void* data, size_t size, int type, uint64_t offset, int fd, void** mhandle);
312
+ ncclResult_t (*deregMr)(void* comm, void* mhandle);
313
+ // Asynchronous send to a peer.
314
+ // May return request == NULL if the call cannot be performed (or would block)
315
+ ncclResult_t (*isend)(void* sendComm, void* data, int size, int tag, void* mhandle, void** request);
316
+ // Asynchronous recv from a peer.
317
+ // May return request == NULL if the call cannot be performed (or would block)
318
+ ncclResult_t (*irecv)(void* recvComm, int n, void** data, int* sizes, int* tags, void** mhandles, void** request);
319
+ // Perform a flush/fence to make sure all data received with NCCL_PTR_CUDA is
320
+ // visible to the GPU
321
+ ncclResult_t (*iflush)(void* recvComm, int n, void** data, int* sizes, void** mhandles, void** request);
322
+ // Test whether a request is complete. If size is not NULL, it returns the
323
+ // number of bytes sent/received.
324
+ ncclResult_t (*test)(void* request, int* done, int* sizes);
325
+ // Close and free send/recv comm objects
326
+ ncclResult_t (*closeSend)(void* sendComm);
327
+ ncclResult_t (*closeRecv)(void* recvComm);
328
+ ncclResult_t (*closeListen)(void* listenComm);
329
+ } ncclNet_v6_t;
330
+
331
+ typedef struct {
332
+ // Name of the collective network (mainly for logs)
333
+ const char* name;
334
+ // Initialize the collective network.
335
+ ncclResult_t (*init)(ncclDebugLogger_t logFunction);
336
+ // Return the number of adapters capable of doing collective operations.
337
+ // If ndev returns 0, all other functions might be set to NULL.
338
+ ncclResult_t (*devices)(int* ndev);
339
+ // Get various device properties.
340
+ ncclResult_t (*getProperties)(int dev, ncclNetProperties_v6_t* props);
341
+ // Create a receiving object and provide a handle to connect to it. The
342
+ // handle can be up to NCCL_NET_HANDLE_MAXSIZE bytes and will be exchanged
343
+ // between ranks to create connections.
344
+ ncclResult_t (*listen)(int dev, void* handle, void** listenComm);
345
+ // Create a group for collective operations. handles have been created
346
+ // using listen() above. rank indicates caller's rank in the collective network.
347
+ ncclResult_t (*connect)(void* handles[], int nranks, int rank, void* listenComm, void** collComm);
348
+ // Returns whether a reduction operation on a data type is supported.
349
+ // 1 for supported, 0 otherwise.
350
+ ncclResult_t (*reduceSupport)(ncclDataType_t dataType, ncclRedOp_t redOp, int* supported);
351
+ // Register/Deregister memory. Type is either NCCL_PTR_HOST or NCCL_PTR_CUDA.
352
+ ncclResult_t (*regMr)(void* collComm, void* data, int size, int type, void** mhandle);
353
+ /* DMA-BUF support */
354
+ ncclResult_t (*regMrDmaBuf)(void* collComm, void* data, size_t size, int type, uint64_t offset, int fd, void** mhandle);
355
+ ncclResult_t (*deregMr)(void* collComm, void* mhandle);
356
+ // Performs an asynchronous allreduce operation on the collective group.
357
+ // May return request == NULL if the call cannot be performed (or would block).
358
+ ncclResult_t (*iallreduce)(void* collComm, void* sendData, void* recvData, int count,
359
+ ncclDataType_t dataType, ncclRedOp_t redOp, void* sendMhandle, void* recvMhandle, void** request);
360
+ // Perform a flush/fence to make sure all data received with NCCL_PTR_CUDA is
361
+ // visible to the GPU
362
+ ncclResult_t (*iflush)(void* collComm, void* data, int size, void* mhandle, void** request);
363
+ // Test whether a request is complete. If size is not NULL, it returns the
364
+ // number of bytes sent/received.
365
+ ncclResult_t (*test)(void* request, int* done, int* size);
366
+ // Close and free collective comm objects
367
+ ncclResult_t (*closeColl)(void* collComm);
368
+ ncclResult_t (*closeListen)(void* listenComm);
369
+ } ncclCollNet_v6_t;
370
+
371
+ // v5 struct for backwards compatibility
372
+ typedef struct {
373
+ // Name of the network (mainly for logs)
374
+ const char* name;
375
+ // Initialize the network.
376
+ ncclResult_t (*init)(ncclDebugLogger_t logFunction);
377
+ // Return the number of adapters.
378
+ ncclResult_t (*devices)(int* ndev);
379
+ // Get various device properties.
380
+ ncclResult_t (*getProperties)(int dev, ncclNetProperties_v6_t* props);
381
+ // Create a receiving object and provide a handle to connect to it. The
382
+ // handle can be up to NCCL_NET_HANDLE_MAXSIZE bytes and will be exchanged
383
+ // between ranks to create a connection.
384
+ ncclResult_t (*listen)(int dev, void* handle, void** listenComm);
385
+ // Connect to a handle and return a sending comm object for that peer.
386
+ // This call must not block for the connection to be established, and instead
387
+ // should return successfully with sendComm == NULL with the expectation that
388
+ // it will be called again until sendComm != NULL.
389
+ ncclResult_t (*connect)(int dev, void* handle, void** sendComm);
390
+ // Finalize connection establishment after remote peer has called connect.
391
+ // This call must not block for the connection to be established, and instead
392
+ // should return successfully with recvComm == NULL with the expectation that
393
+ // it will be called again until recvComm != NULL.
394
+ ncclResult_t (*accept)(void* listenComm, void** recvComm);
395
+ // Register/Deregister memory. Comm can be either a sendComm or a recvComm.
396
+ // Type is either NCCL_PTR_HOST or NCCL_PTR_CUDA.
397
+ ncclResult_t (*regMr)(void* comm, void* data, int size, int type, void** mhandle);
398
+ ncclResult_t (*deregMr)(void* comm, void* mhandle);
399
+ // Asynchronous send to a peer.
400
+ // May return request == NULL if the call cannot be performed (or would block)
401
+ ncclResult_t (*isend)(void* sendComm, void* data, int size, int tag, void* mhandle, void** request);
402
+ // Asynchronous recv from a peer.
403
+ // May return request == NULL if the call cannot be performed (or would block)
404
+ ncclResult_t (*irecv)(void* recvComm, int n, void** data, int* sizes, int* tags, void** mhandles, void** request);
405
+ // Perform a flush/fence to make sure all data received with NCCL_PTR_CUDA is
406
+ // visible to the GPU
407
+ ncclResult_t (*iflush)(void* recvComm, int n, void** data, int* sizes, void** mhandles, void** request);
408
+ // Test whether a request is complete. If size is not NULL, it returns the
409
+ // number of bytes sent/received.
410
+ ncclResult_t (*test)(void* request, int* done, int* sizes);
411
+ // Close and free send/recv comm objects
412
+ ncclResult_t (*closeSend)(void* sendComm);
413
+ ncclResult_t (*closeRecv)(void* recvComm);
414
+ ncclResult_t (*closeListen)(void* listenComm);
415
+ } ncclNet_v5_t;
416
+
417
+ // v5 struct for backwards compatibility
418
+ typedef struct {
419
+ // Name of the collective network (mainly for logs)
420
+ const char* name;
421
+ // Initialize the collective network.
422
+ ncclResult_t (*init)(ncclDebugLogger_t logFunction);
423
+ // Return the number of adapters capable of doing collective operations.
424
+ // If ndev returns 0, all other functions might be set to NULL.
425
+ ncclResult_t (*devices)(int* ndev);
426
+ // Get various device properties.
427
+ ncclResult_t (*getProperties)(int dev, ncclNetProperties_v6_t* props);
428
+ // Create a receiving object and provide a handle to connect to it. The
429
+ // handle can be up to NCCL_NET_HANDLE_MAXSIZE bytes and will be exchanged
430
+ // between ranks to create connections.
431
+ ncclResult_t (*listen)(int dev, void* handle, void** listenComm);
432
+ // Create a group for collective operations. handles have been created
433
+ // using listen() above. rank indicates caller's rank in the collective network.
434
+ ncclResult_t (*connect)(void* handles[], int nranks, int rank, void* listenComm, void** collComm);
435
+ // Returns whether a reduction operation on a data type is supported.
436
+ // 1 for supported, 0 otherwise.
437
+ ncclResult_t (*reduceSupport)(ncclDataType_t dataType, ncclRedOp_t redOp, int* supported);
438
+ // Register/Deregister memory. Type is either NCCL_PTR_HOST or NCCL_PTR_CUDA.
439
+ ncclResult_t (*regMr)(void* collComm, void* data, int size, int type, void** mhandle);
440
+ ncclResult_t (*deregMr)(void* collComm, void* mhandle);
441
+ // Performs an asynchronous allreduce operation on the collective group.
442
+ // May return request == NULL if the call cannot be performed (or would block).
443
+ ncclResult_t (*iallreduce)(void* collComm, void* sendData, void* recvData, int count,
444
+ ncclDataType_t dataType, ncclRedOp_t redOp, void* sendMhandle, void* recvMhandle, void** request);
445
+ // Perform a flush/fence to make sure all data received with NCCL_PTR_CUDA is
446
+ // visible to the GPU
447
+ ncclResult_t (*iflush)(void* collComm, void* data, int size, void* mhandle, void** request);
448
+ // Test whether a request is complete. If size is not NULL, it returns the
449
+ // number of bytes sent/received.
450
+ ncclResult_t (*test)(void* request, int* done, int* size);
451
+ // Close and free collective comm objects
452
+ ncclResult_t (*closeColl)(void* collComm);
453
+ ncclResult_t (*closeListen)(void* listenComm);
454
+ } ncclCollNet_v5_t;
455
+
456
+ #endif // end include guard
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvtx3/nvToolsExtCudaRt.h ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2009-2016 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO USER:
5
+ *
6
+ * This source code is subject to NVIDIA ownership rights under U.S. and
7
+ * international Copyright laws.
8
+ *
9
+ * This software and the information contained herein is PROPRIETARY and
10
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and conditions
11
+ * of a form of NVIDIA software license agreement.
12
+ *
13
+ * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
14
+ * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
15
+ * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
16
+ * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
17
+ * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
18
+ * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
19
+ * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
20
+ * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
21
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
22
+ * OR PERFORMANCE OF THIS SOURCE CODE.
23
+ *
24
+ * U.S. Government End Users. This source code is a "commercial item" as
25
+ * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
26
+ * "commercial computer software" and "commercial computer software
27
+ * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
28
+ * and is provided to the U.S. Government only as a commercial end item.
29
+ * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
30
+ * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
31
+ * source code with only those rights set forth herein.
32
+ *
33
+ * Any use of this source code in individual and commercial software must
34
+ * include, in the user documentation and internal comments to the code,
35
+ * the above Disclaimer and U.S. Government End Users Notice.
36
+ */
37
+
38
+ #include "nvToolsExt.h"
39
+
40
+ #include "cuda.h"
41
+ #include "driver_types.h"
42
+
43
+ #ifndef NVTOOLSEXT_CUDART_V3
44
+ #define NVTOOLSEXT_CUDART_V3
45
+
46
+ #ifdef __cplusplus
47
+ extern "C" {
48
+ #endif /* __cplusplus */
49
+
50
+ /* ========================================================================= */
51
+ /** \name Functions for CUDA Resource Naming
52
+ */
53
+ /** \addtogroup RESOURCE_NAMING
54
+ * \section RESOURCE_NAMING_CUDART CUDA Runtime Resource Naming
55
+ *
56
+ * This section covers the API functions that allow to annotate CUDA resources
57
+ * with user-provided names.
58
+ *
59
+ * @{
60
+ */
61
+
62
+ /* ------------------------------------------------------------------------- */
63
+ /* \cond SHOW_HIDDEN
64
+ * \brief Used to build a non-colliding value for resource types separated class
65
+ * \version \NVTX_VERSION_2
66
+ */
67
+ #define NVTX_RESOURCE_CLASS_CUDART 5
68
+ /** \endcond */
69
+
70
+ /* ------------------------------------------------------------------------- */
71
+ /** \brief Resource types for CUDART
72
+ */
73
+ typedef enum nvtxResourceCUDARTType_t
74
+ {
75
+ NVTX_RESOURCE_TYPE_CUDART_DEVICE = NVTX_RESOURCE_MAKE_TYPE(CUDART, 0), /* int device */
76
+ NVTX_RESOURCE_TYPE_CUDART_STREAM = NVTX_RESOURCE_MAKE_TYPE(CUDART, 1), /* cudaStream_t */
77
+ NVTX_RESOURCE_TYPE_CUDART_EVENT = NVTX_RESOURCE_MAKE_TYPE(CUDART, 2), /* cudaEvent_t */
78
+ } nvtxResourceCUDARTType_t;
79
+
80
+
81
+ /* ------------------------------------------------------------------------- */
82
+ /** \brief Annotates a CUDA device.
83
+ *
84
+ * Allows the user to associate a CUDA device with a user-provided name.
85
+ *
86
+ * \param device - The id of the CUDA device to name.
87
+ * \param name - The name of the CUDA device.
88
+ *
89
+ * \version \NVTX_VERSION_1
90
+ * @{ */
91
+ NVTX_DECLSPEC void NVTX_API nvtxNameCudaDeviceA(int device, const char* name);
92
+ NVTX_DECLSPEC void NVTX_API nvtxNameCudaDeviceW(int device, const wchar_t* name);
93
+ /** @} */
94
+
95
+ /* ------------------------------------------------------------------------- */
96
+ /** \brief Annotates a CUDA stream.
97
+ *
98
+ * Allows the user to associate a CUDA stream with a user-provided name.
99
+ *
100
+ * \param stream - The handle of the CUDA stream to name.
101
+ * \param name - The name of the CUDA stream.
102
+ *
103
+ * \version \NVTX_VERSION_1
104
+ * @{ */
105
+ NVTX_DECLSPEC void NVTX_API nvtxNameCudaStreamA(cudaStream_t stream, const char* name);
106
+ NVTX_DECLSPEC void NVTX_API nvtxNameCudaStreamW(cudaStream_t stream, const wchar_t* name);
107
+ /** @} */
108
+
109
+ /* ------------------------------------------------------------------------- */
110
+ /** \brief Annotates a CUDA event.
111
+ *
112
+ * Allows the user to associate a CUDA event with a user-provided name.
113
+ *
114
+ * \param event - The handle of the CUDA event to name.
115
+ * \param name - The name of the CUDA event.
116
+ *
117
+ * \version \NVTX_VERSION_1
118
+ * @{ */
119
+ NVTX_DECLSPEC void NVTX_API nvtxNameCudaEventA(cudaEvent_t event, const char* name);
120
+ NVTX_DECLSPEC void NVTX_API nvtxNameCudaEventW(cudaEvent_t event, const wchar_t* name);
121
+ /** @} */
122
+
123
+ /** @} */ /* END RESOURCE_NAMING */
124
+
125
+ /* ========================================================================= */
126
+ #ifdef UNICODE
127
+ #define nvtxNameCudaDevice nvtxNameCudaDeviceW
128
+ #define nvtxNameCudaStream nvtxNameCudaStreamW
129
+ #define nvtxNameCudaEvent nvtxNameCudaEventW
130
+ #else
131
+ #define nvtxNameCudaDevice nvtxNameCudaDeviceA
132
+ #define nvtxNameCudaStream nvtxNameCudaStreamA
133
+ #define nvtxNameCudaEvent nvtxNameCudaEventA
134
+ #endif
135
+
136
+ #ifdef __cplusplus
137
+ }
138
+ #endif /* __cplusplus */
139
+
140
+ #ifndef NVTX_NO_IMPL
141
+ #define NVTX_IMPL_GUARD_CUDART /* Ensure other headers cannot included directly */
142
+ #include "nvtxDetail/nvtxImplCudaRt_v3.h"
143
+ #undef NVTX_IMPL_GUARD_CUDART
144
+ #endif /*NVTX_NO_IMPL*/
145
+
146
+ #endif /* NVTOOLSEXT_CUDART_V3 */
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvtx3/nvtxDetail/nvtxImplCore.h ADDED
@@ -0,0 +1,299 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ NVTX_DECLSPEC void NVTX_API nvtxMarkEx(const nvtxEventAttributes_t* eventAttrib)
2
+ {
3
+ #ifndef NVTX_DISABLE
4
+ nvtxMarkEx_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkEx_impl_fnptr;
5
+ if(local!=0)
6
+ (*local)(eventAttrib);
7
+ #endif /*NVTX_DISABLE*/
8
+ }
9
+
10
+ NVTX_DECLSPEC void NVTX_API nvtxMarkA(const char* message)
11
+ {
12
+ #ifndef NVTX_DISABLE
13
+ nvtxMarkA_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkA_impl_fnptr;
14
+ if(local!=0)
15
+ (*local)(message);
16
+ #endif /*NVTX_DISABLE*/
17
+ }
18
+
19
+ NVTX_DECLSPEC void NVTX_API nvtxMarkW(const wchar_t* message)
20
+ {
21
+ #ifndef NVTX_DISABLE
22
+ nvtxMarkW_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkW_impl_fnptr;
23
+ if(local!=0)
24
+ (*local)(message);
25
+ #endif /*NVTX_DISABLE*/
26
+ }
27
+
28
+ NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxRangeStartEx(const nvtxEventAttributes_t* eventAttrib)
29
+ {
30
+ #ifndef NVTX_DISABLE
31
+ nvtxRangeStartEx_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartEx_impl_fnptr;
32
+ if(local!=0)
33
+ return (*local)(eventAttrib);
34
+ else
35
+ #endif /*NVTX_DISABLE*/
36
+ return (nvtxRangeId_t)0;
37
+ }
38
+
39
+ NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxRangeStartA(const char* message)
40
+ {
41
+ #ifndef NVTX_DISABLE
42
+ nvtxRangeStartA_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartA_impl_fnptr;
43
+ if(local!=0)
44
+ return (*local)(message);
45
+ else
46
+ #endif /*NVTX_DISABLE*/
47
+ return (nvtxRangeId_t)0;
48
+ }
49
+
50
+ NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxRangeStartW(const wchar_t* message)
51
+ {
52
+ #ifndef NVTX_DISABLE
53
+ nvtxRangeStartW_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartW_impl_fnptr;
54
+ if(local!=0)
55
+ return (*local)(message);
56
+ else
57
+ #endif /*NVTX_DISABLE*/
58
+ return (nvtxRangeId_t)0;
59
+ }
60
+
61
+ NVTX_DECLSPEC void NVTX_API nvtxRangeEnd(nvtxRangeId_t id)
62
+ {
63
+ #ifndef NVTX_DISABLE
64
+ nvtxRangeEnd_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeEnd_impl_fnptr;
65
+ if(local!=0)
66
+ (*local)(id);
67
+ #endif /*NVTX_DISABLE*/
68
+ }
69
+
70
+ NVTX_DECLSPEC int NVTX_API nvtxRangePushEx(const nvtxEventAttributes_t* eventAttrib)
71
+ {
72
+ #ifndef NVTX_DISABLE
73
+ nvtxRangePushEx_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushEx_impl_fnptr;
74
+ if(local!=0)
75
+ return (*local)(eventAttrib);
76
+ else
77
+ #endif /*NVTX_DISABLE*/
78
+ return (int)NVTX_NO_PUSH_POP_TRACKING;
79
+ }
80
+
81
+ NVTX_DECLSPEC int NVTX_API nvtxRangePushA(const char* message)
82
+ {
83
+ #ifndef NVTX_DISABLE
84
+ nvtxRangePushA_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushA_impl_fnptr;
85
+ if(local!=0)
86
+ return (*local)(message);
87
+ else
88
+ #endif /*NVTX_DISABLE*/
89
+ return (int)NVTX_NO_PUSH_POP_TRACKING;
90
+ }
91
+
92
+ NVTX_DECLSPEC int NVTX_API nvtxRangePushW(const wchar_t* message)
93
+ {
94
+ #ifndef NVTX_DISABLE
95
+ nvtxRangePushW_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushW_impl_fnptr;
96
+ if(local!=0)
97
+ return (*local)(message);
98
+ else
99
+ #endif /*NVTX_DISABLE*/
100
+ return (int)NVTX_NO_PUSH_POP_TRACKING;
101
+ }
102
+
103
+ NVTX_DECLSPEC int NVTX_API nvtxRangePop(void)
104
+ {
105
+ #ifndef NVTX_DISABLE
106
+ nvtxRangePop_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePop_impl_fnptr;
107
+ if(local!=0)
108
+ return (*local)();
109
+ else
110
+ #endif /*NVTX_DISABLE*/
111
+ return (int)NVTX_NO_PUSH_POP_TRACKING;
112
+ }
113
+
114
+ NVTX_DECLSPEC void NVTX_API nvtxNameCategoryA(uint32_t category, const char* name)
115
+ {
116
+ #ifndef NVTX_DISABLE
117
+ nvtxNameCategoryA_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryA_impl_fnptr;
118
+ if(local!=0)
119
+ (*local)(category, name);
120
+ #endif /*NVTX_DISABLE*/
121
+ }
122
+
123
+ NVTX_DECLSPEC void NVTX_API nvtxNameCategoryW(uint32_t category, const wchar_t* name)
124
+ {
125
+ #ifndef NVTX_DISABLE
126
+ nvtxNameCategoryW_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryW_impl_fnptr;
127
+ if(local!=0)
128
+ (*local)(category, name);
129
+ #endif /*NVTX_DISABLE*/
130
+ }
131
+
132
+ NVTX_DECLSPEC void NVTX_API nvtxNameOsThreadA(uint32_t threadId, const char* name)
133
+ {
134
+ #ifndef NVTX_DISABLE
135
+ nvtxNameOsThreadA_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadA_impl_fnptr;
136
+ if(local!=0)
137
+ (*local)(threadId, name);
138
+ #endif /*NVTX_DISABLE*/
139
+ }
140
+
141
+ NVTX_DECLSPEC void NVTX_API nvtxNameOsThreadW(uint32_t threadId, const wchar_t* name)
142
+ {
143
+ #ifndef NVTX_DISABLE
144
+ nvtxNameOsThreadW_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadW_impl_fnptr;
145
+ if(local!=0)
146
+ (*local)(threadId, name);
147
+ #endif /*NVTX_DISABLE*/
148
+ }
149
+
150
+ NVTX_DECLSPEC void NVTX_API nvtxDomainMarkEx(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib)
151
+ {
152
+ #ifndef NVTX_DISABLE
153
+ nvtxDomainMarkEx_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainMarkEx_impl_fnptr;
154
+ if(local!=0)
155
+ (*local)(domain, eventAttrib);
156
+ #endif /*NVTX_DISABLE*/
157
+ }
158
+
159
+ NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxDomainRangeStartEx(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib)
160
+ {
161
+ #ifndef NVTX_DISABLE
162
+ nvtxDomainRangeStartEx_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeStartEx_impl_fnptr;
163
+ if(local!=0)
164
+ return (*local)(domain, eventAttrib);
165
+ else
166
+ #endif /*NVTX_DISABLE*/
167
+ return (nvtxRangeId_t)0;
168
+ }
169
+
170
+ NVTX_DECLSPEC void NVTX_API nvtxDomainRangeEnd(nvtxDomainHandle_t domain, nvtxRangeId_t id)
171
+ {
172
+ #ifndef NVTX_DISABLE
173
+ nvtxDomainRangeEnd_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeEnd_impl_fnptr;
174
+ if(local!=0)
175
+ (*local)(domain, id);
176
+ #endif /*NVTX_DISABLE*/
177
+ }
178
+
179
+ NVTX_DECLSPEC int NVTX_API nvtxDomainRangePushEx(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib)
180
+ {
181
+ #ifndef NVTX_DISABLE
182
+ nvtxDomainRangePushEx_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePushEx_impl_fnptr;
183
+ if(local!=0)
184
+ return (*local)(domain, eventAttrib);
185
+ else
186
+ #endif /*NVTX_DISABLE*/
187
+ return (int)NVTX_NO_PUSH_POP_TRACKING;
188
+ }
189
+
190
+ NVTX_DECLSPEC int NVTX_API nvtxDomainRangePop(nvtxDomainHandle_t domain)
191
+ {
192
+ #ifndef NVTX_DISABLE
193
+ nvtxDomainRangePop_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePop_impl_fnptr;
194
+ if(local!=0)
195
+ return (*local)(domain);
196
+ else
197
+ #endif /*NVTX_DISABLE*/
198
+ return (int)NVTX_NO_PUSH_POP_TRACKING;
199
+ }
200
+
201
+ NVTX_DECLSPEC nvtxResourceHandle_t NVTX_API nvtxDomainResourceCreate(nvtxDomainHandle_t domain, nvtxResourceAttributes_t* attribs)
202
+ {
203
+ #ifndef NVTX_DISABLE
204
+ nvtxDomainResourceCreate_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceCreate_impl_fnptr;
205
+ if(local!=0)
206
+ return (*local)(domain, attribs);
207
+ else
208
+ #endif /*NVTX_DISABLE*/
209
+ return (nvtxResourceHandle_t)0;
210
+ }
211
+
212
+ NVTX_DECLSPEC void NVTX_API nvtxDomainResourceDestroy(nvtxResourceHandle_t resource)
213
+ {
214
+ #ifndef NVTX_DISABLE
215
+ nvtxDomainResourceDestroy_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceDestroy_impl_fnptr;
216
+ if(local!=0)
217
+ (*local)(resource);
218
+ #endif /*NVTX_DISABLE*/
219
+ }
220
+
221
+ NVTX_DECLSPEC void NVTX_API nvtxDomainNameCategoryA(nvtxDomainHandle_t domain, uint32_t category, const char* name)
222
+ {
223
+ #ifndef NVTX_DISABLE
224
+ nvtxDomainNameCategoryA_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryA_impl_fnptr;
225
+ if(local!=0)
226
+ (*local)(domain, category, name);
227
+ #endif /*NVTX_DISABLE*/
228
+ }
229
+
230
+ NVTX_DECLSPEC void NVTX_API nvtxDomainNameCategoryW(nvtxDomainHandle_t domain, uint32_t category, const wchar_t* name)
231
+ {
232
+ #ifndef NVTX_DISABLE
233
+ nvtxDomainNameCategoryW_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryW_impl_fnptr;
234
+ if(local!=0)
235
+ (*local)(domain, category, name);
236
+ #endif /*NVTX_DISABLE*/
237
+ }
238
+
239
+ NVTX_DECLSPEC nvtxStringHandle_t NVTX_API nvtxDomainRegisterStringA(nvtxDomainHandle_t domain, const char* string)
240
+ {
241
+ #ifndef NVTX_DISABLE
242
+ nvtxDomainRegisterStringA_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringA_impl_fnptr;
243
+ if(local!=0)
244
+ return (*local)(domain, string);
245
+ else
246
+ #endif /*NVTX_DISABLE*/
247
+ return (nvtxStringHandle_t)0;
248
+ }
249
+
250
+ NVTX_DECLSPEC nvtxStringHandle_t NVTX_API nvtxDomainRegisterStringW(nvtxDomainHandle_t domain, const wchar_t* string)
251
+ {
252
+ #ifndef NVTX_DISABLE
253
+ nvtxDomainRegisterStringW_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringW_impl_fnptr;
254
+ if(local!=0)
255
+ return (*local)(domain, string);
256
+ else
257
+ #endif /*NVTX_DISABLE*/
258
+ return (nvtxStringHandle_t)0;
259
+ }
260
+
261
+ NVTX_DECLSPEC nvtxDomainHandle_t NVTX_API nvtxDomainCreateA(const char* message)
262
+ {
263
+ #ifndef NVTX_DISABLE
264
+ nvtxDomainCreateA_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateA_impl_fnptr;
265
+ if(local!=0)
266
+ return (*local)(message);
267
+ else
268
+ #endif /*NVTX_DISABLE*/
269
+ return (nvtxDomainHandle_t)0;
270
+ }
271
+
272
+ NVTX_DECLSPEC nvtxDomainHandle_t NVTX_API nvtxDomainCreateW(const wchar_t* message)
273
+ {
274
+ #ifndef NVTX_DISABLE
275
+ nvtxDomainCreateW_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateW_impl_fnptr;
276
+ if(local!=0)
277
+ return (*local)(message);
278
+ else
279
+ #endif /*NVTX_DISABLE*/
280
+ return (nvtxDomainHandle_t)0;
281
+ }
282
+
283
+ NVTX_DECLSPEC void NVTX_API nvtxDomainDestroy(nvtxDomainHandle_t domain)
284
+ {
285
+ #ifndef NVTX_DISABLE
286
+ nvtxDomainDestroy_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainDestroy_impl_fnptr;
287
+ if(local!=0)
288
+ (*local)(domain);
289
+ #endif /*NVTX_DISABLE*/
290
+ }
291
+
292
+ NVTX_DECLSPEC void NVTX_API nvtxInitialize(const void* reserved)
293
+ {
294
+ #ifndef NVTX_DISABLE
295
+ nvtxInitialize_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxInitialize_impl_fnptr;
296
+ if(local!=0)
297
+ (*local)(reserved);
298
+ #endif /*NVTX_DISABLE*/
299
+ }
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvtx3/nvtxDetail/nvtxImplCudaRt_v3.h ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* This file was procedurally generated! Do not modify this file by hand. */
2
+
3
+ /*
4
+ * Copyright 2009-2016 NVIDIA Corporation. All rights reserved.
5
+ *
6
+ * NOTICE TO USER:
7
+ *
8
+ * This source code is subject to NVIDIA ownership rights under U.S. and
9
+ * international Copyright laws.
10
+ *
11
+ * This software and the information contained herein is PROPRIETARY and
12
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and conditions
13
+ * of a form of NVIDIA software license agreement.
14
+ *
15
+ * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
16
+ * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
17
+ * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
18
+ * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
19
+ * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
20
+ * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
21
+ * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
22
+ * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
23
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
24
+ * OR PERFORMANCE OF THIS SOURCE CODE.
25
+ *
26
+ * U.S. Government End Users. This source code is a "commercial item" as
27
+ * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
28
+ * "commercial computer software" and "commercial computer software
29
+ * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
30
+ * and is provided to the U.S. Government only as a commercial end item.
31
+ * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
32
+ * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
33
+ * source code with only those rights set forth herein.
34
+ *
35
+ * Any use of this source code in individual and commercial software must
36
+ * include, in the user documentation and internal comments to the code,
37
+ * the above Disclaimer and U.S. Government End Users Notice.
38
+ */
39
+
40
+ #ifndef NVTX_IMPL_GUARD_CUDART
41
+ #error Never include this file directly -- it is automatically included by nvToolsExtCudaRt.h (except when NVTX_NO_IMPL is defined).
42
+ #endif
43
+
44
+ #ifdef __cplusplus
45
+ extern "C" {
46
+ #endif /* __cplusplus */
47
+
48
+ //typedef void (NVTX_API * nvtxNameCudaDeviceA_impl_fntype)(int device, const char* name);
49
+ //typedef void (NVTX_API * nvtxNameCudaDeviceW_impl_fntype)(int device, const wchar_t* name);
50
+ typedef void (NVTX_API * nvtxNameCudaStreamA_impl_fntype)(cudaStream_t stream, const char* name);
51
+ typedef void (NVTX_API * nvtxNameCudaStreamW_impl_fntype)(cudaStream_t stream, const wchar_t* name);
52
+ typedef void (NVTX_API * nvtxNameCudaEventA_impl_fntype)(cudaEvent_t event, const char* name);
53
+ typedef void (NVTX_API * nvtxNameCudaEventW_impl_fntype)(cudaEvent_t event, const wchar_t* name);
54
+
55
+ NVTX_DECLSPEC void NVTX_API nvtxNameCudaDeviceA(int device, const char* name)
56
+ {
57
+ #ifndef NVTX_DISABLE
58
+ nvtxNameCudaDeviceA_impl_fntype local = (nvtxNameCudaDeviceA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceA_impl_fnptr;
59
+ if(local!=0)
60
+ (*local)(device, name);
61
+ #endif /*NVTX_DISABLE*/
62
+ }
63
+
64
+ NVTX_DECLSPEC void NVTX_API nvtxNameCudaDeviceW(int device, const wchar_t* name)
65
+ {
66
+ #ifndef NVTX_DISABLE
67
+ nvtxNameCudaDeviceW_impl_fntype local = (nvtxNameCudaDeviceW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceW_impl_fnptr;
68
+ if(local!=0)
69
+ (*local)(device, name);
70
+ #endif /*NVTX_DISABLE*/
71
+ }
72
+
73
+ NVTX_DECLSPEC void NVTX_API nvtxNameCudaStreamA(cudaStream_t stream, const char* name)
74
+ {
75
+ #ifndef NVTX_DISABLE
76
+ nvtxNameCudaStreamA_impl_fntype local = (nvtxNameCudaStreamA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamA_impl_fnptr;
77
+ if(local!=0)
78
+ (*local)(stream, name);
79
+ #endif /*NVTX_DISABLE*/
80
+ }
81
+
82
+ NVTX_DECLSPEC void NVTX_API nvtxNameCudaStreamW(cudaStream_t stream, const wchar_t* name)
83
+ {
84
+ #ifndef NVTX_DISABLE
85
+ nvtxNameCudaStreamW_impl_fntype local = (nvtxNameCudaStreamW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamW_impl_fnptr;
86
+ if(local!=0)
87
+ (*local)(stream, name);
88
+ #endif /*NVTX_DISABLE*/
89
+ }
90
+
91
+ NVTX_DECLSPEC void NVTX_API nvtxNameCudaEventA(cudaEvent_t event, const char* name)
92
+ {
93
+ #ifndef NVTX_DISABLE
94
+ nvtxNameCudaEventA_impl_fntype local = (nvtxNameCudaEventA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventA_impl_fnptr;
95
+ if(local!=0)
96
+ (*local)(event, name);
97
+ #endif /*NVTX_DISABLE*/
98
+ }
99
+
100
+ NVTX_DECLSPEC void NVTX_API nvtxNameCudaEventW(cudaEvent_t event, const wchar_t* name)
101
+ {
102
+ #ifndef NVTX_DISABLE
103
+ nvtxNameCudaEventW_impl_fntype local = (nvtxNameCudaEventW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventW_impl_fnptr;
104
+ if(local!=0)
105
+ (*local)(event, name);
106
+ #endif /*NVTX_DISABLE*/
107
+ }
108
+
109
+ #ifdef __cplusplus
110
+ } /* extern "C" */
111
+ #endif /* __cplusplus */
112
+
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvtx3/nvtxDetail/nvtxImplCuda_v3.h ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* This file was procedurally generated! Do not modify this file by hand. */
2
+
3
+ /*
4
+ * Copyright 2009-2016 NVIDIA Corporation. All rights reserved.
5
+ *
6
+ * NOTICE TO USER:
7
+ *
8
+ * This source code is subject to NVIDIA ownership rights under U.S. and
9
+ * international Copyright laws.
10
+ *
11
+ * This software and the information contained herein is PROPRIETARY and
12
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and conditions
13
+ * of a form of NVIDIA software license agreement.
14
+ *
15
+ * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
16
+ * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
17
+ * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
18
+ * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
19
+ * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
20
+ * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
21
+ * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
22
+ * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
23
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
24
+ * OR PERFORMANCE OF THIS SOURCE CODE.
25
+ *
26
+ * U.S. Government End Users. This source code is a "commercial item" as
27
+ * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
28
+ * "commercial computer software" and "commercial computer software
29
+ * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
30
+ * and is provided to the U.S. Government only as a commercial end item.
31
+ * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
32
+ * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
33
+ * source code with only those rights set forth herein.
34
+ *
35
+ * Any use of this source code in individual and commercial software must
36
+ * include, in the user documentation and internal comments to the code,
37
+ * the above Disclaimer and U.S. Government End Users Notice.
38
+ */
39
+
40
+ #ifndef NVTX_IMPL_GUARD_CUDA
41
+ #error Never include this file directly -- it is automatically included by nvToolsExtCuda.h (except when NVTX_NO_IMPL is defined).
42
+ #endif
43
+
44
+
45
+ #ifdef __cplusplus
46
+ extern "C" {
47
+ #endif /* __cplusplus */
48
+
49
+ typedef void (NVTX_API * nvtxNameCuDeviceA_impl_fntype)(CUdevice device, const char* name);
50
+ typedef void (NVTX_API * nvtxNameCuDeviceW_impl_fntype)(CUdevice device, const wchar_t* name);
51
+ typedef void (NVTX_API * nvtxNameCuContextA_impl_fntype)(CUcontext context, const char* name);
52
+ typedef void (NVTX_API * nvtxNameCuContextW_impl_fntype)(CUcontext context, const wchar_t* name);
53
+ typedef void (NVTX_API * nvtxNameCuStreamA_impl_fntype)(CUstream stream, const char* name);
54
+ typedef void (NVTX_API * nvtxNameCuStreamW_impl_fntype)(CUstream stream, const wchar_t* name);
55
+ typedef void (NVTX_API * nvtxNameCuEventA_impl_fntype)(CUevent event, const char* name);
56
+ typedef void (NVTX_API * nvtxNameCuEventW_impl_fntype)(CUevent event, const wchar_t* name);
57
+
58
+ NVTX_DECLSPEC void NVTX_API nvtxNameCuDeviceA(CUdevice device, const char* name)
59
+ {
60
+ #ifndef NVTX_DISABLE
61
+ nvtxNameCuDeviceA_impl_fntype local = (nvtxNameCuDeviceA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceA_impl_fnptr;
62
+ if(local!=0)
63
+ (*local)(device, name);
64
+ #endif /*NVTX_DISABLE*/
65
+ }
66
+
67
+ NVTX_DECLSPEC void NVTX_API nvtxNameCuDeviceW(CUdevice device, const wchar_t* name)
68
+ {
69
+ #ifndef NVTX_DISABLE
70
+ nvtxNameCuDeviceW_impl_fntype local = (nvtxNameCuDeviceW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceW_impl_fnptr;
71
+ if(local!=0)
72
+ (*local)(device, name);
73
+ #endif /*NVTX_DISABLE*/
74
+ }
75
+
76
+ NVTX_DECLSPEC void NVTX_API nvtxNameCuContextA(CUcontext context, const char* name)
77
+ {
78
+ #ifndef NVTX_DISABLE
79
+ nvtxNameCuContextA_impl_fntype local = (nvtxNameCuContextA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextA_impl_fnptr;
80
+ if(local!=0)
81
+ (*local)(context, name);
82
+ #endif /*NVTX_DISABLE*/
83
+ }
84
+
85
+ NVTX_DECLSPEC void NVTX_API nvtxNameCuContextW(CUcontext context, const wchar_t* name)
86
+ {
87
+ #ifndef NVTX_DISABLE
88
+ nvtxNameCuContextW_impl_fntype local = (nvtxNameCuContextW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextW_impl_fnptr;
89
+ if(local!=0)
90
+ (*local)(context, name);
91
+ #endif /*NVTX_DISABLE*/
92
+ }
93
+
94
+ NVTX_DECLSPEC void NVTX_API nvtxNameCuStreamA(CUstream stream, const char* name)
95
+ {
96
+ #ifndef NVTX_DISABLE
97
+ nvtxNameCuStreamA_impl_fntype local = (nvtxNameCuStreamA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamA_impl_fnptr;
98
+ if(local!=0)
99
+ (*local)(stream, name);
100
+ #endif /*NVTX_DISABLE*/
101
+ }
102
+
103
+ NVTX_DECLSPEC void NVTX_API nvtxNameCuStreamW(CUstream stream, const wchar_t* name)
104
+ {
105
+ #ifndef NVTX_DISABLE
106
+ nvtxNameCuStreamW_impl_fntype local = (nvtxNameCuStreamW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamW_impl_fnptr;
107
+ if(local!=0)
108
+ (*local)(stream, name);
109
+ #endif /*NVTX_DISABLE*/
110
+ }
111
+
112
+ NVTX_DECLSPEC void NVTX_API nvtxNameCuEventA(CUevent event, const char* name)
113
+ {
114
+ #ifndef NVTX_DISABLE
115
+ nvtxNameCuEventA_impl_fntype local = (nvtxNameCuEventA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventA_impl_fnptr;
116
+ if(local!=0)
117
+ (*local)(event, name);
118
+ #endif /*NVTX_DISABLE*/
119
+ }
120
+
121
+ NVTX_DECLSPEC void NVTX_API nvtxNameCuEventW(CUevent event, const wchar_t* name)
122
+ {
123
+ #ifndef NVTX_DISABLE
124
+ nvtxNameCuEventW_impl_fntype local = (nvtxNameCuEventW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventW_impl_fnptr;
125
+ if(local!=0)
126
+ (*local)(event, name);
127
+ #endif /*NVTX_DISABLE*/
128
+ }
129
+
130
+ #ifdef __cplusplus
131
+ } /* extern "C" */
132
+ #endif /* __cplusplus */
133
+
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvtx3/nvtxDetail/nvtxImplOpenCL_v3.h ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* This file was procedurally generated! Do not modify this file by hand. */
2
+
3
+ /*
4
+ * Copyright 2009-2016 NVIDIA Corporation. All rights reserved.
5
+ *
6
+ * NOTICE TO USER:
7
+ *
8
+ * This source code is subject to NVIDIA ownership rights under U.S. and
9
+ * international Copyright laws.
10
+ *
11
+ * This software and the information contained herein is PROPRIETARY and
12
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and conditions
13
+ * of a form of NVIDIA software license agreement.
14
+ *
15
+ * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
16
+ * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
17
+ * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
18
+ * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
19
+ * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
20
+ * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
21
+ * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
22
+ * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
23
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
24
+ * OR PERFORMANCE OF THIS SOURCE CODE.
25
+ *
26
+ * U.S. Government End Users. This source code is a "commercial item" as
27
+ * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
28
+ * "commercial computer software" and "commercial computer software
29
+ * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
30
+ * and is provided to the U.S. Government only as a commercial end item.
31
+ * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
32
+ * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
33
+ * source code with only those rights set forth herein.
34
+ *
35
+ * Any use of this source code in individual and commercial software must
36
+ * include, in the user documentation and internal comments to the code,
37
+ * the above Disclaimer and U.S. Government End Users Notice.
38
+ */
39
+
40
+ #ifndef NVTX_IMPL_GUARD_OPENCL
41
+ #error Never include this file directly -- it is automatically included by nvToolsExtCuda.h (except when NVTX_NO_IMPL is defined).
42
+ #endif
43
+
44
+
45
+ #ifdef __cplusplus
46
+ extern "C" {
47
+ #endif /* __cplusplus */
48
+
49
+ typedef void (NVTX_API * nvtxNameClDeviceA_impl_fntype)(cl_device_id device, const char* name);
50
+ typedef void (NVTX_API * nvtxNameClDeviceW_impl_fntype)(cl_device_id device, const wchar_t* name);
51
+ typedef void (NVTX_API * nvtxNameClContextA_impl_fntype)(cl_context context, const char* name);
52
+ typedef void (NVTX_API * nvtxNameClContextW_impl_fntype)(cl_context context, const wchar_t* name);
53
+ typedef void (NVTX_API * nvtxNameClCommandQueueA_impl_fntype)(cl_command_queue command_queue, const char* name);
54
+ typedef void (NVTX_API * nvtxNameClCommandQueueW_impl_fntype)(cl_command_queue command_queue, const wchar_t* name);
55
+ typedef void (NVTX_API * nvtxNameClMemObjectA_impl_fntype)(cl_mem memobj, const char* name);
56
+ typedef void (NVTX_API * nvtxNameClMemObjectW_impl_fntype)(cl_mem memobj, const wchar_t* name);
57
+ typedef void (NVTX_API * nvtxNameClSamplerA_impl_fntype)(cl_sampler sampler, const char* name);
58
+ typedef void (NVTX_API * nvtxNameClSamplerW_impl_fntype)(cl_sampler sampler, const wchar_t* name);
59
+ typedef void (NVTX_API * nvtxNameClProgramA_impl_fntype)(cl_program program, const char* name);
60
+ typedef void (NVTX_API * nvtxNameClProgramW_impl_fntype)(cl_program program, const wchar_t* name);
61
+ typedef void (NVTX_API * nvtxNameClEventA_impl_fntype)(cl_event evnt, const char* name);
62
+ typedef void (NVTX_API * nvtxNameClEventW_impl_fntype)(cl_event evnt, const wchar_t* name);
63
+
64
+ NVTX_DECLSPEC void NVTX_API nvtxNameClDeviceA(cl_device_id device, const char* name)
65
+ {
66
+ #ifndef NVTX_DISABLE
67
+ nvtxNameClDeviceA_impl_fntype local = (nvtxNameClDeviceA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceA_impl_fnptr;
68
+ if(local!=0)
69
+ (*local)(device, name);
70
+ #endif /*NVTX_DISABLE*/
71
+ }
72
+
73
+ NVTX_DECLSPEC void NVTX_API nvtxNameClDeviceW(cl_device_id device, const wchar_t* name)
74
+ {
75
+ #ifndef NVTX_DISABLE
76
+ nvtxNameClDeviceW_impl_fntype local = (nvtxNameClDeviceW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceW_impl_fnptr;
77
+ if(local!=0)
78
+ (*local)(device, name);
79
+ #endif /*NVTX_DISABLE*/
80
+ }
81
+
82
+ NVTX_DECLSPEC void NVTX_API nvtxNameClContextA(cl_context context, const char* name)
83
+ {
84
+ #ifndef NVTX_DISABLE
85
+ nvtxNameClContextA_impl_fntype local = (nvtxNameClContextA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextA_impl_fnptr;
86
+ if(local!=0)
87
+ (*local)(context, name);
88
+ #endif /*NVTX_DISABLE*/
89
+ }
90
+
91
+ NVTX_DECLSPEC void NVTX_API nvtxNameClContextW(cl_context context, const wchar_t* name)
92
+ {
93
+ #ifndef NVTX_DISABLE
94
+ nvtxNameClContextW_impl_fntype local = (nvtxNameClContextW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextW_impl_fnptr;
95
+ if(local!=0)
96
+ (*local)(context, name);
97
+ #endif /*NVTX_DISABLE*/
98
+ }
99
+
100
+ NVTX_DECLSPEC void NVTX_API nvtxNameClCommandQueueA(cl_command_queue command_queue, const char* name)
101
+ {
102
+ #ifndef NVTX_DISABLE
103
+ nvtxNameClCommandQueueA_impl_fntype local = (nvtxNameClCommandQueueA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueA_impl_fnptr;
104
+ if(local!=0)
105
+ (*local)(command_queue, name);
106
+ #endif /*NVTX_DISABLE*/
107
+ }
108
+
109
+ NVTX_DECLSPEC void NVTX_API nvtxNameClCommandQueueW(cl_command_queue command_queue, const wchar_t* name)
110
+ {
111
+ #ifndef NVTX_DISABLE
112
+ nvtxNameClCommandQueueW_impl_fntype local = (nvtxNameClCommandQueueW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueW_impl_fnptr;
113
+ if(local!=0)
114
+ (*local)(command_queue, name);
115
+ #endif /*NVTX_DISABLE*/
116
+ }
117
+
118
+ NVTX_DECLSPEC void NVTX_API nvtxNameClMemObjectA(cl_mem memobj, const char* name)
119
+ {
120
+ #ifndef NVTX_DISABLE
121
+ nvtxNameClMemObjectA_impl_fntype local = (nvtxNameClMemObjectA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectA_impl_fnptr;
122
+ if(local!=0)
123
+ (*local)(memobj, name);
124
+ #endif /*NVTX_DISABLE*/
125
+ }
126
+
127
+ NVTX_DECLSPEC void NVTX_API nvtxNameClMemObjectW(cl_mem memobj, const wchar_t* name)
128
+ {
129
+ #ifndef NVTX_DISABLE
130
+ nvtxNameClMemObjectW_impl_fntype local = (nvtxNameClMemObjectW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectW_impl_fnptr;
131
+ if(local!=0)
132
+ (*local)(memobj, name);
133
+ #endif /*NVTX_DISABLE*/
134
+ }
135
+
136
+ NVTX_DECLSPEC void NVTX_API nvtxNameClSamplerA(cl_sampler sampler, const char* name)
137
+ {
138
+ #ifndef NVTX_DISABLE
139
+ nvtxNameClSamplerA_impl_fntype local = (nvtxNameClSamplerA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerA_impl_fnptr;
140
+ if(local!=0)
141
+ (*local)(sampler, name);
142
+ #endif /*NVTX_DISABLE*/
143
+ }
144
+
145
+ NVTX_DECLSPEC void NVTX_API nvtxNameClSamplerW(cl_sampler sampler, const wchar_t* name)
146
+ {
147
+ #ifndef NVTX_DISABLE
148
+ nvtxNameClSamplerW_impl_fntype local = (nvtxNameClSamplerW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerW_impl_fnptr;
149
+ if(local!=0)
150
+ (*local)(sampler, name);
151
+ #endif /*NVTX_DISABLE*/
152
+ }
153
+
154
+ NVTX_DECLSPEC void NVTX_API nvtxNameClProgramA(cl_program program, const char* name)
155
+ {
156
+ #ifndef NVTX_DISABLE
157
+ nvtxNameClProgramA_impl_fntype local = (nvtxNameClProgramA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramA_impl_fnptr;
158
+ if(local!=0)
159
+ (*local)(program, name);
160
+ #endif /*NVTX_DISABLE*/
161
+ }
162
+
163
+ NVTX_DECLSPEC void NVTX_API nvtxNameClProgramW(cl_program program, const wchar_t* name)
164
+ {
165
+ #ifndef NVTX_DISABLE
166
+ nvtxNameClProgramW_impl_fntype local = (nvtxNameClProgramW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramW_impl_fnptr;
167
+ if(local!=0)
168
+ (*local)(program, name);
169
+ #endif /*NVTX_DISABLE*/
170
+ }
171
+
172
+ NVTX_DECLSPEC void NVTX_API nvtxNameClEventA(cl_event evnt, const char* name)
173
+ {
174
+ #ifndef NVTX_DISABLE
175
+ nvtxNameClEventA_impl_fntype local = (nvtxNameClEventA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventA_impl_fnptr;
176
+ if(local!=0)
177
+ (*local)(evnt, name);
178
+ #endif /*NVTX_DISABLE*/
179
+ }
180
+
181
+ NVTX_DECLSPEC void NVTX_API nvtxNameClEventW(cl_event evnt, const wchar_t* name)
182
+ {
183
+ #ifndef NVTX_DISABLE
184
+ nvtxNameClEventW_impl_fntype local = (nvtxNameClEventW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventW_impl_fnptr;
185
+ if(local!=0)
186
+ (*local)(evnt, name);
187
+ #endif /*NVTX_DISABLE*/
188
+ }
189
+
190
+ #ifdef __cplusplus
191
+ } /* extern "C" */
192
+ #endif /* __cplusplus */
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvtx3/nvtxDetail/nvtxImplSync_v3.h ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* This file was procedurally generated! Do not modify this file by hand. */
2
+
3
+ /*
4
+ * Copyright 2009-2016 NVIDIA Corporation. All rights reserved.
5
+ *
6
+ * NOTICE TO USER:
7
+ *
8
+ * This source code is subject to NVIDIA ownership rights under U.S. and
9
+ * international Copyright laws.
10
+ *
11
+ * This software and the information contained herein is PROPRIETARY and
12
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and conditions
13
+ * of a form of NVIDIA software license agreement.
14
+ *
15
+ * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
16
+ * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
17
+ * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
18
+ * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
19
+ * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
20
+ * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
21
+ * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
22
+ * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
23
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
24
+ * OR PERFORMANCE OF THIS SOURCE CODE.
25
+ *
26
+ * U.S. Government End Users. This source code is a "commercial item" as
27
+ * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
28
+ * "commercial computer software" and "commercial computer software
29
+ * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
30
+ * and is provided to the U.S. Government only as a commercial end item.
31
+ * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
32
+ * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
33
+ * source code with only those rights set forth herein.
34
+ *
35
+ * Any use of this source code in individual and commercial software must
36
+ * include, in the user documentation and internal comments to the code,
37
+ * the above Disclaimer and U.S. Government End Users Notice.
38
+ */
39
+
40
+ #ifndef NVTX_IMPL_GUARD_SYNC
41
+ #error Never include this file directly -- it is automatically included by nvToolsExtCuda.h (except when NVTX_NO_IMPL is defined).
42
+ #endif
43
+
44
+
45
+ #ifdef __cplusplus
46
+ extern "C" {
47
+ #endif /* __cplusplus */
48
+
49
+ typedef nvtxSyncUser_t (NVTX_API * nvtxDomainSyncUserCreate_impl_fntype)(nvtxDomainHandle_t domain, const nvtxSyncUserAttributes_t* attribs);
50
+ typedef void (NVTX_API * nvtxDomainSyncUserDestroy_impl_fntype)(nvtxSyncUser_t handle);
51
+ typedef void (NVTX_API * nvtxDomainSyncUserAcquireStart_impl_fntype)(nvtxSyncUser_t handle);
52
+ typedef void (NVTX_API * nvtxDomainSyncUserAcquireFailed_impl_fntype)(nvtxSyncUser_t handle);
53
+ typedef void (NVTX_API * nvtxDomainSyncUserAcquireSuccess_impl_fntype)(nvtxSyncUser_t handle);
54
+ typedef void (NVTX_API * nvtxDomainSyncUserReleasing_impl_fntype)(nvtxSyncUser_t handle);
55
+
56
+ NVTX_DECLSPEC nvtxSyncUser_t NVTX_API nvtxDomainSyncUserCreate(nvtxDomainHandle_t domain, const nvtxSyncUserAttributes_t* attribs)
57
+ {
58
+ #ifndef NVTX_DISABLE
59
+ nvtxDomainSyncUserCreate_impl_fntype local = (nvtxDomainSyncUserCreate_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserCreate_impl_fnptr;
60
+ if(local!=0)
61
+ return (*local)(domain, attribs);
62
+ else
63
+ #endif /*NVTX_DISABLE*/
64
+ return (nvtxSyncUser_t)0;
65
+ }
66
+
67
+ NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserDestroy(nvtxSyncUser_t handle)
68
+ {
69
+ #ifndef NVTX_DISABLE
70
+ nvtxDomainSyncUserDestroy_impl_fntype local = (nvtxDomainSyncUserDestroy_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserDestroy_impl_fnptr;
71
+ if(local!=0)
72
+ (*local)(handle);
73
+ #endif /*NVTX_DISABLE*/
74
+ }
75
+
76
+ NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserAcquireStart(nvtxSyncUser_t handle)
77
+ {
78
+ #ifndef NVTX_DISABLE
79
+ nvtxDomainSyncUserAcquireStart_impl_fntype local = (nvtxDomainSyncUserAcquireStart_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireStart_impl_fnptr;
80
+ if(local!=0)
81
+ (*local)(handle);
82
+ #endif /*NVTX_DISABLE*/
83
+ }
84
+
85
+ NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserAcquireFailed(nvtxSyncUser_t handle)
86
+ {
87
+ #ifndef NVTX_DISABLE
88
+ nvtxDomainSyncUserAcquireFailed_impl_fntype local = (nvtxDomainSyncUserAcquireFailed_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireFailed_impl_fnptr;
89
+ if(local!=0)
90
+ (*local)(handle);
91
+ #endif /*NVTX_DISABLE*/
92
+ }
93
+
94
+ NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserAcquireSuccess(nvtxSyncUser_t handle)
95
+ {
96
+ #ifndef NVTX_DISABLE
97
+ nvtxDomainSyncUserAcquireSuccess_impl_fntype local = (nvtxDomainSyncUserAcquireSuccess_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireSuccess_impl_fnptr;
98
+ if(local!=0)
99
+ (*local)(handle);
100
+ #endif /*NVTX_DISABLE*/
101
+ }
102
+
103
+ NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserReleasing(nvtxSyncUser_t handle)
104
+ {
105
+ #ifndef NVTX_DISABLE
106
+ nvtxDomainSyncUserReleasing_impl_fntype local = (nvtxDomainSyncUserReleasing_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserReleasing_impl_fnptr;
107
+ if(local!=0)
108
+ (*local)(handle);
109
+ #endif /*NVTX_DISABLE*/
110
+ }
111
+
112
+ #ifdef __cplusplus
113
+ } /* extern "C" */
114
+ #endif /* __cplusplus */
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvtx3/nvtxDetail/nvtxInitDefs.h ADDED
@@ -0,0 +1,565 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #ifndef NVTX_IMPL_GUARD
2
+ #error Never include this file directly -- it is automatically included by nvToolsExt.h (except when NVTX_NO_IMPL is defined).
3
+ #endif
4
+
5
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxMarkEx_impl_init)(const nvtxEventAttributes_t* eventAttrib){
6
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
7
+ nvtxMarkEx(eventAttrib);
8
+ }
9
+
10
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxMarkA_impl_init)(const char* message){
11
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
12
+ nvtxMarkA(message);
13
+ }
14
+
15
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxMarkW_impl_init)(const wchar_t* message){
16
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
17
+ nvtxMarkW(message);
18
+ }
19
+
20
+ NVTX_LINKONCE_DEFINE_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartEx_impl_init)(const nvtxEventAttributes_t* eventAttrib){
21
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
22
+ return nvtxRangeStartEx(eventAttrib);
23
+ }
24
+
25
+ NVTX_LINKONCE_DEFINE_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartA_impl_init)(const char* message){
26
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
27
+ return nvtxRangeStartA(message);
28
+ }
29
+
30
+ NVTX_LINKONCE_DEFINE_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartW_impl_init)(const wchar_t* message){
31
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
32
+ return nvtxRangeStartW(message);
33
+ }
34
+
35
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeEnd_impl_init)(nvtxRangeId_t id){
36
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
37
+ nvtxRangeEnd(id);
38
+ }
39
+
40
+ NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePushEx_impl_init)(const nvtxEventAttributes_t* eventAttrib){
41
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
42
+ return nvtxRangePushEx(eventAttrib);
43
+ }
44
+
45
+ NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePushA_impl_init)(const char* message){
46
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
47
+ return nvtxRangePushA(message);
48
+ }
49
+
50
+ NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePushW_impl_init)(const wchar_t* message){
51
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
52
+ return nvtxRangePushW(message);
53
+ }
54
+
55
+ NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePop_impl_init)(void){
56
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
57
+ return nvtxRangePop();
58
+ }
59
+
60
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryA_impl_init)(uint32_t category, const char* name){
61
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
62
+ nvtxNameCategoryA(category, name);
63
+ }
64
+
65
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryW_impl_init)(uint32_t category, const wchar_t* name){
66
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
67
+ nvtxNameCategoryW(category, name);
68
+ }
69
+
70
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadA_impl_init)(uint32_t threadId, const char* name){
71
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
72
+ nvtxNameOsThreadA(threadId, name);
73
+ }
74
+
75
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadW_impl_init)(uint32_t threadId, const wchar_t* name){
76
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
77
+ nvtxNameOsThreadW(threadId, name);
78
+ }
79
+
80
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainMarkEx_impl_init)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib){
81
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
82
+ nvtxDomainMarkEx(domain, eventAttrib);
83
+ }
84
+
85
+ NVTX_LINKONCE_DEFINE_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeStartEx_impl_init)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib){
86
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
87
+ return nvtxDomainRangeStartEx(domain, eventAttrib);
88
+ }
89
+
90
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeEnd_impl_init)(nvtxDomainHandle_t domain, nvtxRangeId_t id){
91
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
92
+ nvtxDomainRangeEnd(domain, id);
93
+ }
94
+
95
+ NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePushEx_impl_init)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib){
96
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
97
+ return nvtxDomainRangePushEx(domain, eventAttrib);
98
+ }
99
+
100
+ NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePop_impl_init)(nvtxDomainHandle_t domain){
101
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
102
+ return nvtxDomainRangePop(domain);
103
+ }
104
+
105
+ NVTX_LINKONCE_DEFINE_FUNCTION nvtxResourceHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceCreate_impl_init)(nvtxDomainHandle_t domain, nvtxResourceAttributes_t* attribs){
106
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
107
+ return nvtxDomainResourceCreate(domain, attribs);
108
+ }
109
+
110
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceDestroy_impl_init)(nvtxResourceHandle_t resource){
111
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
112
+ nvtxDomainResourceDestroy(resource);
113
+ }
114
+
115
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryA_impl_init)(nvtxDomainHandle_t domain, uint32_t category, const char* name){
116
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
117
+ nvtxDomainNameCategoryA(domain, category, name);
118
+ }
119
+
120
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryW_impl_init)(nvtxDomainHandle_t domain, uint32_t category, const wchar_t* name){
121
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
122
+ nvtxDomainNameCategoryW(domain, category, name);
123
+ }
124
+
125
+ NVTX_LINKONCE_DEFINE_FUNCTION nvtxStringHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringA_impl_init)(nvtxDomainHandle_t domain, const char* string){
126
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
127
+ return nvtxDomainRegisterStringA(domain, string);
128
+ }
129
+
130
+ NVTX_LINKONCE_DEFINE_FUNCTION nvtxStringHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringW_impl_init)(nvtxDomainHandle_t domain, const wchar_t* string){
131
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
132
+ return nvtxDomainRegisterStringW(domain, string);
133
+ }
134
+
135
+ NVTX_LINKONCE_DEFINE_FUNCTION nvtxDomainHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateA_impl_init)(const char* message){
136
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
137
+ return nvtxDomainCreateA(message);
138
+ }
139
+
140
+ NVTX_LINKONCE_DEFINE_FUNCTION nvtxDomainHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateW_impl_init)(const wchar_t* message){
141
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
142
+ return nvtxDomainCreateW(message);
143
+ }
144
+
145
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainDestroy_impl_init)(nvtxDomainHandle_t domain){
146
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
147
+ nvtxDomainDestroy(domain);
148
+ }
149
+
150
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxInitialize_impl_init)(const void* reserved){
151
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
152
+ nvtxInitialize(reserved);
153
+ }
154
+
155
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceA_impl_init)(nvtx_CUdevice device, const char* name){
156
+ nvtxNameCuDeviceA_fakeimpl_fntype local;
157
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
158
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceA_impl_fnptr;
159
+ if (local)
160
+ local(device, name);
161
+ }
162
+
163
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceW_impl_init)(nvtx_CUdevice device, const wchar_t* name){
164
+ nvtxNameCuDeviceW_fakeimpl_fntype local;
165
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
166
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceW_impl_fnptr;
167
+ if (local)
168
+ local(device, name);
169
+ }
170
+
171
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextA_impl_init)(nvtx_CUcontext context, const char* name){
172
+ nvtxNameCuContextA_fakeimpl_fntype local;
173
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
174
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextA_impl_fnptr;
175
+ if (local)
176
+ local(context, name);
177
+ }
178
+
179
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextW_impl_init)(nvtx_CUcontext context, const wchar_t* name){
180
+ nvtxNameCuContextW_fakeimpl_fntype local;
181
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
182
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextW_impl_fnptr;
183
+ if (local)
184
+ local(context, name);
185
+ }
186
+
187
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamA_impl_init)(nvtx_CUstream stream, const char* name){
188
+ nvtxNameCuStreamA_fakeimpl_fntype local;
189
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
190
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamA_impl_fnptr;
191
+ if (local)
192
+ local(stream, name);
193
+ }
194
+
195
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamW_impl_init)(nvtx_CUstream stream, const wchar_t* name){
196
+ nvtxNameCuStreamW_fakeimpl_fntype local;
197
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
198
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamW_impl_fnptr;
199
+ if (local)
200
+ local(stream, name);
201
+ }
202
+
203
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventA_impl_init)(nvtx_CUevent event, const char* name){
204
+ nvtxNameCuEventA_fakeimpl_fntype local;
205
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
206
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventA_impl_fnptr;
207
+ if (local)
208
+ local(event, name);
209
+ }
210
+
211
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventW_impl_init)(nvtx_CUevent event, const wchar_t* name){
212
+ nvtxNameCuEventW_fakeimpl_fntype local;
213
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
214
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventW_impl_fnptr;
215
+ if (local)
216
+ local(event, name);
217
+ }
218
+
219
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceA_impl_init)(int device, const char* name){
220
+ nvtxNameCudaDeviceA_impl_fntype local;
221
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
222
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceA_impl_fnptr;
223
+ if (local)
224
+ local(device, name);
225
+ }
226
+
227
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceW_impl_init)(int device, const wchar_t* name){
228
+ nvtxNameCudaDeviceW_impl_fntype local;
229
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
230
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceW_impl_fnptr;
231
+ if (local)
232
+ local(device, name);
233
+ }
234
+
235
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamA_impl_init)(nvtx_cudaStream_t stream, const char* name){
236
+ nvtxNameCudaStreamA_fakeimpl_fntype local;
237
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
238
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamA_impl_fnptr;
239
+ if (local)
240
+ local(stream, name);
241
+ }
242
+
243
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamW_impl_init)(nvtx_cudaStream_t stream, const wchar_t* name){
244
+ nvtxNameCudaStreamW_fakeimpl_fntype local;
245
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
246
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamW_impl_fnptr;
247
+ if (local)
248
+ local(stream, name);
249
+ }
250
+
251
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventA_impl_init)(nvtx_cudaEvent_t event, const char* name){
252
+ nvtxNameCudaEventA_fakeimpl_fntype local;
253
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
254
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventA_impl_fnptr;
255
+ if (local)
256
+ local(event, name);
257
+ }
258
+
259
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventW_impl_init)(nvtx_cudaEvent_t event, const wchar_t* name){
260
+ nvtxNameCudaEventW_fakeimpl_fntype local;
261
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
262
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventW_impl_fnptr;
263
+ if (local)
264
+ local(event, name);
265
+ }
266
+
267
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceA_impl_init)(nvtx_cl_device_id device, const char* name){
268
+ nvtxNameClDeviceA_fakeimpl_fntype local;
269
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
270
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceA_impl_fnptr;
271
+ if (local)
272
+ local(device, name);
273
+ }
274
+
275
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceW_impl_init)(nvtx_cl_device_id device, const wchar_t* name){
276
+ nvtxNameClDeviceW_fakeimpl_fntype local;
277
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
278
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceW_impl_fnptr;
279
+ if (local)
280
+ local(device, name);
281
+ }
282
+
283
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextA_impl_init)(nvtx_cl_context context, const char* name){
284
+ nvtxNameClContextA_fakeimpl_fntype local;
285
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
286
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextA_impl_fnptr;
287
+ if (local)
288
+ local(context, name);
289
+ }
290
+
291
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextW_impl_init)(nvtx_cl_context context, const wchar_t* name){
292
+ nvtxNameClContextW_fakeimpl_fntype local;
293
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
294
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextW_impl_fnptr;
295
+ if (local)
296
+ local(context, name);
297
+ }
298
+
299
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueA_impl_init)(nvtx_cl_command_queue command_queue, const char* name){
300
+ nvtxNameClCommandQueueA_fakeimpl_fntype local;
301
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
302
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueA_impl_fnptr;
303
+ if (local)
304
+ local(command_queue, name);
305
+ }
306
+
307
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueW_impl_init)(nvtx_cl_command_queue command_queue, const wchar_t* name){
308
+ nvtxNameClCommandQueueW_fakeimpl_fntype local;
309
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
310
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueW_impl_fnptr;
311
+ if (local)
312
+ local(command_queue, name);
313
+ }
314
+
315
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectA_impl_init)(nvtx_cl_mem memobj, const char* name){
316
+ nvtxNameClMemObjectA_fakeimpl_fntype local;
317
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
318
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectA_impl_fnptr;
319
+ if (local)
320
+ local(memobj, name);
321
+ }
322
+
323
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectW_impl_init)(nvtx_cl_mem memobj, const wchar_t* name){
324
+ nvtxNameClMemObjectW_fakeimpl_fntype local;
325
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
326
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectW_impl_fnptr;
327
+ if (local)
328
+ local(memobj, name);
329
+ }
330
+
331
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerA_impl_init)(nvtx_cl_sampler sampler, const char* name){
332
+ nvtxNameClSamplerA_fakeimpl_fntype local;
333
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
334
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerA_impl_fnptr;
335
+ if (local)
336
+ local(sampler, name);
337
+ }
338
+
339
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerW_impl_init)(nvtx_cl_sampler sampler, const wchar_t* name){
340
+ nvtxNameClSamplerW_fakeimpl_fntype local;
341
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
342
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerW_impl_fnptr;
343
+ if (local)
344
+ local(sampler, name);
345
+ }
346
+
347
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramA_impl_init)(nvtx_cl_program program, const char* name){
348
+ nvtxNameClProgramA_fakeimpl_fntype local;
349
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
350
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramA_impl_fnptr;
351
+ if (local)
352
+ local(program, name);
353
+ }
354
+
355
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramW_impl_init)(nvtx_cl_program program, const wchar_t* name){
356
+ nvtxNameClProgramW_fakeimpl_fntype local;
357
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
358
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramW_impl_fnptr;
359
+ if (local)
360
+ local(program, name);
361
+ }
362
+
363
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventA_impl_init)(nvtx_cl_event evnt, const char* name){
364
+ nvtxNameClEventA_fakeimpl_fntype local;
365
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
366
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventA_impl_fnptr;
367
+ if (local)
368
+ local(evnt, name);
369
+ }
370
+
371
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventW_impl_init)(nvtx_cl_event evnt, const wchar_t* name){
372
+ nvtxNameClEventW_fakeimpl_fntype local;
373
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
374
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventW_impl_fnptr;
375
+ if (local)
376
+ local(evnt, name);
377
+ }
378
+
379
+ NVTX_LINKONCE_DEFINE_FUNCTION nvtxSyncUser_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserCreate_impl_init)(nvtxDomainHandle_t domain, const nvtxSyncUserAttributes_t* attribs){
380
+ nvtxDomainSyncUserCreate_impl_fntype local;
381
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
382
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserCreate_impl_fnptr;
383
+ if (local) {
384
+ return local(domain, attribs);
385
+ }
386
+ return (nvtxSyncUser_t)0;
387
+ }
388
+
389
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserDestroy_impl_init)(nvtxSyncUser_t handle){
390
+ nvtxDomainSyncUserDestroy_impl_fntype local;
391
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
392
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserDestroy_impl_fnptr;
393
+ if (local)
394
+ local(handle);
395
+ }
396
+
397
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireStart_impl_init)(nvtxSyncUser_t handle){
398
+ nvtxDomainSyncUserAcquireStart_impl_fntype local;
399
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
400
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireStart_impl_fnptr;
401
+ if (local)
402
+ local(handle);
403
+ }
404
+
405
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireFailed_impl_init)(nvtxSyncUser_t handle){
406
+ nvtxDomainSyncUserAcquireFailed_impl_fntype local;
407
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
408
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireFailed_impl_fnptr;
409
+ if (local)
410
+ local(handle);
411
+ }
412
+
413
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireSuccess_impl_init)(nvtxSyncUser_t handle){
414
+ nvtxDomainSyncUserAcquireSuccess_impl_fntype local;
415
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
416
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireSuccess_impl_fnptr;
417
+ if (local)
418
+ local(handle);
419
+ }
420
+
421
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserReleasing_impl_init)(nvtxSyncUser_t handle){
422
+ nvtxDomainSyncUserReleasing_impl_fntype local;
423
+ NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
424
+ local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserReleasing_impl_fnptr;
425
+ if (local)
426
+ local(handle);
427
+ }
428
+
429
+ NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_VERSIONED_IDENTIFIER(nvtxSetInitFunctionsToNoops)(int forceAllToNoops);
430
+ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_VERSIONED_IDENTIFIER(nvtxSetInitFunctionsToNoops)(int forceAllToNoops)
431
+ {
432
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkEx_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxMarkEx_impl_init) || forceAllToNoops)
433
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkEx_impl_fnptr = NULL;
434
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxMarkA_impl_init) || forceAllToNoops)
435
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkA_impl_fnptr = NULL;
436
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxMarkW_impl_init) || forceAllToNoops)
437
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkW_impl_fnptr = NULL;
438
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartEx_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartEx_impl_init) || forceAllToNoops)
439
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartEx_impl_fnptr = NULL;
440
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartA_impl_init) || forceAllToNoops)
441
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartA_impl_fnptr = NULL;
442
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartW_impl_init) || forceAllToNoops)
443
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartW_impl_fnptr = NULL;
444
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeEnd_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangeEnd_impl_init) || forceAllToNoops)
445
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeEnd_impl_fnptr = NULL;
446
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushEx_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangePushEx_impl_init) || forceAllToNoops)
447
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushEx_impl_fnptr = NULL;
448
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangePushA_impl_init) || forceAllToNoops)
449
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushA_impl_fnptr = NULL;
450
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangePushW_impl_init) || forceAllToNoops)
451
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushW_impl_fnptr = NULL;
452
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePop_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangePop_impl_init) || forceAllToNoops)
453
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePop_impl_fnptr = NULL;
454
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryA_impl_init) || forceAllToNoops)
455
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryA_impl_fnptr = NULL;
456
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryW_impl_init) || forceAllToNoops)
457
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryW_impl_fnptr = NULL;
458
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadA_impl_init) || forceAllToNoops)
459
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadA_impl_fnptr = NULL;
460
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadW_impl_init) || forceAllToNoops)
461
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadW_impl_fnptr = NULL;
462
+
463
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceA_impl_init) || forceAllToNoops)
464
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceA_impl_fnptr = NULL;
465
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceW_impl_init) || forceAllToNoops)
466
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceW_impl_fnptr = NULL;
467
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextA_impl_init) || forceAllToNoops)
468
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextA_impl_fnptr = NULL;
469
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextW_impl_init) || forceAllToNoops)
470
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextW_impl_fnptr = NULL;
471
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamA_impl_init) || forceAllToNoops)
472
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamA_impl_fnptr = NULL;
473
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamW_impl_init) || forceAllToNoops)
474
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamW_impl_fnptr = NULL;
475
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventA_impl_init) || forceAllToNoops)
476
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventA_impl_fnptr = NULL;
477
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventW_impl_init) || forceAllToNoops)
478
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventW_impl_fnptr = NULL;
479
+
480
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceA_impl_init) || forceAllToNoops)
481
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceA_impl_fnptr = NULL;
482
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceW_impl_init) || forceAllToNoops)
483
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceW_impl_fnptr = NULL;
484
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextA_impl_init) || forceAllToNoops)
485
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextA_impl_fnptr = NULL;
486
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextW_impl_init) || forceAllToNoops)
487
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextW_impl_fnptr = NULL;
488
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueA_impl_init) || forceAllToNoops)
489
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueA_impl_fnptr = NULL;
490
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueW_impl_init) || forceAllToNoops)
491
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueW_impl_fnptr = NULL;
492
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectA_impl_init) || forceAllToNoops)
493
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectA_impl_fnptr = NULL;
494
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectW_impl_init) || forceAllToNoops)
495
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectW_impl_fnptr = NULL;
496
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerA_impl_init) || forceAllToNoops)
497
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerA_impl_fnptr = NULL;
498
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerW_impl_init) || forceAllToNoops)
499
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerW_impl_fnptr = NULL;
500
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramA_impl_init) || forceAllToNoops)
501
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramA_impl_fnptr = NULL;
502
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramW_impl_init) || forceAllToNoops)
503
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramW_impl_fnptr = NULL;
504
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventA_impl_init) || forceAllToNoops)
505
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventA_impl_fnptr = NULL;
506
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventW_impl_init) || forceAllToNoops)
507
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventW_impl_fnptr = NULL;
508
+
509
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceA_impl_init) || forceAllToNoops)
510
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceA_impl_fnptr = NULL;
511
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceW_impl_init) || forceAllToNoops)
512
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceW_impl_fnptr = NULL;
513
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamA_impl_init) || forceAllToNoops)
514
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamA_impl_fnptr = NULL;
515
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamW_impl_init) || forceAllToNoops)
516
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamW_impl_fnptr = NULL;
517
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventA_impl_init) || forceAllToNoops)
518
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventA_impl_fnptr = NULL;
519
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventW_impl_init) || forceAllToNoops)
520
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventW_impl_fnptr = NULL;
521
+
522
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainMarkEx_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainMarkEx_impl_init) || forceAllToNoops)
523
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainMarkEx_impl_fnptr = NULL;
524
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeStartEx_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeStartEx_impl_init) || forceAllToNoops)
525
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeStartEx_impl_fnptr = NULL;
526
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeEnd_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeEnd_impl_init) || forceAllToNoops)
527
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeEnd_impl_fnptr = NULL;
528
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePushEx_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePushEx_impl_init) || forceAllToNoops)
529
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePushEx_impl_fnptr = NULL;
530
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePop_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePop_impl_init) || forceAllToNoops)
531
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePop_impl_fnptr = NULL;
532
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceCreate_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceCreate_impl_init) || forceAllToNoops)
533
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceCreate_impl_fnptr = NULL;
534
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceDestroy_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceDestroy_impl_init) || forceAllToNoops)
535
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceDestroy_impl_fnptr = NULL;
536
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryA_impl_init) || forceAllToNoops)
537
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryA_impl_fnptr = NULL;
538
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryW_impl_init) || forceAllToNoops)
539
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryW_impl_fnptr = NULL;
540
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringA_impl_init) || forceAllToNoops)
541
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringA_impl_fnptr = NULL;
542
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringW_impl_init) || forceAllToNoops)
543
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringW_impl_fnptr = NULL;
544
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateA_impl_init) || forceAllToNoops)
545
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateA_impl_fnptr = NULL;
546
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateW_impl_init) || forceAllToNoops)
547
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateW_impl_fnptr = NULL;
548
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainDestroy_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainDestroy_impl_init) || forceAllToNoops)
549
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainDestroy_impl_fnptr = NULL;
550
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxInitialize_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxInitialize_impl_init) || forceAllToNoops)
551
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxInitialize_impl_fnptr = NULL;
552
+
553
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserCreate_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserCreate_impl_init) || forceAllToNoops)
554
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserCreate_impl_fnptr = NULL;
555
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserDestroy_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserDestroy_impl_init) || forceAllToNoops)
556
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserDestroy_impl_fnptr = NULL;
557
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireStart_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireStart_impl_init) || forceAllToNoops)
558
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireStart_impl_fnptr = NULL;
559
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireFailed_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireFailed_impl_init) || forceAllToNoops)
560
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireFailed_impl_fnptr = NULL;
561
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireSuccess_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireSuccess_impl_init) || forceAllToNoops)
562
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireSuccess_impl_fnptr = NULL;
563
+ if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserReleasing_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserReleasing_impl_init) || forceAllToNoops)
564
+ NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserReleasing_impl_fnptr = NULL;
565
+ }
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/include/nvtx3/nvtxDetail/nvtxTypes.h ADDED
@@ -0,0 +1,333 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2009-2016 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO USER:
5
+ *
6
+ * This source code is subject to NVIDIA ownership rights under U.S. and
7
+ * international Copyright laws.
8
+ *
9
+ * This software and the information contained herein is PROPRIETARY and
10
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and conditions
11
+ * of a form of NVIDIA software license agreement.
12
+ *
13
+ * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
14
+ * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
15
+ * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
16
+ * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
17
+ * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
18
+ * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
19
+ * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
20
+ * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
21
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
22
+ * OR PERFORMANCE OF THIS SOURCE CODE.
23
+ *
24
+ * U.S. Government End Users. This source code is a "commercial item" as
25
+ * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
26
+ * "commercial computer software" and "commercial computer software
27
+ * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
28
+ * and is provided to the U.S. Government only as a commercial end item.
29
+ * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
30
+ * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
31
+ * source code with only those rights set forth herein.
32
+ *
33
+ * Any use of this source code in individual and commercial software must
34
+ * include, in the user documentation and internal comments to the code,
35
+ * the above Disclaimer and U.S. Government End Users Notice.
36
+ */
37
+
38
+ /* This header defines types which are used by the internal implementation
39
+ * of NVTX and callback subscribers. API clients do not use these types,
40
+ * so they are defined here instead of in nvToolsExt.h to clarify they are
41
+ * not part of the NVTX client API. */
42
+
43
+ #ifndef NVTX_IMPL_GUARD
44
+ #error Never include this file directly -- it is automatically included by nvToolsExt.h.
45
+ #endif
46
+
47
+ /* ------ Dependency-free types binary-compatible with real types ------- */
48
+
49
+ /* In order to avoid having the NVTX core API headers depend on non-NVTX
50
+ * headers like cuda.h, NVTX defines binary-compatible types to use for
51
+ * safely making the initialization versions of all NVTX functions without
52
+ * needing to have definitions for the real types. */
53
+
54
+ typedef int nvtx_CUdevice;
55
+ typedef void* nvtx_CUcontext;
56
+ typedef void* nvtx_CUstream;
57
+ typedef void* nvtx_CUevent;
58
+
59
+ typedef void* nvtx_cudaStream_t;
60
+ typedef void* nvtx_cudaEvent_t;
61
+
62
+ typedef void* nvtx_cl_platform_id;
63
+ typedef void* nvtx_cl_device_id;
64
+ typedef void* nvtx_cl_context;
65
+ typedef void* nvtx_cl_command_queue;
66
+ typedef void* nvtx_cl_mem;
67
+ typedef void* nvtx_cl_program;
68
+ typedef void* nvtx_cl_kernel;
69
+ typedef void* nvtx_cl_event;
70
+ typedef void* nvtx_cl_sampler;
71
+
72
+ typedef struct nvtxSyncUser* nvtxSyncUser_t;
73
+ struct nvtxSyncUserAttributes_v0;
74
+ typedef struct nvtxSyncUserAttributes_v0 nvtxSyncUserAttributes_t;
75
+
76
+ /* --------- Types for function pointers (with fake API types) ---------- */
77
+
78
+ typedef void (NVTX_API * nvtxMarkEx_impl_fntype)(const nvtxEventAttributes_t* eventAttrib);
79
+ typedef void (NVTX_API * nvtxMarkA_impl_fntype)(const char* message);
80
+ typedef void (NVTX_API * nvtxMarkW_impl_fntype)(const wchar_t* message);
81
+ typedef nvtxRangeId_t (NVTX_API * nvtxRangeStartEx_impl_fntype)(const nvtxEventAttributes_t* eventAttrib);
82
+ typedef nvtxRangeId_t (NVTX_API * nvtxRangeStartA_impl_fntype)(const char* message);
83
+ typedef nvtxRangeId_t (NVTX_API * nvtxRangeStartW_impl_fntype)(const wchar_t* message);
84
+ typedef void (NVTX_API * nvtxRangeEnd_impl_fntype)(nvtxRangeId_t id);
85
+ typedef int (NVTX_API * nvtxRangePushEx_impl_fntype)(const nvtxEventAttributes_t* eventAttrib);
86
+ typedef int (NVTX_API * nvtxRangePushA_impl_fntype)(const char* message);
87
+ typedef int (NVTX_API * nvtxRangePushW_impl_fntype)(const wchar_t* message);
88
+ typedef int (NVTX_API * nvtxRangePop_impl_fntype)(void);
89
+ typedef void (NVTX_API * nvtxNameCategoryA_impl_fntype)(uint32_t category, const char* name);
90
+ typedef void (NVTX_API * nvtxNameCategoryW_impl_fntype)(uint32_t category, const wchar_t* name);
91
+ typedef void (NVTX_API * nvtxNameOsThreadA_impl_fntype)(uint32_t threadId, const char* name);
92
+ typedef void (NVTX_API * nvtxNameOsThreadW_impl_fntype)(uint32_t threadId, const wchar_t* name);
93
+
94
+ /* Real impl types are defined in nvtxImplCuda_v3.h, where CUDA headers are included */
95
+ typedef void (NVTX_API * nvtxNameCuDeviceA_fakeimpl_fntype)(nvtx_CUdevice device, const char* name);
96
+ typedef void (NVTX_API * nvtxNameCuDeviceW_fakeimpl_fntype)(nvtx_CUdevice device, const wchar_t* name);
97
+ typedef void (NVTX_API * nvtxNameCuContextA_fakeimpl_fntype)(nvtx_CUcontext context, const char* name);
98
+ typedef void (NVTX_API * nvtxNameCuContextW_fakeimpl_fntype)(nvtx_CUcontext context, const wchar_t* name);
99
+ typedef void (NVTX_API * nvtxNameCuStreamA_fakeimpl_fntype)(nvtx_CUstream stream, const char* name);
100
+ typedef void (NVTX_API * nvtxNameCuStreamW_fakeimpl_fntype)(nvtx_CUstream stream, const wchar_t* name);
101
+ typedef void (NVTX_API * nvtxNameCuEventA_fakeimpl_fntype)(nvtx_CUevent event, const char* name);
102
+ typedef void (NVTX_API * nvtxNameCuEventW_fakeimpl_fntype)(nvtx_CUevent event, const wchar_t* name);
103
+
104
+ /* Real impl types are defined in nvtxImplOpenCL_v3.h, where OPENCL headers are included */
105
+ typedef void (NVTX_API * nvtxNameClDeviceA_fakeimpl_fntype)(nvtx_cl_device_id device, const char* name);
106
+ typedef void (NVTX_API * nvtxNameClDeviceW_fakeimpl_fntype)(nvtx_cl_device_id device, const wchar_t* name);
107
+ typedef void (NVTX_API * nvtxNameClContextA_fakeimpl_fntype)(nvtx_cl_context context, const char* name);
108
+ typedef void (NVTX_API * nvtxNameClContextW_fakeimpl_fntype)(nvtx_cl_context context, const wchar_t* name);
109
+ typedef void (NVTX_API * nvtxNameClCommandQueueA_fakeimpl_fntype)(nvtx_cl_command_queue command_queue, const char* name);
110
+ typedef void (NVTX_API * nvtxNameClCommandQueueW_fakeimpl_fntype)(nvtx_cl_command_queue command_queue, const wchar_t* name);
111
+ typedef void (NVTX_API * nvtxNameClMemObjectA_fakeimpl_fntype)(nvtx_cl_mem memobj, const char* name);
112
+ typedef void (NVTX_API * nvtxNameClMemObjectW_fakeimpl_fntype)(nvtx_cl_mem memobj, const wchar_t* name);
113
+ typedef void (NVTX_API * nvtxNameClSamplerA_fakeimpl_fntype)(nvtx_cl_sampler sampler, const char* name);
114
+ typedef void (NVTX_API * nvtxNameClSamplerW_fakeimpl_fntype)(nvtx_cl_sampler sampler, const wchar_t* name);
115
+ typedef void (NVTX_API * nvtxNameClProgramA_fakeimpl_fntype)(nvtx_cl_program program, const char* name);
116
+ typedef void (NVTX_API * nvtxNameClProgramW_fakeimpl_fntype)(nvtx_cl_program program, const wchar_t* name);
117
+ typedef void (NVTX_API * nvtxNameClEventA_fakeimpl_fntype)(nvtx_cl_event evnt, const char* name);
118
+ typedef void (NVTX_API * nvtxNameClEventW_fakeimpl_fntype)(nvtx_cl_event evnt, const wchar_t* name);
119
+
120
+ /* Real impl types are defined in nvtxImplCudaRt_v3.h, where CUDART headers are included */
121
+ typedef void (NVTX_API * nvtxNameCudaDeviceA_impl_fntype)(int device, const char* name);
122
+ typedef void (NVTX_API * nvtxNameCudaDeviceW_impl_fntype)(int device, const wchar_t* name);
123
+ typedef void (NVTX_API * nvtxNameCudaStreamA_fakeimpl_fntype)(nvtx_cudaStream_t stream, const char* name);
124
+ typedef void (NVTX_API * nvtxNameCudaStreamW_fakeimpl_fntype)(nvtx_cudaStream_t stream, const wchar_t* name);
125
+ typedef void (NVTX_API * nvtxNameCudaEventA_fakeimpl_fntype)(nvtx_cudaEvent_t event, const char* name);
126
+ typedef void (NVTX_API * nvtxNameCudaEventW_fakeimpl_fntype)(nvtx_cudaEvent_t event, const wchar_t* name);
127
+
128
+ typedef void (NVTX_API * nvtxDomainMarkEx_impl_fntype)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
129
+ typedef nvtxRangeId_t (NVTX_API * nvtxDomainRangeStartEx_impl_fntype)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
130
+ typedef void (NVTX_API * nvtxDomainRangeEnd_impl_fntype)(nvtxDomainHandle_t domain, nvtxRangeId_t id);
131
+ typedef int (NVTX_API * nvtxDomainRangePushEx_impl_fntype)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
132
+ typedef int (NVTX_API * nvtxDomainRangePop_impl_fntype)(nvtxDomainHandle_t domain);
133
+ typedef nvtxResourceHandle_t (NVTX_API * nvtxDomainResourceCreate_impl_fntype)(nvtxDomainHandle_t domain, nvtxResourceAttributes_t* attribs);
134
+ typedef void (NVTX_API * nvtxDomainResourceDestroy_impl_fntype)(nvtxResourceHandle_t resource);
135
+ typedef void (NVTX_API * nvtxDomainNameCategoryA_impl_fntype)(nvtxDomainHandle_t domain, uint32_t category, const char* name);
136
+ typedef void (NVTX_API * nvtxDomainNameCategoryW_impl_fntype)(nvtxDomainHandle_t domain, uint32_t category, const wchar_t* name);
137
+ typedef nvtxStringHandle_t (NVTX_API * nvtxDomainRegisterStringA_impl_fntype)(nvtxDomainHandle_t domain, const char* string);
138
+ typedef nvtxStringHandle_t (NVTX_API * nvtxDomainRegisterStringW_impl_fntype)(nvtxDomainHandle_t domain, const wchar_t* string);
139
+ typedef nvtxDomainHandle_t (NVTX_API * nvtxDomainCreateA_impl_fntype)(const char* message);
140
+ typedef nvtxDomainHandle_t (NVTX_API * nvtxDomainCreateW_impl_fntype)(const wchar_t* message);
141
+ typedef void (NVTX_API * nvtxDomainDestroy_impl_fntype)(nvtxDomainHandle_t domain);
142
+ typedef void (NVTX_API * nvtxInitialize_impl_fntype)(const void* reserved);
143
+
144
+ typedef nvtxSyncUser_t (NVTX_API * nvtxDomainSyncUserCreate_impl_fntype)(nvtxDomainHandle_t domain, const nvtxSyncUserAttributes_t* attribs);
145
+ typedef void (NVTX_API * nvtxDomainSyncUserDestroy_impl_fntype)(nvtxSyncUser_t handle);
146
+ typedef void (NVTX_API * nvtxDomainSyncUserAcquireStart_impl_fntype)(nvtxSyncUser_t handle);
147
+ typedef void (NVTX_API * nvtxDomainSyncUserAcquireFailed_impl_fntype)(nvtxSyncUser_t handle);
148
+ typedef void (NVTX_API * nvtxDomainSyncUserAcquireSuccess_impl_fntype)(nvtxSyncUser_t handle);
149
+ typedef void (NVTX_API * nvtxDomainSyncUserReleasing_impl_fntype)(nvtxSyncUser_t handle);
150
+
151
+ /* ---------------- Types for callback subscription --------------------- */
152
+
153
+ typedef const void *(NVTX_API * NvtxGetExportTableFunc_t)(uint32_t exportTableId);
154
+ typedef int (NVTX_API * NvtxInitializeInjectionNvtxFunc_t)(NvtxGetExportTableFunc_t exportTable);
155
+
156
+ typedef enum NvtxCallbackModule
157
+ {
158
+ NVTX_CB_MODULE_INVALID = 0,
159
+ NVTX_CB_MODULE_CORE = 1,
160
+ NVTX_CB_MODULE_CUDA = 2,
161
+ NVTX_CB_MODULE_OPENCL = 3,
162
+ NVTX_CB_MODULE_CUDART = 4,
163
+ NVTX_CB_MODULE_CORE2 = 5,
164
+ NVTX_CB_MODULE_SYNC = 6,
165
+ /* --- New constants must only be added directly above this line --- */
166
+ NVTX_CB_MODULE_SIZE,
167
+ NVTX_CB_MODULE_FORCE_INT = 0x7fffffff
168
+ } NvtxCallbackModule;
169
+
170
+ typedef enum NvtxCallbackIdCore
171
+ {
172
+ NVTX_CBID_CORE_INVALID = 0,
173
+ NVTX_CBID_CORE_MarkEx = 1,
174
+ NVTX_CBID_CORE_MarkA = 2,
175
+ NVTX_CBID_CORE_MarkW = 3,
176
+ NVTX_CBID_CORE_RangeStartEx = 4,
177
+ NVTX_CBID_CORE_RangeStartA = 5,
178
+ NVTX_CBID_CORE_RangeStartW = 6,
179
+ NVTX_CBID_CORE_RangeEnd = 7,
180
+ NVTX_CBID_CORE_RangePushEx = 8,
181
+ NVTX_CBID_CORE_RangePushA = 9,
182
+ NVTX_CBID_CORE_RangePushW = 10,
183
+ NVTX_CBID_CORE_RangePop = 11,
184
+ NVTX_CBID_CORE_NameCategoryA = 12,
185
+ NVTX_CBID_CORE_NameCategoryW = 13,
186
+ NVTX_CBID_CORE_NameOsThreadA = 14,
187
+ NVTX_CBID_CORE_NameOsThreadW = 15,
188
+ /* --- New constants must only be added directly above this line --- */
189
+ NVTX_CBID_CORE_SIZE,
190
+ NVTX_CBID_CORE_FORCE_INT = 0x7fffffff
191
+ } NvtxCallbackIdCore;
192
+
193
+ typedef enum NvtxCallbackIdCore2
194
+ {
195
+ NVTX_CBID_CORE2_INVALID = 0,
196
+ NVTX_CBID_CORE2_DomainMarkEx = 1,
197
+ NVTX_CBID_CORE2_DomainRangeStartEx = 2,
198
+ NVTX_CBID_CORE2_DomainRangeEnd = 3,
199
+ NVTX_CBID_CORE2_DomainRangePushEx = 4,
200
+ NVTX_CBID_CORE2_DomainRangePop = 5,
201
+ NVTX_CBID_CORE2_DomainResourceCreate = 6,
202
+ NVTX_CBID_CORE2_DomainResourceDestroy = 7,
203
+ NVTX_CBID_CORE2_DomainNameCategoryA = 8,
204
+ NVTX_CBID_CORE2_DomainNameCategoryW = 9,
205
+ NVTX_CBID_CORE2_DomainRegisterStringA = 10,
206
+ NVTX_CBID_CORE2_DomainRegisterStringW = 11,
207
+ NVTX_CBID_CORE2_DomainCreateA = 12,
208
+ NVTX_CBID_CORE2_DomainCreateW = 13,
209
+ NVTX_CBID_CORE2_DomainDestroy = 14,
210
+ NVTX_CBID_CORE2_Initialize = 15,
211
+ /* --- New constants must only be added directly above this line --- */
212
+ NVTX_CBID_CORE2_SIZE,
213
+ NVTX_CBID_CORE2_FORCE_INT = 0x7fffffff
214
+ } NvtxCallbackIdCore2;
215
+
216
+ typedef enum NvtxCallbackIdCuda
217
+ {
218
+ NVTX_CBID_CUDA_INVALID = 0,
219
+ NVTX_CBID_CUDA_NameCuDeviceA = 1,
220
+ NVTX_CBID_CUDA_NameCuDeviceW = 2,
221
+ NVTX_CBID_CUDA_NameCuContextA = 3,
222
+ NVTX_CBID_CUDA_NameCuContextW = 4,
223
+ NVTX_CBID_CUDA_NameCuStreamA = 5,
224
+ NVTX_CBID_CUDA_NameCuStreamW = 6,
225
+ NVTX_CBID_CUDA_NameCuEventA = 7,
226
+ NVTX_CBID_CUDA_NameCuEventW = 8,
227
+ /* --- New constants must only be added directly above this line --- */
228
+ NVTX_CBID_CUDA_SIZE,
229
+ NVTX_CBID_CUDA_FORCE_INT = 0x7fffffff
230
+ } NvtxCallbackIdCuda;
231
+
232
+ typedef enum NvtxCallbackIdCudaRt
233
+ {
234
+ NVTX_CBID_CUDART_INVALID = 0,
235
+ NVTX_CBID_CUDART_NameCudaDeviceA = 1,
236
+ NVTX_CBID_CUDART_NameCudaDeviceW = 2,
237
+ NVTX_CBID_CUDART_NameCudaStreamA = 3,
238
+ NVTX_CBID_CUDART_NameCudaStreamW = 4,
239
+ NVTX_CBID_CUDART_NameCudaEventA = 5,
240
+ NVTX_CBID_CUDART_NameCudaEventW = 6,
241
+ /* --- New constants must only be added directly above this line --- */
242
+ NVTX_CBID_CUDART_SIZE,
243
+ NVTX_CBID_CUDART_FORCE_INT = 0x7fffffff
244
+ } NvtxCallbackIdCudaRt;
245
+
246
+ typedef enum NvtxCallbackIdOpenCL
247
+ {
248
+ NVTX_CBID_OPENCL_INVALID = 0,
249
+ NVTX_CBID_OPENCL_NameClDeviceA = 1,
250
+ NVTX_CBID_OPENCL_NameClDeviceW = 2,
251
+ NVTX_CBID_OPENCL_NameClContextA = 3,
252
+ NVTX_CBID_OPENCL_NameClContextW = 4,
253
+ NVTX_CBID_OPENCL_NameClCommandQueueA = 5,
254
+ NVTX_CBID_OPENCL_NameClCommandQueueW = 6,
255
+ NVTX_CBID_OPENCL_NameClMemObjectA = 7,
256
+ NVTX_CBID_OPENCL_NameClMemObjectW = 8,
257
+ NVTX_CBID_OPENCL_NameClSamplerA = 9,
258
+ NVTX_CBID_OPENCL_NameClSamplerW = 10,
259
+ NVTX_CBID_OPENCL_NameClProgramA = 11,
260
+ NVTX_CBID_OPENCL_NameClProgramW = 12,
261
+ NVTX_CBID_OPENCL_NameClEventA = 13,
262
+ NVTX_CBID_OPENCL_NameClEventW = 14,
263
+ /* --- New constants must only be added directly above this line --- */
264
+ NVTX_CBID_OPENCL_SIZE,
265
+ NVTX_CBID_OPENCL_FORCE_INT = 0x7fffffff
266
+ } NvtxCallbackIdOpenCL;
267
+
268
+ typedef enum NvtxCallbackIdSync
269
+ {
270
+ NVTX_CBID_SYNC_INVALID = 0,
271
+ NVTX_CBID_SYNC_DomainSyncUserCreate = 1,
272
+ NVTX_CBID_SYNC_DomainSyncUserDestroy = 2,
273
+ NVTX_CBID_SYNC_DomainSyncUserAcquireStart = 3,
274
+ NVTX_CBID_SYNC_DomainSyncUserAcquireFailed = 4,
275
+ NVTX_CBID_SYNC_DomainSyncUserAcquireSuccess = 5,
276
+ NVTX_CBID_SYNC_DomainSyncUserReleasing = 6,
277
+ /* --- New constants must only be added directly above this line --- */
278
+ NVTX_CBID_SYNC_SIZE,
279
+ NVTX_CBID_SYNC_FORCE_INT = 0x7fffffff
280
+ } NvtxCallbackIdSync;
281
+
282
+ /* IDs for NVTX Export Tables */
283
+ typedef enum NvtxExportTableID
284
+ {
285
+ NVTX_ETID_INVALID = 0,
286
+ NVTX_ETID_CALLBACKS = 1,
287
+ NVTX_ETID_RESERVED0 = 2,
288
+ NVTX_ETID_VERSIONINFO = 3,
289
+ /* --- New constants must only be added directly above this line --- */
290
+ NVTX_ETID_SIZE,
291
+ NVTX_ETID_FORCE_INT = 0x7fffffff
292
+ } NvtxExportTableID;
293
+
294
+ typedef void (* NvtxFunctionPointer)(void); /* generic uncallable function pointer, must be casted to appropriate function type */
295
+ typedef NvtxFunctionPointer** NvtxFunctionTable; /* double pointer because array(1) of pointers(2) to function pointers */
296
+
297
+ typedef struct NvtxExportTableCallbacks
298
+ {
299
+ size_t struct_size;
300
+
301
+ /* returns an array of pointer to function pointers*/
302
+ int (NVTX_API *GetModuleFunctionTable)(
303
+ NvtxCallbackModule module,
304
+ NvtxFunctionTable* out_table,
305
+ unsigned int* out_size);
306
+ } NvtxExportTableCallbacks;
307
+
308
+ typedef struct NvtxExportTableVersionInfo
309
+ {
310
+ /* sizeof(NvtxExportTableVersionInfo) */
311
+ size_t struct_size;
312
+
313
+ /* The API version comes from the NVTX library linked to the app. The
314
+ * injection library is can use this info to make some assumptions */
315
+ uint32_t version;
316
+
317
+ /* Reserved for alignment, do not use */
318
+ uint32_t reserved0;
319
+
320
+ /* This must be set by tools when attaching to provide applications
321
+ * the ability to, in emergency situations, detect problematic tools
322
+ * versions and modify the NVTX source to prevent attaching anything
323
+ * that causes trouble in the app. Currently, this value is ignored. */
324
+ void (NVTX_API *SetInjectionNvtxVersion)(
325
+ uint32_t version);
326
+ } NvtxExportTableVersionInfo;
327
+
328
+
329
+
330
+
331
+
332
+
333
+
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/nvtx/lib/__init__.py ADDED
File without changes
tuning-competition-baseline/.venv/lib/python3.11/site-packages/packaging-24.2.dist-info/INSTALLER ADDED
@@ -0,0 +1 @@
 
 
1
+ pip
tuning-competition-baseline/.venv/lib/python3.11/site-packages/packaging-24.2.dist-info/LICENSE ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ This software is made available under the terms of *either* of the licenses
2
+ found in LICENSE.APACHE or LICENSE.BSD. Contributions to this software is made
3
+ under the terms of *both* these licenses.
tuning-competition-baseline/.venv/lib/python3.11/site-packages/packaging-24.2.dist-info/RECORD ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ packaging-24.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
2
+ packaging-24.2.dist-info/LICENSE,sha256=ytHvW9NA1z4HS6YU0m996spceUDD2MNIUuZcSQlobEg,197
3
+ packaging-24.2.dist-info/LICENSE.APACHE,sha256=DVQuDIgE45qn836wDaWnYhSdxoLXgpRRKH4RuTjpRZQ,10174
4
+ packaging-24.2.dist-info/LICENSE.BSD,sha256=tw5-m3QvHMb5SLNMFqo5_-zpQZY2S8iP8NIYDwAo-sU,1344
5
+ packaging-24.2.dist-info/METADATA,sha256=ohH86s6k5mIfQxY2TS0LcSfADeOFa4BiCC-bxZV-pNs,3204
6
+ packaging-24.2.dist-info/RECORD,,
7
+ packaging-24.2.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ packaging-24.2.dist-info/WHEEL,sha256=CpUCUxeHQbRN5UGRQHYRJorO5Af-Qy_fHMctcQ8DSGI,82
9
+ packaging/__init__.py,sha256=dk4Ta_vmdVJxYHDcfyhvQNw8V3PgSBomKNXqg-D2JDY,494
10
+ packaging/__pycache__/__init__.cpython-311.pyc,,
11
+ packaging/__pycache__/_elffile.cpython-311.pyc,,
12
+ packaging/__pycache__/_manylinux.cpython-311.pyc,,
13
+ packaging/__pycache__/_musllinux.cpython-311.pyc,,
14
+ packaging/__pycache__/_parser.cpython-311.pyc,,
15
+ packaging/__pycache__/_structures.cpython-311.pyc,,
16
+ packaging/__pycache__/_tokenizer.cpython-311.pyc,,
17
+ packaging/__pycache__/markers.cpython-311.pyc,,
18
+ packaging/__pycache__/metadata.cpython-311.pyc,,
19
+ packaging/__pycache__/requirements.cpython-311.pyc,,
20
+ packaging/__pycache__/specifiers.cpython-311.pyc,,
21
+ packaging/__pycache__/tags.cpython-311.pyc,,
22
+ packaging/__pycache__/utils.cpython-311.pyc,,
23
+ packaging/__pycache__/version.cpython-311.pyc,,
24
+ packaging/_elffile.py,sha256=cflAQAkE25tzhYmq_aCi72QfbT_tn891tPzfpbeHOwE,3306
25
+ packaging/_manylinux.py,sha256=vl5OCoz4kx80H5rwXKeXWjl9WNISGmr4ZgTpTP9lU9c,9612
26
+ packaging/_musllinux.py,sha256=p9ZqNYiOItGee8KcZFeHF_YcdhVwGHdK6r-8lgixvGQ,2694
27
+ packaging/_parser.py,sha256=s_TvTvDNK0NrM2QB3VKThdWFM4Nc0P6JnkObkl3MjpM,10236
28
+ packaging/_structures.py,sha256=q3eVNmbWJGG_S0Dit_S3Ao8qQqz_5PYTXFAKBZe5yr4,1431
29
+ packaging/_tokenizer.py,sha256=J6v5H7Jzvb-g81xp_2QACKwO7LxHQA6ikryMU7zXwN8,5273
30
+ packaging/licenses/__init__.py,sha256=1x5M1nEYjcgwEbLt0dXwz2ukjr18DiCzC0sraQqJ-Ww,5715
31
+ packaging/licenses/__pycache__/__init__.cpython-311.pyc,,
32
+ packaging/licenses/__pycache__/_spdx.cpython-311.pyc,,
33
+ packaging/licenses/_spdx.py,sha256=oAm1ztPFwlsmCKe7lAAsv_OIOfS1cWDu9bNBkeu-2ns,48398
34
+ packaging/markers.py,sha256=c89TNzB7ZdGYhkovm6PYmqGyHxXlYVaLW591PHUNKD8,10561
35
+ packaging/metadata.py,sha256=YJibM7GYe4re8-0a3OlXmGS-XDgTEoO4tlBt2q25Bng,34762
36
+ packaging/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
37
+ packaging/requirements.py,sha256=gYyRSAdbrIyKDY66ugIDUQjRMvxkH2ALioTmX3tnL6o,2947
38
+ packaging/specifiers.py,sha256=GG1wPNMcL0fMJO68vF53wKMdwnfehDcaI-r9NpTfilA,40074
39
+ packaging/tags.py,sha256=CFqrJzAzc2XNGexerH__T-Y5Iwq7WbsYXsiLERLWxY0,21014
40
+ packaging/utils.py,sha256=0F3Hh9OFuRgrhTgGZUl5K22Fv1YP2tZl1z_2gO6kJiA,5050
41
+ packaging/version.py,sha256=olfyuk_DPbflNkJ4wBWetXQ17c74x3DB501degUv7DY,16676